Diffstat (limited to 'lib/Target')
382 files changed, 29021 insertions, 6011 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 0ac92f1ee8..0ad4183679 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -51,6 +51,10 @@ FunctionPass *createThumb2SizeReductionPass(); FunctionPass *createARMNaClRewritePass(); /* @LOCALMOD-END */ +/// \brief Creates an ARM-specific Target Transformation Info pass. +ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM); + + void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 5ea251a795..a76715a092 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -89,6 +89,10 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", "AvoidCPSRPartialUpdate", "true", "Avoid CPSR partial update for OOO execution">; +def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", + "AvoidMOVsShifterOperand", "true", + "Avoid movs instructions with shifter operand">; + // Some processors perform return stack prediction. CodeGen should avoid issue // "normal" call instructions to callees which do not return. def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true", @@ -152,6 +156,7 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", [FeatureNEONForFP, FeatureT2XtPk, FeatureVFP4, FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureAvoidPartialCPSR, + FeatureAvoidMOVsShOp, FeatureHasSlowFPVMLx]>; // FIXME: It has not been determined if A15 has these features. @@ -159,6 +164,12 @@ def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", "Cortex-A15 ARM processors", [FeatureT2XtPk, FeatureFP16, FeatureAvoidPartialCPSR]>; +def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", + "Cortex-R5 ARM processors", + [FeatureSlowFPBrcc, FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureAvoidPartialCPSR, + FeatureT2XtPk]>; class ProcNoItin<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; @@ -243,6 +254,11 @@ def : ProcessorModel<"cortex-a9-mp", CortexA9Model, def : ProcessorModel<"cortex-a15", CortexA9Model, [ProcA15, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS]>; +// FIXME: R5 has currently the same ProcessorModel as A8. +def : ProcessorModel<"cortex-r5", CortexA8Model, + [ProcR5, HasV7Ops, FeatureDB, + FeatureVFP3, FeatureDSPThumb2, + FeatureHasRAS]>; // V7M Processors. 
def : ProcNoItin<"cortex-m3", [HasV7Ops, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 3d59374aee..737a498dcd 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -29,9 +29,11 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -41,7 +43,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -49,7 +50,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Type.h" #include <cctype> using namespace llvm; @@ -189,7 +189,7 @@ namespace { const size_t TagHeaderSize = 1 + 4; Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4); - Streamer.EmitBytes(CurrentVendor, 0); + Streamer.EmitBytes(CurrentVendor); Streamer.EmitIntValue(0, 1); // '\0' Streamer.EmitIntValue(ARMBuildAttrs::File, 1); @@ -199,14 +199,14 @@ namespace { // emit each field as its type (ULEB or String) for (unsigned int i=0; i<Contents.size(); ++i) { AttributeItemType item = Contents[i]; - Streamer.EmitULEB128IntValue(item.Tag, 0); + Streamer.EmitULEB128IntValue(item.Tag); switch (item.Type) { default: llvm_unreachable("Invalid attribute type"); case AttributeItemType::NumericAttribute: - Streamer.EmitULEB128IntValue(item.IntValue, 0); + Streamer.EmitULEB128IntValue(item.IntValue); break; case AttributeItemType::TextAttribute: - Streamer.EmitBytes(item.StringValue.upper(), 0); + Streamer.EmitBytes(item.StringValue.upper()); Streamer.EmitIntValue(0, 1); // '\0' break; } @@ -761,7 +761,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (MCSym.getInt()) // External to current translation unit. - OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); + OutStreamer.EmitIntValue(0, 4/*size*/); else // Internal to current translation unit. // @@ -771,7 +771,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // We need to fill in the value for the NLP in those cases. 
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), OutContext), - 4/*size*/, 0/*addrspace*/); + 4/*size*/); } Stubs.clear(); @@ -789,7 +789,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.EmitValue(MCSymbolRefExpr:: Create(Stubs[i].second.getPointer(), OutContext), - 4/*size*/, 0/*addrspace*/); + 4/*size*/); } Stubs.clear(); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 2e90866f27..b6c8d108ee 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -27,9 +27,9 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalValue.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" @@ -464,8 +464,9 @@ PredicateInstruction(MachineInstr *MI, unsigned Opc = MI->getOpcode(); if (isUncondBranchOpcode(Opc)) { MI->setDesc(get(getMatchingCondBranchOpcode(Opc))); - MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm())); - MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .addReg(Pred[1].getReg()); return true; } @@ -1154,6 +1155,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ // All clear, widen the COPY. DEBUG(dbgs() << "widening: " << *MI); + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg // or some other super-register. @@ -1165,14 +1167,14 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ MI->setDesc(get(ARM::VMOVD)); MI->getOperand(0).setReg(DstRegD); MI->getOperand(1).setReg(SrcRegD); - AddDefaultPred(MachineInstrBuilder(MI)); + AddDefaultPred(MIB); // We are now reading SrcRegD instead of SrcRegS. This may upset the // register scavenger and machine verifier, so we need to indicate that we // are reading an undefined value from SrcRegD, but a proper value from // SrcRegS. MI->getOperand(1).setIsUndef(); - MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit); + MIB.addReg(SrcRegS, RegState::Implicit); // SrcRegD may actually contain an unrelated value in the ssub_1 // sub-register. Don't kill it. Only kill the ssub_0 sub-register. @@ -1716,7 +1718,7 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, // same register as operand 0. MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); FalseReg.setImplicit(); - NewMI->addOperand(FalseReg); + NewMI.addOperand(FalseReg); NewMI->tieOperands(0, NewMI->getNumOperands() - 1); // The caller will erase MI, but not DefMI. @@ -3329,8 +3331,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // instructions). if (Latency > 0 && Subtarget.isThumb2()) { const MachineFunction *MF = DefMI->getParent()->getParent(); - if (MF->getFunction()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize)) + if (MF->getFunction()->getAttributes(). 
+ hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize)) --Latency; } return Latency; @@ -3821,7 +3824,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { unsigned DstReg, SrcReg, DReg; unsigned Lane; - MachineInstrBuilder MIB(MI); + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); const TargetRegisterInfo *TRI = &getRegisterInfo(); switch (MI->getOpcode()) { default: diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index e814fda1f8..72e23aafff 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -28,11 +28,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/LLVMContext.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -45,19 +44,9 @@ using namespace llvm; -static cl::opt<bool> -ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false), - cl::desc("Force use of virtual base registers for stack load/store")); -static cl::opt<bool> -EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden, - cl::desc("Enable pre-regalloc stack frame index allocation")); -static cl::opt<bool> -EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true), - cl::desc("Enable use of a base pointer for complex stack frames")); - ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) - : ARMGenRegisterInfo(ARM::LR), TII(tii), STI(sti), + : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), TII(tii), STI(sti), FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11), BasePtr(ARM::R6) { } @@ -284,9 +273,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - if (!EnableBasePointer) - return false; - // When outgoing call frames are so large that we adjust the stack pointer // around the call, we can no longer use the stack pointer to reach the // emergency spill slot. @@ -332,8 +318,6 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // pointer adjustments around calls. if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF)) return true; - if (!EnableBasePointer) - return false; // A base pointer is required and allowed. Check that it isn't too late to // reserve it. 
return MRI->canReserveReg(BasePtr); @@ -346,7 +330,8 @@ needsStackRealignment(const MachineFunction &MF) const { unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->getFnAttributes().hasAttribute(Attributes::StackAlignment)); + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)); return requiresRealignment && canRealignStack(MF); } @@ -423,7 +408,7 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: requiresVirtualBaseRegisters(const MachineFunction &MF) const { - return EnableLocalStackAlloc; + return true; } static void @@ -562,8 +547,6 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { case ARM::VLDRS: case ARM::VLDRD: case ARM::VSTRS: case ARM::VSTRD: case ARM::tSTRspi: case ARM::tLDRspi: - if (ForceAllBaseRegAlloc) - return true; break; default: return false; diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 0bd1c3ee2f..e6e8c3d5fa 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -18,8 +18,8 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/IR/CallingConv.h" #include "llvm/Target/TargetInstrInfo.h" namespace llvm { diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 4b6a768e89..5e8e1739a9 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -28,9 +28,9 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -371,12 +371,16 @@ FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, } bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { - assert((MF.getTarget().getRelocationModel() != Reloc::Default || - MF.getTarget().getRelocationModel() != Reloc::Static) && + TargetMachine &Target = const_cast<TargetMachine&>(MF.getTarget()); + + assert((Target.getRelocationModel() != Reloc::Default || + Target.getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); - JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo(); - II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo(); - TD = MF.getTarget().getDataLayout(); + + JTI = static_cast<ARMJITInfo*>(Target.getJITInfo()); + II = static_cast<const ARMBaseInstrInfo*>(Target.getInstrInfo()); + TD = Target.getDataLayout(); + Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 54ccbdddaa..57c2cce36c 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -26,7 +26,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include 
"llvm/Support/ErrorHandling.h" diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index 1820b323f8..4e703ec3c1 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -14,11 +14,11 @@ #include "ARMConstantPoolValue.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/Constant.h" -#include "llvm/Constants.h" -#include "llvm/GlobalValue.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Type.h" #include <cstdlib> using namespace llvm; @@ -206,11 +206,7 @@ ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s, bool AddCurrentAddress) : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier, AddCurrentAddress), - S(strdup(s)) {} - -ARMConstantPoolSymbol::~ARMConstantPoolSymbol() { - free((void*)S); -} + S(s) {} ARMConstantPoolSymbol * ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s, @@ -218,14 +214,6 @@ ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s, return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false); } -static bool CPV_streq(const char *S1, const char *S2) { - if (S1 == S2) - return true; - if (S1 && S2 && strcmp(S1, S2) == 0) - return true; - return false; -} - int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { unsigned AlignMask = Alignment - 1; @@ -238,7 +226,7 @@ int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP, ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV); if (!APS) continue; - if (CPV_streq(APS->S, S) && equals(APS)) + if (APS->S == S && equals(APS)) return i; } } @@ -248,12 +236,11 @@ int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP, bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) { const ARMConstantPoolSymbol *ACPS = dyn_cast<ARMConstantPoolSymbol>(ACPV); - return ACPS && CPV_streq(ACPS->S, S) && - ARMConstantPoolValue::hasSameValue(ACPV); + return ACPS && ACPS->S == S && ARMConstantPoolValue::hasSameValue(ACPV); } void ARMConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) { - ID.AddPointer(S); + ID.AddString(S); ARMConstantPoolValue::addSelectionDAGCSEId(ID); } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 24f2fcb666..b6e0fe7c7c 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -164,19 +164,17 @@ public: /// ARMConstantPoolSymbol - ARM-specific constantpool values for external /// symbols. class ARMConstantPoolSymbol : public ARMConstantPoolValue { - const char *S; // ExtSymbol being loaded. + const std::string S; // ExtSymbol being loaded. 
ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id, unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, bool AddCurrentAddress); public: - ~ARMConstantPoolSymbol(); - static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s, unsigned ID, unsigned char PCAdj); - const char *getSymbol() const { return S; } + const char *getSymbol() const { return S.c_str(); } virtual int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment); diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 9cacf1b000..e072a2cb85 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -20,7 +20,6 @@ #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -30,13 +29,14 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" -#include "llvm/Operator.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -178,24 +178,24 @@ class ARMFastISel : public FastISel { bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt); - bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, + bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment = 0, bool isZExt = true, bool allocReg = true); - bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, + bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment = 0); bool ARMComputeAddress(const Value *Obj, Address &Addr); - void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3); + void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3); bool ARMIsMemCpySmall(uint64_t Len); bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, unsigned Alignment); - unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt); - unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT); - unsigned ARMMaterializeInt(const Constant *C, EVT VT); - unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT); - unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg); - unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg); + unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); + unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT); + unsigned ARMMaterializeInt(const Constant *C, MVT VT); + unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT); + unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg); + unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg); unsigned ARMSelectCallOp(bool UseReg); - unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, EVT VT); + unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT); // Call handling routines. 
private: @@ -221,7 +221,7 @@ class ARMFastISel : public FastISel { bool isARMNEONPred(const MachineInstr *MI); bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); - void AddLoadStoreOperands(EVT VT, Address &Addr, + void AddLoadStoreOperands(MVT VT, Address &Addr, const MachineInstrBuilder &MIB, unsigned Flags, bool useAM3); }; @@ -487,7 +487,7 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, // TODO: Don't worry about 64-bit now, but when this is fixed remove the // checks from the various callers. -unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) { +unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) { if (VT == MVT::f64) return 0; unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); @@ -497,7 +497,7 @@ unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) { return MoveReg; } -unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) { +unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) { if (VT == MVT::i64) return 0; unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); @@ -510,7 +510,7 @@ unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) { // For double width floating point we need to materialize two constants // (the high and the low) into integer registers then use a move to get // the combined constant into an FP reg. -unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) { +unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) { const APFloat Val = CFP->getValueAPF(); bool is64bit = VT == MVT::f64; @@ -554,7 +554,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) { return DestReg; } -unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { +unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return false; @@ -616,7 +616,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { return DestReg; } -unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { +unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { // For now 32-bit only. if (VT != MVT::i32) return 0; @@ -724,10 +724,11 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { // This assert should help detect some regressions early. assert(!FlagSfiDisableCP && "unexpected call to TargetMaterializeConstant"); // @LOCALMOD-END - EVT VT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(C->getType(), true); // Only handle simple types. 
- if (!VT.isSimple()) return 0; + if (!CEVT.isSimple()) return 0; + MVT VT = CEVT.getSimpleVT(); if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return ARMMaterializeFP(CFP, VT); @@ -903,12 +904,9 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { return Addr.Base.Reg != 0; } -void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { - - assert(VT.isSimple() && "Non-simple types are invalid here!"); - +void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) { bool needsLowering = false; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: llvm_unreachable("Unhandled load/store type!"); case MVT::i1: case MVT::i8: @@ -959,13 +957,12 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { } } -void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, +void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr, const MachineInstrBuilder &MIB, unsigned Flags, bool useAM3) { // addrmode5 output depends on the selection dag addressing dividing the // offset by 4 that it then later multiplies. Do this here as well. - if (VT.getSimpleVT().SimpleTy == MVT::f32 || - VT.getSimpleVT().SimpleTy == MVT::f64) + if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64) Addr.Offset /= 4; // Frame base works a bit differently. Handle it separately. @@ -1008,14 +1005,13 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, AddOptionalDefs(MIB); } -bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, +bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment, bool isZExt, bool allocReg) { - assert(VT.isSimple() && "Non-simple types are invalid here!"); unsigned Opc; bool useAM3 = false; bool needVMOV = false; const TargetRegisterClass *RC; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { // This is mostly going to be Neon/vector support. default: return false; case MVT::i1: @@ -1132,11 +1128,11 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { return true; } -bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, +bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment) { unsigned StrOpc; bool useAM3 = false; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { // This is mostly going to be Neon/vector support. default: return false; case MVT::i1: { @@ -1410,8 +1406,9 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt) { Type *Ty = Src1Value->getType(); - EVT SrcVT = TLI.getValueType(Ty, true); - if (!SrcVT.isSimple()) return false; + EVT SrcEVT = TLI.getValueType(Ty, true); + if (!SrcEVT.isSimple()) return false; + MVT SrcVT = SrcEVT.getSimpleVT(); bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy()); if (isFloat && !Subtarget->hasVFP2()) @@ -1448,7 +1445,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, unsigned CmpOpc; bool isICmp = true; bool needsExt = false; - switch (SrcVT.getSimpleVT().SimpleTy) { + switch (SrcVT.SimpleTy) { default: return false; // TODO: Verify compares. 
case MVT::f32: @@ -1600,7 +1597,10 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { return false; Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); + EVT SrcEVT = TLI.getValueType(Src->getType(), true); + if (!SrcEVT.isSimple()) + return false; + MVT SrcVT = SrcEVT.getSimpleVT(); if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) return false; @@ -1609,8 +1609,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { // Handle sign-extension. if (SrcVT == MVT::i16 || SrcVT == MVT::i8) { - EVT DestVT = MVT::i32; - SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, + SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32, /*isZExt*/!isSigned); if (SrcReg == 0) return false; } @@ -1816,7 +1815,9 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { } bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { - EVT VT = TLI.getValueType(I->getType(), true); + EVT FPVT = TLI.getValueType(I->getType(), true); + if (!FPVT.isSimple()) return false; + MVT VT = FPVT.getSimpleVT(); // We can get here in the case when we want to use NEON for our fp // operations, but can't figure out how to. Just use the vfp instructions @@ -1847,7 +1848,7 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { unsigned Op2 = getRegForValue(I->getOperand(1)); if (Op2 == 0) return false; - unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); + unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addReg(Op1).addReg(Op2)); @@ -2060,7 +2061,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, if (RVLocs.size() == 2 && RetVT == MVT::f64) { // For this move we copy into two registers and then move into the // double fp reg we want. - EVT DestVT = RVLocs[0].getValVT(); + MVT DestVT = RVLocs[0].getValVT(); const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); unsigned ResultReg = createResultReg(DstRC); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -2075,7 +2076,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, UpdateValueMap(I, ResultReg); } else { assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!"); - EVT CopyVT = RVLocs[0].getValVT(); + MVT CopyVT = RVLocs[0].getValVT(); // Special handling for extended integers. if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16) @@ -2106,8 +2107,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { CallingConv::ID CC = F.getCallingConv(); if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), - Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; @@ -2134,8 +2134,10 @@ bool ARMFastISel::SelectRet(const Instruction *I) { return false; unsigned SrcReg = Reg + VA.getValNo(); - EVT RVVT = TLI.getValueType(RV->getType()); - EVT DestVT = VA.getValVT(); + EVT RVEVT = TLI.getValueType(RV->getType()); + if (!RVEVT.isSimple()) return false; + MVT RVVT = RVEVT.getSimpleVT(); + MVT DestVT = VA.getValVT(); // Special handling for extended integers. 
if (RVVT != DestVT) { if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) @@ -2180,7 +2182,9 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) { unsigned ARMFastISel::getLibcallReg(const Twine &Name) { GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, GlobalValue::ExternalLinkage, 0, Name); - return ARMMaterializeGV(GV, TLI.getValueType(GV->getType())); + EVT LCREVT = TLI.getValueType(GV->getType()); + if (!LCREVT.isSimple()) return 0; + return ARMMaterializeGV(GV, LCREVT.getSimpleVT()); } // A quick function that will emit a call for a named libcall in F with the @@ -2340,16 +2344,16 @@ bool ARMFastISel::SelectCall(const Instruction *I, ISD::ArgFlagsTy Flags; unsigned AttrInd = i - CS.arg_begin() + 1; - if (CS.paramHasAttr(AttrInd, Attributes::SExt)) + if (CS.paramHasAttr(AttrInd, Attribute::SExt)) Flags.setSExt(); - if (CS.paramHasAttr(AttrInd, Attributes::ZExt)) + if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) Flags.setZExt(); // FIXME: Only handle *easy* calls for now. - if (CS.paramHasAttr(AttrInd, Attributes::InReg) || - CS.paramHasAttr(AttrInd, Attributes::StructRet) || - CS.paramHasAttr(AttrInd, Attributes::Nest) || - CS.paramHasAttr(AttrInd, Attributes::ByVal)) + if (CS.paramHasAttr(AttrInd, Attribute::InReg) || + CS.paramHasAttr(AttrInd, Attribute::StructRet) || + CS.paramHasAttr(AttrInd, Attribute::Nest) || + CS.paramHasAttr(AttrInd, Attribute::ByVal)) return false; Type *ArgTy = (*i)->getType(); @@ -2592,7 +2596,7 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) { return true; } -unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, +unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt) { if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) return 0; @@ -2600,8 +2604,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, unsigned Opc; bool isBoolZext = false; const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32); - if (!SrcVT.isSimple()) return 0; - switch (SrcVT.getSimpleVT().SimpleTy) { + switch (SrcVT.SimpleTy) { default: return 0; case MVT::i16: if (!Subtarget->hasV6Ops()) return 0; @@ -2648,14 +2651,18 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) { Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); - EVT SrcVT, DestVT; - SrcVT = TLI.getValueType(SrcTy, true); - DestVT = TLI.getValueType(DestTy, true); - bool isZExt = isa<ZExtInst>(I); unsigned SrcReg = getRegForValue(Src); if (!SrcReg) return false; + EVT SrcEVT, DestEVT; + SrcEVT = TLI.getValueType(SrcTy, true); + DestEVT = TLI.getValueType(DestTy, true); + if (!SrcEVT.isSimple()) return false; + if (!DestEVT.isSimple()) return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + MVT DestVT = DestEVT.getSimpleVT(); unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt); if (ResultReg == 0) return false; UpdateValueMap(I, ResultReg); @@ -2835,7 +2842,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, } unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, - unsigned Align, EVT VT) { + unsigned Align, MVT VT) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); ARMConstantPoolConstant *CPV = ARMConstantPoolConstant::Create(GV, UseGOTOFF ? 
ARMCP::GOTOFF : ARMCP::GOT); diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index e335141d24..0f162e9514 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -16,14 +16,13 @@ #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Function.h" -#include "llvm/Function.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" // @LOCALMOD-START @@ -779,7 +778,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i], getDefRegState(true)); if (DeleteRet) { - MIB->copyImplicitOps(&*MI); + MIB.copyImplicitOps(&*MI); MI->eraseFromParent(); } MI = MIB; @@ -1236,7 +1235,8 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { return; // Naked functions don't spill callee-saved registers. - if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked)) + if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::Naked)) return; // We are planning to use NEON instructions vst1 / vld1. diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 00214663c7..f4e77f3255 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -16,17 +16,17 @@ #include "ARMBaseInstrInfo.h" #include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -87,6 +87,8 @@ public: return "ARM Instruction Selection"; } + virtual void PreprocessISelDAG(); + /// getI32Imm - Return a target constant of type i32 with the specified /// value. inline SDValue getI32Imm(unsigned Imm) { @@ -339,6 +341,87 @@ static bool isScaledConstantInRange(SDValue Node, int Scale, return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; } +void ARMDAGToDAGISel::PreprocessISelDAG() { + if (!Subtarget->hasV6T2Ops()) + return; + + bool isThumb2 = Subtarget->isThumb(); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E; ) { + SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + if (N->getOpcode() != ISD::ADD) + continue; + + // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with + // leading zeros, followed by consecutive set bits, followed by 1 or 2 + // trailing zeros, e.g. 1020. 
+ // Transform the expression to + // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number + // of trailing zeros of c2. The left shift would be folded as an shifter + // operand of 'add' and the 'and' and 'srl' would become a bits extraction + // node (UBFX). + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + unsigned And_imm = 0; + if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { + if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) + std::swap(N0, N1); + } + if (!And_imm) + continue; + + // Check if the AND mask is an immediate of the form: 000.....1111111100 + unsigned TZ = CountTrailingZeros_32(And_imm); + if (TZ != 1 && TZ != 2) + // Be conservative here. Shifter operands aren't always free. e.g. On + // Swift, left shifter operand of 1 / 2 for free but others are not. + // e.g. + // ubfx r3, r1, #16, #8 + // ldr.w r3, [r0, r3, lsl #2] + // vs. + // mov.w r9, #1020 + // and.w r2, r9, r1, lsr #14 + // ldr r2, [r0, r2] + continue; + And_imm >>= TZ; + if (And_imm & (And_imm + 1)) + continue; + + // Look for (and (srl X, c1), c2). + SDValue Srl = N1.getOperand(0); + unsigned Srl_imm = 0; + if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || + (Srl_imm <= 2)) + continue; + + // Make sure first operand is not a shifter operand which would prevent + // folding of the left shift. + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (isThumb2) { + if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1)) + continue; + } else { + if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || + SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) + continue; + } + + // Now make the transformation. + Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32, + Srl.getOperand(0), + CurDAG->getConstant(Srl_imm+TZ, MVT::i32)); + N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32, + Srl, CurDAG->getConstant(And_imm, MVT::i32)); + N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32, + N1, CurDAG->getConstant(TZ, MVT::i32)); + CurDAG->UpdateNodeOperands(N, N0, N1); + } +} + /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at /// least on current ARM implementations) which should be avoidded. @@ -2218,10 +2301,10 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, if (!Subtarget->hasV6T2Ops()) return NULL; - unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) + unsigned Opc = isSigned + ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); - // For unsigned extracts, check for a shift right and mask unsigned And_imm = 0; if (N->getOpcode() == ISD::AND) { @@ -2239,7 +2322,29 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, // Note: The width operand is encoded as width-1. unsigned Width = CountTrailingOnes_32(And_imm) - 1; unsigned LSB = Srl_imm; + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + + if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { + // It's cheaper to use a right shift to extract the top bits. + if (Subtarget->isThumb()) { + Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; + SDValue Ops[] = { N->getOperand(0).getOperand(0), + CurDAG->getTargetConstant(LSB, MVT::i32), + getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + } + + // ARM models shift instructions as MOVsi with shifter operand. 
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); + SDValue ShOpc = + CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), + MVT::i32); + SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, + getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5); + } + SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), CurDAG->getTargetConstant(Width, MVT::i32), diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 45d52b62ac..0e15313b9c 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -25,7 +25,6 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -35,19 +34,20 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalValue.h" -#include "llvm/Instruction.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" // @LOCALMOD-START namespace llvm { @@ -904,10 +904,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // due to the common occurrence of cross class copies and subregister insertions // and extractions. std::pair<const TargetRegisterClass*, uint8_t> -ARMTargetLowering::findRepresentativeClass(EVT VT) const{ +ARMTargetLowering::findRepresentativeClass(MVT VT) const{ const TargetRegisterClass *RRC = 0; uint8_t Cost = 1; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: return TargetLowering::findRepresentativeClass(VT); // Use DPR as representative register class for all floating point @@ -1091,7 +1091,7 @@ EVT ARMTargetLowering::getSetCCResultType(EVT VT) const { /// getRegClassFor - Return the register class that should be used for the /// specified value type. -const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { +const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const { // Map v4i64 to QQ registers but do not make the type legal. Similarly map // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to // load / store 4 to 8 consecutive D registers. @@ -1661,8 +1661,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently. unsigned CallOpc; - bool HasMinSizeAttr = MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::MinSize); + bool HasMinSizeAttr = MF.getFunction()->getAttributes(). 
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; @@ -6899,7 +6899,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { DefRegs[OI->getReg()] = true; } - MachineInstrBuilder MIB(&*II); + MachineInstrBuilder MIB(*MF, &*II); for (unsigned i = 0; SavedRegs[i] != 0; ++i) { unsigned Reg = SavedRegs[i]; @@ -6976,8 +6976,9 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { UnitSize = 2; } else { // Check whether we can use NEON instructions. - if (!MF->getFunction()->getFnAttributes(). - hasAttribute(Attributes::NoImplicitFloat) && + if (!MF->getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoImplicitFloat) && Subtarget->hasNEON()) { if ((Align % 16 == 0) && SizeVal >= 16) { ldrOpc = ARM::VLD1q32wb_fixed; @@ -9741,33 +9742,33 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { const Function *F = MF.getFunction(); // See if we can use NEON instructions for this... - if (IsZeroVal && + if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() && - !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) { + !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoImplicitFloat)) { bool Fast; - if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || - (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && - Fast))) { + if (Size >= 16 && + (memOpAlign(SrcAlign, DstAlign, 16) || + (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) { return MVT::v2f64; - } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || - (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && - Fast))) { + } else if (Size >= 8 && + (memOpAlign(SrcAlign, DstAlign, 8) || + (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) { return MVT::f64; } } // Lowering to i32/i16 if the size permits. - if (Size >= 4) { + if (Size >= 4) return MVT::i32; - } else if (Size >= 2) { + else if (Size >= 2) return MVT::i16; - } // Let the target-independent logic figure it out. return MVT::Other; @@ -10570,24 +10571,6 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return false; } -bool ARMTargetLowering::isIntImmLegal(const APInt &Imm, EVT VT) const { - if (VT.getSizeInBits() > 32) - return false; - - int32_t ImmVal = Imm.getSExtValue(); - if (!Subtarget->isThumb()) { - return (ImmVal >= 0 && ImmVal < 65536) || - (ARM_AM::getSOImmVal(ImmVal) != -1) || - (ARM_AM::getSOImmVal(~ImmVal) != -1); - } else if (Subtarget->isThumb2()) { - return (ImmVal >= 0 && ImmVal < 65536) || - (ARM_AM::getT2SOImmVal(ImmVal) != -1) || - (ARM_AM::getT2SOImmVal(~ImmVal) != -1); - } else /*Thumb1*/ { - return (ImmVal >= 0 && ImmVal < 256); - } -} - /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. 
@@ -10669,3 +10652,4 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return false; } + diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index c7ee13798f..e099d38200 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -294,7 +294,7 @@ namespace llvm { virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; @@ -369,7 +369,7 @@ namespace llvm { /// getRegClassFor - Return the register class that should be used for the /// specified value type. - virtual const TargetRegisterClass *getRegClassFor(EVT VT) const; + virtual const TargetRegisterClass *getRegClassFor(MVT VT) const; /// getMaximalGlobalOffset - Returns the maximal possible offset which can /// be used for loads / stores from the global. @@ -390,14 +390,12 @@ namespace llvm { /// materialize the FP immediate as a load from a constant pool. virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; - virtual bool isIntImmLegal(const APInt &Imm, EVT VT) const; - virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const; protected: std::pair<const TargetRegisterClass*, uint8_t> - findRepresentativeClass(EVT VT) const; + findRepresentativeClass(MVT VT) const; private: /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index a0b6f249a2..80f0ec7437 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -22,8 +22,8 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" using namespace llvm; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index caa5135bfc..351a290e2a 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -18,7 +18,7 @@ #include "ARMRelocations.h" #include "ARMSubtarget.h" #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Memory.h" diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 8949b50f67..2ae927e1ee 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -31,9 +31,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -1418,7 +1418,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); PrevMI->setDesc(TII->get(NewOpc)); MO.setReg(ARM::PC); - PrevMI->copyImplicitOps(&*MBBI); + PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); MBB.erase(MBBI); return true; } diff --git a/lib/Target/ARM/ARMMCInstLower.cpp 
b/lib/Target/ARM/ARMMCInstLower.cpp index 1bd2a93a8b..5a65efe3b5 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -16,7 +16,7 @@ #include "ARMAsmPrinter.h" #include "MCTargetDesc/ARMMCExpr.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/Constants.h" +#include "llvm/IR/Constants.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/Mangler.h" diff --git a/lib/Target/ARM/ARMNaClRewritePass.cpp b/lib/Target/ARM/ARMNaClRewritePass.cpp index d26a35848d..317b84a5da 100644 --- a/lib/Target/ARM/ARMNaClRewritePass.cpp +++ b/lib/Target/ARM/ARMNaClRewritePass.cpp @@ -27,7 +27,7 @@ #include "ARMNaClRewritePass.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/CommandLine.h" diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 404634fee9..4191931a5a 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1887,6 +1887,9 @@ def CortexA9Model : SchedMachineModel { let LoadLatency = 2; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. + let ILPWindow = 10; // Don't reschedule small blocks to hide + // latency. Minimum latency requirements are already + // modeled strictly by reserving resources. let MispredictPenalty = 8; // Based on estimate of pipeline depth. let Itineraries = CortexA9Itineraries; diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 72be065d64..0056562719 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -14,7 +14,7 @@ #define DEBUG_TYPE "arm-selectiondag-info" #include "ARMTargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/DerivedTypes.h" using namespace llvm; ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM) diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index f5ea64d42b..0da446aa02 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -14,7 +14,7 @@ #include "ARMSubtarget.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" -#include "llvm/GlobalValue.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -84,6 +84,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasDataBarrier(false) , Pref32BitThumb(false) , AvoidCPSRPartialUpdate(false) + , AvoidMOVsShifterOperand(false) , HasRAS(false) , HasMPExtension(false) , FPOnlySP(false) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 28975f9243..c0e834c648 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -39,7 +39,7 @@ class StringRef; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { - Others, CortexA5, CortexA8, CortexA9, CortexA15, Swift + Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift }; /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. @@ -145,6 +145,10 @@ protected: /// CPSR setting instruction. bool AvoidCPSRPartialUpdate; + /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting + /// movs with shifter operand (i.e. 
asr, lsl, lsr). + bool AvoidMOVsShifterOperand; + /// HasRAS - Some processors perform return stack prediction. CodeGen should /// avoid issue "normal" call instructions to callees which do not return. bool HasRAS; @@ -225,6 +229,7 @@ protected: bool isSwift() const { return ARMProcFamily == Swift; } bool isCortexM3() const { return CPUString == "cortex-m3"; } bool isLikeA9() const { return isCortexA9() || isCortexA15(); } + bool isCortexR5() const { return ARMProcFamily == CortexR5; } bool hasARMOps() const { return !NoARM; } @@ -246,6 +251,7 @@ protected: bool isFPOnlySP() const { return FPOnlySP; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } + bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } bool hasRAS() const { return HasRAS; } bool hasMPExtension() const { return HasMPExtension; } bool hasThumb2DSP() const { return Thumb2DSP; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 533be838d8..c02e981e54 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -58,6 +58,15 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, this->Options.FloatABIType = FloatABI::Soft; } +void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our ARM pass. This + // allows the ARM pass to delegate to the target independent layer when + // appropriate. + PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createARMTargetTransformInfoPass(this)); +} + + void ARMTargetMachine::anchor() { } ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, @@ -77,8 +86,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, "v128:64:128-v64:64:64-n32-S32")), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget), - STTI(&TLInfo), VTTI(&TLInfo) { + FrameLowering(Subtarget) { if (!Subtarget.hasARMOps()) report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " "support ARM mode execution!"); @@ -110,8 +118,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, TSInfo(*this), FrameLowering(Subtarget.hasThumb2() ? new ARMFrameLowering(Subtarget) - : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)), - STTI(&TLInfo), VTTI(&TLInfo) { + : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { } namespace { diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 2d265afa1b..e6ad5979f9 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -24,10 +24,9 @@ #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/OwningPtr.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" // @LOCALMOD-START #include "llvm/Support/CommandLine.h" @@ -58,6 +57,9 @@ public: return &InstrItins; } + /// \brief Register ARM analysis passes with a pass manager. 
+ virtual void addAnalysisPasses(PassManagerBase &PM); + // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); @@ -73,8 +75,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine { ARMTargetLowering TLInfo; ARMSelectionDAGInfo TSInfo; ARMFrameLowering FrameLowering; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -96,12 +96,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine { virtual const ARMFrameLowering *getFrameLowering() const { return &FrameLowering; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const DataLayout *getDataLayout() const { return &DL; } }; @@ -119,8 +113,6 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { ARMSelectionDAGInfo TSInfo; // Either Thumb1FrameLowering or ARMFrameLowering. OwningPtr<ARMFrameLowering> FrameLowering; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -149,12 +141,6 @@ public: virtual const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual const DataLayout *getDataLayout() const { return &DL; } }; diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp new file mode 100644 index 0000000000..404a6fff11 --- /dev/null +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -0,0 +1,159 @@ +//===-- ARMTargetTransformInfo.cpp - ARM specific TTI pass ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// ARM target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "armtti" +#include "ARM.h" +#include "ARMTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +// Declare the pass initialization routine locally as target-specific passes +// don't havve a target-wide initialization entry point, and so we rely on the +// pass constructor initialization. +namespace llvm { +void initializeARMTTIPass(PassRegistry &); +} + +namespace { + +class ARMTTI : public ImmutablePass, public TargetTransformInfo { + const ARMBaseTargetMachine *TM; + const ARMSubtarget *ST; + + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. 
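For context, a minimal sketch of how a driver might consume the new addAnalysisPasses hook; setupPasses and its use are hypothetical, only TargetMachine::addAnalysisPasses and the legacy PassManager are taken from the tree:

#include "llvm/PassManager.h"
#include "llvm/Target/TargetMachine.h"

// Register the target's TTI analyses before any transformation passes are
// added, so later TargetTransformInfo queries resolve through the ARM pass
// and fall back to BasicTTI where the ARM pass has no override.
static void setupPasses(llvm::TargetMachine &TM, llvm::PassManager &PM) {
  TM.addAnalysisPasses(PM);
  // ... add optimization and codegen passes that may query TTI here ...
}
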
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + +public: + ARMTTI() : ImmutablePass(ID), TM(0), ST(0) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + ARMTTI(const ARMBaseTargetMachine *TM) + : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()) { + initializeARMTTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + pushTTIStack(this); + } + + virtual void finalizePass() { + popTTIStack(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + TargetTransformInfo::getAnalysisUsage(AU); + } + + /// Pass identification. + static char ID; + + /// Provide necessary pointer adjustments for the two base classes. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + /// \name Scalar TTI Implementations + /// @{ + + virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; + + /// @} + + + /// \name Vector TTI Implementations + /// @{ + + unsigned getNumberOfRegisters(bool Vector) const { + if (Vector) { + if (ST->hasNEON()) + return 16; + return 0; + } + + if (ST->isThumb1Only()) + return 8; + return 16; + } + + unsigned getRegisterBitWidth(bool Vector) const { + if (Vector) { + if (ST->hasNEON()) + return 128; + return 0; + } + + return 32; + } + + unsigned getMaximumUnrollFactor() const { + // These are out of order CPUs: + if (ST->isCortexA15() || ST->isSwift()) + return 2; + return 1; + } + + /// @} +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(ARMTTI, TargetTransformInfo, "armtti", + "ARM Target Transform Info", true, true, false) +char ARMTTI::ID = 0; + +ImmutablePass * +llvm::createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM) { + return new ARMTTI(TM); +} + + +unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { + assert(Ty->isIntegerTy()); + + unsigned Bits = Ty->getPrimitiveSizeInBits(); + if (Bits == 0 || Bits > 32) + return 4; + + int32_t SImmVal = Imm.getSExtValue(); + uint32_t ZImmVal = Imm.getZExtValue(); + if (!ST->isThumb()) { + if ((SImmVal >= 0 && SImmVal < 65536) || + (ARM_AM::getSOImmVal(ZImmVal) != -1) || + (ARM_AM::getSOImmVal(~ZImmVal) != -1)) + return 1; + return ST->hasV6T2Ops() ? 2 : 3; + } else if (ST->isThumb2()) { + if ((SImmVal >= 0 && SImmVal < 65536) || + (ARM_AM::getT2SOImmVal(ZImmVal) != -1) || + (ARM_AM::getT2SOImmVal(~ZImmVal) != -1)) + return 1; + return ST->hasV6T2Ops() ? 2 : 3; + } else /*Thumb1*/ { + if (SImmVal >= 0 && SImmVal < 256) + return 1; + if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal)) + return 2; + // Load from constantpool. + return 3; + } + return 2; +} diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp deleted file mode 100644 index 3ada0f2af6..0000000000 --- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp +++ /dev/null @@ -1,134 +0,0 @@ -//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
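As a usage illustration of the immediate-cost interface above (not part of the patch; isCheapConstant is a made-up helper), a pass would consult the cost through the TargetTransformInfo analysis group rather than calling ARMTTI directly:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"

// Cost 1 means the immediate fits a single instruction on the selected
// subtarget (e.g. an ARM-mode rotated 8-bit immediate); higher costs model
// movw/movt pairs or a constant-pool load.
static bool isCheapConstant(const llvm::TargetTransformInfo &TTI,
                            const llvm::ConstantInt *CI) {
  return TTI.getIntImmCost(CI->getValue(), CI->getType()) <= 1;
}

For example, 0x00AB0000 is an 8-bit value rotated by an even amount and so costs 1 in ARM mode, while an arbitrary pattern such as 0x12345678 falls through to the movw/movt or literal-pool estimates.
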
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/ARMBaseInfo.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCTargetAsmLexer.h" -#include "llvm/Support/TargetRegistry.h" -#include <map> -#include <string> - -using namespace llvm; - -namespace { - -class ARMBaseAsmLexer : public MCTargetAsmLexer { - const MCAsmInfo &AsmInfo; - - const AsmToken &lexDefinite() { - return getLexer()->Lex(); - } - - AsmToken LexTokenUAL(); -protected: - typedef std::map <std::string, unsigned> rmap_ty; - - rmap_ty RegisterMap; - - void InitRegisterMap(const MCRegisterInfo *info) { - unsigned numRegs = info->getNumRegs(); - - for (unsigned i = 0; i < numRegs; ++i) { - const char *regName = info->getName(i); - if (regName) - RegisterMap[regName] = i; - } - } - - unsigned MatchRegisterName(StringRef Name) { - rmap_ty::iterator iter = RegisterMap.find(Name.str()); - if (iter != RegisterMap.end()) - return iter->second; - else - return 0; - } - - AsmToken LexToken() { - if (!Lexer) { - SetError(SMLoc(), "No MCAsmLexer installed"); - return AsmToken(AsmToken::Error, "", 0); - } - - switch (AsmInfo.getAssemblerDialect()) { - default: - SetError(SMLoc(), "Unhandled dialect"); - return AsmToken(AsmToken::Error, "", 0); - case 0: - return LexTokenUAL(); - } - } -public: - ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI) - : MCTargetAsmLexer(T), AsmInfo(MAI) { - } -}; - -class ARMAsmLexer : public ARMBaseAsmLexer { -public: - ARMAsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI) - : ARMBaseAsmLexer(T, MAI) { - InitRegisterMap(&MRI); - } -}; - -class ThumbAsmLexer : public ARMBaseAsmLexer { -public: - ThumbAsmLexer(const Target &T, const MCRegisterInfo &MRI,const MCAsmInfo &MAI) - : ARMBaseAsmLexer(T, MAI) { - InitRegisterMap(&MRI); - } -}; - -} // end anonymous namespace - -AsmToken ARMBaseAsmLexer::LexTokenUAL() { - const AsmToken &lexedToken = lexDefinite(); - - switch (lexedToken.getKind()) { - default: break; - case AsmToken::Error: - SetError(Lexer->getErrLoc(), Lexer->getErr()); - break; - case AsmToken::Identifier: { - std::string lowerCase = lexedToken.getString().lower(); - - unsigned regID = MatchRegisterName(lowerCase); - // Check for register aliases. - // r13 -> sp - // r14 -> lr - // r15 -> pc - // ip -> r12 - // FIXME: Some assemblers support lots of others. Do we want them all? 
- if (!regID) { - regID = StringSwitch<unsigned>(lowerCase) - .Case("r13", ARM::SP) - .Case("r14", ARM::LR) - .Case("r15", ARM::PC) - .Case("ip", ARM::R12) - .Default(0); - } - - if (regID) - return AsmToken(AsmToken::Register, - lexedToken.getString(), - static_cast<int64_t>(regID)); - } - } - - return AsmToken(lexedToken); -} - -extern "C" void LLVMInitializeARMAsmLexer() { - RegisterMCAsmLexer<ARMAsmLexer> X(TheARMTarget); - RegisterMCAsmLexer<ThumbAsmLexer> Y(TheThumbTarget); -} diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 4685b1d193..e9bdc4ad9b 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -178,7 +178,8 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index); + OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, + SMLoc &EndLoc); // Asm Match Converter Methods void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); @@ -270,7 +271,7 @@ public: namespace { /// ARMOperand - Instances of this class represent a parsed ARM machine -/// instruction. +/// operand. class ARMOperand : public MCParsedAsmOperand { enum KindTy { k_CondCode, @@ -2450,8 +2451,8 @@ static unsigned MatchRegisterName(StringRef Name); bool ARMAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { StartLoc = Parser.getTok().getLoc(); + EndLoc = Parser.getTok().getEndLoc(); RegNo = tryParseRegister(); - EndLoc = Parser.getTok().getLoc(); return (RegNo == (unsigned)-1); } @@ -2540,6 +2541,8 @@ int ARMAsmParser::tryParseShiftRegister( if (!PrevOp->isReg()) return Error(PrevOp->getStartLoc(), "shift must be of a register"); int SrcReg = PrevOp->getReg(); + + SMLoc EndLoc; int64_t Imm = 0; int ShiftReg = 0; if (ShiftTy == ARM_AM::rrx) { @@ -2554,7 +2557,7 @@ int ARMAsmParser::tryParseShiftRegister( Parser.Lex(); // Eat hash. SMLoc ImmLoc = Parser.getTok().getLoc(); const MCExpr *ShiftExpr = 0; - if (getParser().ParseExpression(ShiftExpr)) { + if (getParser().ParseExpression(ShiftExpr, EndLoc)) { Error(ImmLoc, "invalid immediate shift value"); return -1; } @@ -2579,8 +2582,9 @@ int ARMAsmParser::tryParseShiftRegister( if (Imm == 0) ShiftTy = ARM_AM::lsl; } else if (Parser.getTok().is(AsmToken::Identifier)) { - ShiftReg = tryParseRegister(); SMLoc L = Parser.getTok().getLoc(); + EndLoc = Parser.getTok().getEndLoc(); + ShiftReg = tryParseRegister(); if (ShiftReg == -1) { Error (L, "expected immediate or register in shift operand"); return -1; @@ -2595,10 +2599,10 @@ int ARMAsmParser::tryParseShiftRegister( if (ShiftReg && ShiftTy != ARM_AM::rrx) Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg, ShiftReg, Imm, - S, Parser.getTok().getLoc())); + S, EndLoc)); else Operands.push_back(ARMOperand::CreateShiftedImmediate(ShiftTy, SrcReg, Imm, - S, Parser.getTok().getLoc())); + S, EndLoc)); return 0; } @@ -2612,12 +2616,13 @@ int ARMAsmParser::tryParseShiftRegister( /// parse for a specific register type. 
bool ARMAsmParser:: tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - SMLoc S = Parser.getTok().getLoc(); + const AsmToken &RegTok = Parser.getTok(); int RegNo = tryParseRegister(); if (RegNo == -1) return true; - Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc())); + Operands.push_back(ARMOperand::CreateReg(RegNo, RegTok.getLoc(), + RegTok.getEndLoc())); const AsmToken &ExclaimTok = Parser.getTok(); if (ExclaimTok.is(AsmToken::Exclaim)) { @@ -2641,10 +2646,10 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!MCE) return TokError("immediate value expected for vector index"); - SMLoc E = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(E, "']' expected"); + return Error(Parser.getTok().getLoc(), "']' expected"); + SMLoc E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat right bracket token. Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(), @@ -2794,7 +2799,7 @@ parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Check for and consume the closing '}' if (Parser.getTok().isNot(AsmToken::RCurly)) return MatchOperand_ParseFail; - SMLoc E = Parser.getTok().getLoc(); + SMLoc E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat the '}' Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E)); @@ -2891,10 +2896,10 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.getTok().is(AsmToken::Minus)) { if (Parser.getTok().is(AsmToken::Minus)) { Parser.Lex(); // Eat the minus. - SMLoc EndLoc = Parser.getTok().getLoc(); + SMLoc AfterMinusLoc = Parser.getTok().getLoc(); int EndReg = tryParseRegister(); if (EndReg == -1) - return Error(EndLoc, "register expected"); + return Error(AfterMinusLoc, "register expected"); // Allow Q regs and just interpret them as the two D sub-registers. if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg)) EndReg = getDRegFromQReg(EndReg) + 1; @@ -2904,10 +2909,10 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { continue; // The register must be in the same register class as the first. if (!RC->contains(EndReg)) - return Error(EndLoc, "invalid register in register list"); + return Error(AfterMinusLoc, "invalid register in register list"); // Ranges must go from low to high. if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg)) - return Error(EndLoc, "bad range in register list"); + return Error(AfterMinusLoc, "bad range in register list"); // Add all the registers in the range to the register list. while (Reg != EndReg) { @@ -2955,9 +2960,9 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc)); } - SMLoc E = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::RCurly)) - return Error(E, "'}' expected"); + return Error(Parser.getTok().getLoc(), "'}' expected"); + SMLoc E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat '}' token. // Push the register list operand. @@ -2974,13 +2979,14 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Helper function to parse the lane index for vector lists. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) { +parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { Index = 0; // Always return a defined index value. if (Parser.getTok().is(AsmToken::LBrac)) { Parser.Lex(); // Eat the '['. 
if (Parser.getTok().is(AsmToken::RBrac)) { // "Dn[]" is the 'all lanes' syntax. LaneKind = AllLanes; + EndLoc = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat the ']'. return MatchOperand_Success; } @@ -3005,6 +3011,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) { Error(Parser.getTok().getLoc(), "']' expected"); return MatchOperand_ParseFail; } + EndLoc = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat the ']'. int64_t Val = CE->getValue(); @@ -3031,21 +3038,19 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // (without encosing curly braces) as a single or double entry list, // respectively. if (Parser.getTok().is(AsmToken::Identifier)) { + SMLoc E = Parser.getTok().getEndLoc(); int Reg = tryParseRegister(); if (Reg == -1) return MatchOperand_NoMatch; - SMLoc E = Parser.getTok().getLoc(); if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) { - OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex); + OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E); if (Res != MatchOperand_Success) return Res; switch (LaneKind) { case NoLanes: - E = Parser.getTok().getLoc(); Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E)); break; case AllLanes: - E = Parser.getTok().getLoc(); Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false, S, E)); break; @@ -3059,18 +3064,16 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { Reg = getDRegFromQReg(Reg); - OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex); + OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E); if (Res != MatchOperand_Success) return Res; switch (LaneKind) { case NoLanes: - E = Parser.getTok().getLoc(); Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0, &ARMMCRegisterClasses[ARM::DPairRegClassID]); Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E)); break; case AllLanes: - E = Parser.getTok().getLoc(); Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0, &ARMMCRegisterClasses[ARM::DPairRegClassID]); Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false, @@ -3111,7 +3114,9 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ++Reg; ++Count; } - if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success) + + SMLoc E; + if (parseVectorLane(LaneKind, LaneIndex, E) != MatchOperand_Success) return MatchOperand_ParseFail; while (Parser.getTok().is(AsmToken::Comma) || @@ -3125,10 +3130,10 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_ParseFail; } Parser.Lex(); // Eat the minus. - SMLoc EndLoc = Parser.getTok().getLoc(); + SMLoc AfterMinusLoc = Parser.getTok().getLoc(); int EndReg = tryParseRegister(); if (EndReg == -1) { - Error(EndLoc, "register expected"); + Error(AfterMinusLoc, "register expected"); return MatchOperand_ParseFail; } // Allow Q regs and just interpret them as the two D sub-registers. @@ -3140,24 +3145,24 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { continue; // The register must be in the same register class as the first. if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) { - Error(EndLoc, "invalid register in register list"); + Error(AfterMinusLoc, "invalid register in register list"); return MatchOperand_ParseFail; } // Ranges must go from low to high. 
if (Reg > EndReg) { - Error(EndLoc, "bad range in register list"); + Error(AfterMinusLoc, "bad range in register list"); return MatchOperand_ParseFail; } // Parse the lane specifier if present. VectorLaneTy NextLaneKind; unsigned NextLaneIndex; - if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != + MatchOperand_Success) return MatchOperand_ParseFail; if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { - Error(EndLoc, "mismatched lane index in register list"); + Error(AfterMinusLoc, "mismatched lane index in register list"); return MatchOperand_ParseFail; } - EndLoc = Parser.getTok().getLoc(); // Add all the registers in the range to the register list. Count += EndReg - Reg; @@ -3196,11 +3201,12 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Parse the lane specifier if present. VectorLaneTy NextLaneKind; unsigned NextLaneIndex; - SMLoc EndLoc = Parser.getTok().getLoc(); - if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + SMLoc LaneLoc = Parser.getTok().getLoc(); + if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != + MatchOperand_Success) return MatchOperand_ParseFail; if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { - Error(EndLoc, "mismatched lane index in register list"); + Error(LaneLoc, "mismatched lane index in register list"); return MatchOperand_ParseFail; } continue; @@ -3221,7 +3227,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { VectorLaneTy NextLaneKind; unsigned NextLaneIndex; SMLoc EndLoc = Parser.getTok().getLoc(); - if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != MatchOperand_Success) return MatchOperand_ParseFail; if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { Error(EndLoc, "mismatched lane index in register list"); @@ -3229,11 +3235,11 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } } - SMLoc E = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::RCurly)) { - Error(E, "'}' expected"); + Error(Parser.getTok().getLoc(), "'}' expected"); return MatchOperand_ParseFail; } + E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat '}' token. switch (LaneKind) { @@ -3525,7 +3531,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, const MCExpr *ShiftAmount; SMLoc Loc = Parser.getTok().getLoc(); - if (getParser().ParseExpression(ShiftAmount)) { + SMLoc EndLoc; + if (getParser().ParseExpression(ShiftAmount, EndLoc)) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } @@ -3540,7 +3547,7 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, return MatchOperand_ParseFail; } - Operands.push_back(ARMOperand::CreateImm(CE, Loc, Parser.getTok().getLoc())); + Operands.push_back(ARMOperand::CreateImm(CE, Loc, EndLoc)); return MatchOperand_Success; } @@ -3550,7 +3557,7 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) { - Error(Tok.getLoc(), "'be' or 'le' operand expected"); + Error(S, "'be' or 'le' operand expected"); return MatchOperand_ParseFail; } int Val = StringSwitch<int>(Tok.getString()) @@ -3560,12 +3567,12 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the token. 
if (Val == -1) { - Error(Tok.getLoc(), "'be' or 'le' operand expected"); + Error(S, "'be' or 'le' operand expected"); return MatchOperand_ParseFail; } Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val, getContext()), - S, Parser.getTok().getLoc())); + S, Tok.getEndLoc())); return MatchOperand_Success; } @@ -3601,16 +3608,17 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_ParseFail; } Parser.Lex(); // Eat hash token. + SMLoc ExLoc = Parser.getTok().getLoc(); const MCExpr *ShiftAmount; - SMLoc E = Parser.getTok().getLoc(); - if (getParser().ParseExpression(ShiftAmount)) { - Error(E, "malformed shift expression"); + SMLoc EndLoc; + if (getParser().ParseExpression(ShiftAmount, EndLoc)) { + Error(ExLoc, "malformed shift expression"); return MatchOperand_ParseFail; } const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount); if (!CE) { - Error(E, "shift amount must be an immediate"); + Error(ExLoc, "shift amount must be an immediate"); return MatchOperand_ParseFail; } @@ -3618,25 +3626,24 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (isASR) { // Shift amount must be in [1,32] if (Val < 1 || Val > 32) { - Error(E, "'asr' shift amount must be in range [1,32]"); + Error(ExLoc, "'asr' shift amount must be in range [1,32]"); return MatchOperand_ParseFail; } // asr #32 encoded as asr #0, but is not allowed in Thumb2 mode. if (isThumb() && Val == 32) { - Error(E, "'asr #32' shift amount not allowed in Thumb mode"); + Error(ExLoc, "'asr #32' shift amount not allowed in Thumb mode"); return MatchOperand_ParseFail; } if (Val == 32) Val = 0; } else { // Shift amount must be in [1,32] if (Val < 0 || Val > 31) { - Error(E, "'lsr' shift amount must be in range [0,31]"); + Error(ExLoc, "'lsr' shift amount must be in range [0,31]"); return MatchOperand_ParseFail; } } - E = Parser.getTok().getLoc(); - Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, E)); + Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, EndLoc)); return MatchOperand_Success; } @@ -3662,16 +3669,17 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_ParseFail; } Parser.Lex(); // Eat hash token. + SMLoc ExLoc = Parser.getTok().getLoc(); const MCExpr *ShiftAmount; - SMLoc E = Parser.getTok().getLoc(); - if (getParser().ParseExpression(ShiftAmount)) { - Error(E, "malformed rotate expression"); + SMLoc EndLoc; + if (getParser().ParseExpression(ShiftAmount, EndLoc)) { + Error(ExLoc, "malformed rotate expression"); return MatchOperand_ParseFail; } const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount); if (!CE) { - Error(E, "rotate amount must be an immediate"); + Error(ExLoc, "rotate amount must be an immediate"); return MatchOperand_ParseFail; } @@ -3680,12 +3688,11 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // normally, zero is represented in asm by omitting the rotate operand // entirely. if (Val != 8 && Val != 16 && Val != 24 && Val != 0) { - Error(E, "'ror' rotate amount must be 8, 16, or 24"); + Error(ExLoc, "'ror' rotate amount must be 8, 16, or 24"); return MatchOperand_ParseFail; } - E = Parser.getTok().getLoc(); - Operands.push_back(ARMOperand::CreateRotImm(Val, S, E)); + Operands.push_back(ARMOperand::CreateRotImm(Val, S, EndLoc)); return MatchOperand_Success; } @@ -3735,7 +3742,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat hash token. 
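The parser hunks above and below all apply the same fix: read a token's end location (or let ParseExpression report one) before the token is consumed, so operands carry accurate source ranges. A reduced sketch of that pattern, with consumeTokenGetEndLoc as a hypothetical helper:

#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/Support/SMLoc.h"

// Capture the end location of the current token before Lex() advances past
// it; calling getTok().getLoc() afterwards would point at the *next* token.
static llvm::SMLoc consumeTokenGetEndLoc(llvm::MCAsmParser &Parser) {
  llvm::SMLoc End = Parser.getTok().getEndLoc();
  Parser.Lex(); // Eat the token.
  return End;
}
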
const MCExpr *WidthExpr; - if (getParser().ParseExpression(WidthExpr)) { + SMLoc EndLoc; + if (getParser().ParseExpression(WidthExpr, EndLoc)) { Error(E, "malformed immediate expression"); return MatchOperand_ParseFail; } @@ -3751,9 +3759,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Error(E, "'width' operand must be in the range [1,32-lsb]"); return MatchOperand_ParseFail; } - E = Parser.getTok().getLoc(); - Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, E)); + Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, EndLoc)); return MatchOperand_Success; } @@ -3772,7 +3779,6 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Tok.getLoc(); bool haveEaten = false; bool isAdd = true; - int Reg = -1; if (Tok.is(AsmToken::Plus)) { Parser.Lex(); // Eat the '+' token. haveEaten = true; @@ -3781,15 +3787,15 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { isAdd = false; haveEaten = true; } - if (Parser.getTok().is(AsmToken::Identifier)) - Reg = tryParseRegister(); + + SMLoc E = Parser.getTok().getEndLoc(); + int Reg = tryParseRegister(); if (Reg == -1) { if (!haveEaten) return MatchOperand_NoMatch; Error(Parser.getTok().getLoc(), "register expected"); return MatchOperand_ParseFail; } - SMLoc E = Parser.getTok().getLoc(); ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift; unsigned ShiftImm = 0; @@ -3797,6 +3803,9 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the ','. if (parseMemRegOffsetShift(ShiftTy, ShiftImm)) return MatchOperand_ParseFail; + + // FIXME: Only approximates end...may include intervening whitespace. + E = Parser.getTok().getLoc(); } Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy, @@ -3829,14 +3838,14 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // differently. bool isNegative = Parser.getTok().is(AsmToken::Minus); const MCExpr *Offset; - if (getParser().ParseExpression(Offset)) + SMLoc E; + if (getParser().ParseExpression(Offset, E)) return MatchOperand_ParseFail; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset); if (!CE) { Error(S, "constant expression expected"); return MatchOperand_ParseFail; } - SMLoc E = Tok.getLoc(); // Negative zero is encoded as the flag value INT32_MIN. int32_t Val = CE->getValue(); if (isNegative && Val == 0) @@ -3851,7 +3860,6 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { bool haveEaten = false; bool isAdd = true; - int Reg = -1; if (Tok.is(AsmToken::Plus)) { Parser.Lex(); // Eat the '+' token. haveEaten = true; @@ -3860,18 +3868,18 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { isAdd = false; haveEaten = true; } - if (Parser.getTok().is(AsmToken::Identifier)) - Reg = tryParseRegister(); + + Tok = Parser.getTok(); + int Reg = tryParseRegister(); if (Reg == -1) { if (!haveEaten) return MatchOperand_NoMatch; - Error(Parser.getTok().getLoc(), "register expected"); + Error(Tok.getLoc(), "register expected"); return MatchOperand_ParseFail; } - SMLoc E = Parser.getTok().getLoc(); Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift, - 0, S, E)); + 0, S, Tok.getEndLoc())); return MatchOperand_Success; } @@ -4224,7 +4232,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return Error(Tok.getLoc(), "malformed memory operand"); if (Tok.is(AsmToken::RBrac)) { - E = Tok.getLoc(); + E = Tok.getEndLoc(); Parser.Lex(); // Eat right bracket token. 
Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift, @@ -4272,9 +4280,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } // Now we should have the closing ']' - E = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(E, "']' expected"); + return Error(Parser.getTok().getLoc(), "']' expected"); + E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat right bracket token. // Don't worry about range checking the value here. That's handled by @@ -4321,9 +4329,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { CE = MCConstantExpr::Create(INT32_MIN, getContext()); // Now we should have the closing ']' - E = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(E, "']' expected"); + return Error(Parser.getTok().getLoc(), "']' expected"); + E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat right bracket token. // Don't worry about range checking the value here. That's handled by @@ -4367,9 +4375,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } // Now we should have the closing ']' - E = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(E, "']' expected"); + return Error(Parser.getTok().getLoc(), "']' expected"); + E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat right bracket token. Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum, @@ -5723,7 +5731,12 @@ processInstruction(MCInst &Inst, } // Aliases for alternate PC+imm syntax of LDR instructions. case ARM::t2LDRpcrel: - Inst.setOpcode(ARM::t2LDRpci); + // Select the narrow version if the immediate will fit. + if (Inst.getOperand(1).getImm() > 0 && + Inst.getOperand(1).getImm() <= 0xff) + Inst.setOpcode(ARM::tLDRpci); + else + Inst.setOpcode(ARM::t2LDRpci); return true; case ARM::t2LDRBpcrel: Inst.setOpcode(ARM::t2LDRBpci); @@ -7636,7 +7649,7 @@ bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { if (getParser().ParseExpression(Value)) return true; - getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); + getParser().getStreamer().EmitValue(Value, Size); if (getLexer().is(AsmToken::EndOfStatement)) break; @@ -7824,13 +7837,10 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { return true; } -extern "C" void LLVMInitializeARMAsmLexer(); - /// Force static initialization. extern "C" void LLVMInitializeARMAsmParser() { RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget); RegisterMCAsmParser<ARMAsmParser> Y(TheThumbTarget); - LLVMInitializeARMAsmLexer(); } #define GET_REGISTER_MATCHER diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt index e24a1b1786..d2012c387c 100644 --- a/lib/Target/ARM/AsmParser/CMakeLists.txt +++ b/lib/Target/ARM/AsmParser/CMakeLists.txt @@ -1,7 +1,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) add_llvm_library(LLVMARMAsmParser - ARMAsmLexer.cpp ARMAsmParser.cpp ) diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 1ea4e00867..5f426d270a 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -11,7 +11,6 @@ tablegen(LLVM ARMGenDAGISel.inc -gen-dag-isel) tablegen(LLVM ARMGenFastISel.inc -gen-fast-isel) tablegen(LLVM ARMGenCallingConv.inc -gen-callingconv) tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM ARMGenEDInfo.inc -gen-enhanced-disassembly-info) tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler) add_public_tablegen_target(ARMCommonTableGen) @@ -40,6 +39,7 @@ add_llvm_target(ARMCodeGen ARMSubtarget.cpp ARMTargetMachine.cpp ARMTargetObjectFile.cpp + ARMTargetTransformInfo.cpp MLxExpansionPass.cpp Thumb1FrameLowering.cpp Thumb1InstrInfo.cpp diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 65c8c1a561..31a3b0b524 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -13,7 +13,6 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" @@ -105,10 +104,6 @@ public: uint64_t address, raw_ostream &vStream, raw_ostream &cStream) const; - - /// getEDInfo - See MCDisassembler. - const EDInstInfo *getEDInfo() const; -private: }; /// ThumbDisassembler - Thumb disassembler for all Thumb platforms. @@ -131,8 +126,6 @@ public: raw_ostream &vStream, raw_ostream &cStream) const; - /// getEDInfo - See MCDisassembler. - const EDInstInfo *getEDInfo() const; private: mutable ITStatus ITBlock; DecodeStatus AddThumbPredicate(MCInst&) const; @@ -385,7 +378,6 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); #include "ARMGenDisassemblerTables.inc" -#include "ARMGenEDInfo.inc" static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) { return new ARMDisassembler(STI); @@ -395,14 +387,6 @@ static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtarge return new ThumbDisassembler(STI); } -const EDInstInfo *ARMDisassembler::getEDInfo() const { - return instInfoARM; -} - -const EDInstInfo *ThumbDisassembler::getEDInfo() const { - return instInfoARM; -} - DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, const MemoryObject &Region, uint64_t Address, diff --git a/lib/Target/ARM/LICENSE.TXT b/lib/Target/ARM/LICENSE.TXT new file mode 100755 index 0000000000..68afea12ed --- /dev/null +++ b/lib/Target/ARM/LICENSE.TXT @@ -0,0 +1,47 @@ +ARM Limited + +Software Grant License Agreement ("Agreement") + +Except for the license granted herein to you, ARM Limited ("ARM") reserves all +right, title, and interest in and to the Software (defined below). + +Definition + +"Software" means the code and documentation as well as any original work of +authorship, including any modifications or additions to an existing work, that +is intentionally submitted by ARM to llvm.org (http://llvm.org) ("LLVM") for +inclusion in, or documentation of, any of the products owned or managed by LLVM +(the "Work"). 
For the purposes of this definition, "submitted" means any form of +electronic, verbal, or written communication sent to LLVM or its +representatives, including but not limited to communication on electronic +mailing lists, source code control systems, and issue tracking systems that are +managed by, or on behalf of, LLVM for the purpose of discussing and improving +the Work, but excluding communication that is conspicuously marked otherwise. + +1. Grant of Copyright License. Subject to the terms and conditions of this + Agreement, ARM hereby grants to you and to recipients of the Software + distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge, + royalty-free, irrevocable copyright license to reproduce, prepare derivative + works of, publicly display, publicly perform, sublicense, and distribute the + Software and such derivative works. + +2. Grant of Patent License. Subject to the terms and conditions of this + Agreement, ARM hereby grants you and to recipients of the Software + distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge, + royalty-free, irrevocable (except as stated in this section) patent license + to make, have made, use, offer to sell, sell, import, and otherwise transfer + the Work, where such license applies only to those patent claims licensable + by ARM that are necessarily infringed by ARM's Software alone or by + combination of the Software with the Work to which such Software was + submitted. If any entity institutes patent litigation against ARM or any + other entity (including a cross-claim or counterclaim in a lawsuit) alleging + that ARM's Software, or the Work to which ARM has contributed constitutes + direct or contributory patent infringement, then any patent licenses granted + to that entity under this Agreement for the Software or Work shall terminate + as of the date such litigation is filed. + +Unless required by applicable law or agreed to in writing, the software is +provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +either express or implied, including, without limitation, any warranties or +conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 45fef17785..41594dc578 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -123,7 +123,7 @@ public: bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const; void relaxInstruction(const MCInst &Inst, MCInst &Res) const; @@ -166,7 +166,7 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { switch ((unsigned)Fixup.getKind()) { case ARM::fixup_arm_thumb_br: { @@ -221,10 +221,7 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8 const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP - // @LOCALMOD-BEGIN - // The upstream operation for this encoding is not what it says it is. 
const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0 - // @LOCALMOD-END const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP if (isThumb()) { const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding @@ -635,30 +632,11 @@ public: uint8_t _OSABI) : ARMAsmBackend(T, TT), OSABI(_OSABI) { } - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const; - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createARMELFObjectWriter(OS, OSABI); } }; -// FIXME: Raise this to share code between Darwin and ELF. -void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value) const { - unsigned NumBytes = 4; // FIXME: 2 for Thumb - Value = adjustFixupValue(Fixup, Value); - if (!Value) return; // Doesn't change encoding. - - unsigned Offset = Fixup.getOffset(); - - // For each byte of the fragment that the fixup touches, mask in the bits from - // the fixup value. The Value has been "split up" into the appropriate - // bitfields above. - for (unsigned i = 0; i != NumBytes; ++i) - Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); -} - // @LOCALMOD-BEGIN class NaClARMAsmBackend : public ELFARMAsmBackend { public: @@ -726,7 +704,7 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); // @LOCALMOD-END - if (TheTriple.isOSNaCl()) + if (TheTriple.isOSNaCl()) return new NaClARMAsmBackend(T, TT, OSABI); // @LOCALMOD-END return new ELFARMAsmBackend(T, TT, OSABI); diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index d291304e83..9193e40bed 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -71,11 +71,10 @@ const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, const MCFixup &Fixup, bool IsPCRel) const { const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol(); - const MCSymbol &ASymbol = Symbol.AliasedSymbol(); bool EmitThisSym = false; const MCSectionELF &Section = - static_cast<const MCSectionELF&>(ASymbol.getSection()); + static_cast<const MCSectionELF&>(Symbol.getSection()); bool InNormalSection = true; unsigned RelocType = 0; RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel); @@ -134,13 +133,14 @@ const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, switch (RelocType) { default: EmitThisSym = true; break; case ELF::R_ARM_ABS32: EmitThisSym = false; break; + case ELF::R_ARM_PREL31: EmitThisSym = false; break; } } if (EmitThisSym) - return &ASymbol; + return &Symbol; if (! 
Symbol.isTemporary() && InNormalSection) { - return &ASymbol; + return &Symbol; } return NULL; } @@ -226,6 +226,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case FK_Data_4: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_ARM_NONE: + Type = ELF::R_ARM_NONE; + break; case MCSymbolRefExpr::VK_ARM_GOT: Type = ELF::R_ARM_GOT_BREL; break; @@ -250,7 +253,10 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_ARM_TARGET2: Type = ELF::R_ARM_TARGET2; break; - } + case MCSymbolRefExpr::VK_ARM_PREL31: + Type = ELF::R_ARM_PREL31; + break; + } break; case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp new file mode 100644 index 0000000000..39ded8fb57 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -0,0 +1,201 @@ +//===- lib/MC/ARMELFStreamer.cpp - ELF Object Output for ARM --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits ARM ELF .o object files. Different +// from generic ELF streamer in emitting mapping symbols ($a, $t and $d) to +// delimit regions of data and code. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELF.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +/// Extend the generic ELFStreamer class so that it can emit mapping symbols at +/// the appropriate points in the object files. These symbols are defined in the +/// ARM ELF ABI: infocenter.arm.com/help/topic/com.arm.../IHI0044D_aaelf.pdf. +/// +/// In brief: $a, $t or $d should be emitted at the start of each contiguous +/// region of ARM code, Thumb code or data in a section. In practice, this +/// emission does not rely on explicit assembler directives but on inherent +/// properties of the directives doing the emission (e.g. ".byte" is data, "add +/// r0, r0, r0" an instruction). +/// +/// As a result this system is orthogonal to the DataRegion infrastructure used +/// by MachO. Beware! +class ARMELFStreamer : public MCELFStreamer { +public: + ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, bool IsThumb) + : MCELFStreamer(Context, TAB, OS, Emitter), + IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) { + } + + ~ARMELFStreamer() {} + + virtual void ChangeSection(const MCSection *Section) { + // We have to keep track of the mapping symbol state of any sections we + // use. 
Each one should start off as EMS_None, which is provided as the + // default constructor by DenseMap::lookup. + LastMappingSymbols[getPreviousSection()] = LastEMS; + LastEMS = LastMappingSymbols.lookup(Section); + + MCELFStreamer::ChangeSection(Section); + } + + /// This function is the one used to emit instruction data into the ELF + /// streamer. We override it to add the appropriate mapping symbol if + /// necessary. + virtual void EmitInstruction(const MCInst& Inst) { + if (IsThumb) + EmitThumbMappingSymbol(); + else + EmitARMMappingSymbol(); + + MCELFStreamer::EmitInstruction(Inst); + } + + /// This is one of the functions used to emit data into an ELF section, so the + /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if + /// necessary. + virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + EmitDataMappingSymbol(); + MCELFStreamer::EmitBytes(Data, AddrSpace); + } + + /// This is one of the functions used to emit data into an ELF section, so the + /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if + /// necessary. + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + EmitDataMappingSymbol(); + MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace); + } + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) { + MCELFStreamer::EmitAssemblerFlag(Flag); + + switch (Flag) { + case MCAF_SyntaxUnified: + return; // no-op here. + case MCAF_Code16: + IsThumb = true; + return; // Change to Thumb mode + case MCAF_Code32: + IsThumb = false; + return; // Change to ARM mode + case MCAF_Code64: + return; + case MCAF_SubsectionsViaSymbols: + return; + } + } + +private: + enum ElfMappingSymbol { + EMS_None, + EMS_ARM, + EMS_Thumb, + EMS_Data + }; + + void EmitDataMappingSymbol() { + if (LastEMS == EMS_Data) return; + EmitMappingSymbol("$d"); + LastEMS = EMS_Data; + } + + void EmitThumbMappingSymbol() { + if (LastEMS == EMS_Thumb) return; + EmitMappingSymbol("$t"); + LastEMS = EMS_Thumb; + } + + void EmitARMMappingSymbol() { + if (LastEMS == EMS_ARM) return; + EmitMappingSymbol("$a"); + LastEMS = EMS_ARM; + } + + void EmitMappingSymbol(StringRef Name) { + MCSymbol *Start = getContext().CreateTempSymbol(); + EmitLabel(Start); + + MCSymbol *Symbol = + getContext().GetOrCreateSymbol(Name + "." + + Twine(MappingSymbolCounter++)); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + MCELF::SetType(SD, ELF::STT_NOTYPE); + MCELF::SetBinding(SD, ELF::STB_LOCAL); + SD.setExternal(false); + Symbol->setSection(*getCurrentSection()); + + const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); + Symbol->setVariableValue(Value); + } + + void EmitThumbFunc(MCSymbol *Func) { + // FIXME: Anything needed here to flag the function as thumb? 
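To make the mapping-symbol behaviour described above concrete, a hedged sketch of what the new streamer produces for a section that mixes data and ARM code (emitMixed and the values are illustrative; only the $d/$a transitions come from the streamer code above):

#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"

// Each transition between data and code drops a fresh temporary mapping
// symbol; repeated emissions in the same state add nothing.
static void emitMixed(llvm::MCStreamer &Out, const llvm::MCInst &SomeARMInst) {
  Out.EmitIntValue(0xdeadbeef, 4);   // first data in the region  -> "$d" symbol
  Out.EmitInstruction(SomeARMInst);  // first ARM instruction     -> "$a" symbol
  Out.EmitInstruction(SomeARMInst);  // still EMS_ARM, no new symbol
  Out.EmitIntValue(0, 4);            // back to data              -> new "$d" symbol
}
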
+ + getAssembler().setIsThumbFunc(Func); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func); + SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc); + } + + + bool IsThumb; + int64_t MappingSymbolCounter; + + DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; + ElfMappingSymbol LastEMS; + + /// @} +}; +} + +namespace llvm { + MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack, + bool IsThumb) { + ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + if (NoExecStack) + S->getAssembler().setNoExecStack(true); + return S; + } + +} + + diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h new file mode 100644 index 0000000000..77ae5d2362 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h @@ -0,0 +1,27 @@ +//===-- ARMELFStreamer.h - ELF Streamer for ARM ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF streamer information for the ARM backend. +// +//===----------------------------------------------------------------------===// + +#ifndef ARM_ELF_STREAMER_H +#define ARM_ELF_STREAMER_H + +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { + + MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack, + bool IsThumb); +} + +#endif // ARM_ELF_STREAMER_H diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp index af2b04086a..cd25c865d9 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp @@ -77,7 +77,7 @@ static void EmitDataMask(int I, MCInst Saved[], MCStreamer &Out) { int64_t Pred = Saved[2].getOperand(2).getImm(); assert((ARM::SP == Addr) && "Unexpected register at stack guard"); - Out.EmitBundleLock(); + Out.EmitBundleLock(false); Out.EmitInstruction(Saved[1]); EmitBICMask(Out, Addr, Pred, 0xC0000000); Out.EmitBundleUnlock(); @@ -89,8 +89,7 @@ static void EmitDirectGuardCall(int I, MCInst Saved[], // sfi_nops_to_force_slot3 assert(I == 2 && (ARM::SFI_GUARD_CALL == Saved[0].getOpcode()) && "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL"); - Out.EmitBundleAlignEnd(); - Out.EmitBundleLock(); + Out.EmitBundleLock(true); Out.EmitInstruction(Saved[1]); Out.EmitBundleUnlock(); } @@ -104,8 +103,7 @@ static void EmitIndirectGuardCall(int I, MCInst Saved[], "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL"); unsigned Reg = Saved[0].getOperand(0).getReg(); int64_t Pred = Saved[0].getOperand(2).getImm(); - Out.EmitBundleAlignEnd(); - Out.EmitBundleLock(); + Out.EmitBundleLock(true); EmitBICMask(Out, Reg, Pred, 0xC000000F); Out.EmitInstruction(Saved[1]); Out.EmitBundleUnlock(); @@ -120,7 +118,7 @@ static void EmitIndirectGuardJmp(int I, MCInst Saved[], MCStreamer &Out) { unsigned Reg = Saved[0].getOperand(0).getReg(); int64_t Pred = Saved[0].getOperand(2).getImm(); - Out.EmitBundleLock(); + Out.EmitBundleLock(false); EmitBICMask(Out, Reg, Pred, 0xC000000F); Out.EmitInstruction(Saved[1]); Out.EmitBundleUnlock(); @@ -134,7 +132,7 @@ static void EmitGuardReturn(int I, MCInst Saved[], 
MCStreamer &Out) { "Unexpected SFI Pseudo while lowering SFI_GUARD_RETURN"); int64_t Pred = Saved[0].getOperand(0).getImm(); - Out.EmitBundleLock(); + Out.EmitBundleLock(false); EmitBICMask(Out, ARM::LR, Pred, 0xC000000F); Out.EmitInstruction(Saved[1]); Out.EmitBundleUnlock(); @@ -149,7 +147,7 @@ static void EmitGuardLoadOrStore(int I, MCInst Saved[], MCStreamer &Out) { unsigned Reg = Saved[0].getOperand(0).getReg(); int64_t Pred = Saved[0].getOperand(2).getImm(); - Out.EmitBundleLock(); + Out.EmitBundleLock(false); EmitBICMask(Out, Reg, Pred, 0xC0000000); Out.EmitInstruction(Saved[1]); Out.EmitBundleUnlock(); @@ -163,7 +161,7 @@ static void EmitGuardLoadOrStoreTst(int I, MCInst Saved[], MCStreamer &Out) { "Unexpected SFI Pseudo while lowering"); unsigned Reg = Saved[0].getOperand(0).getReg(); - Out.EmitBundleLock(); + Out.EmitBundleLock(false); EmitTST(Out, Reg); Out.EmitInstruction(Saved[1]); Out.EmitBundleUnlock(); @@ -182,7 +180,7 @@ static void EmitGuardSpLoad(int I, MCInst Saved[], MCStreamer &Out) { int64_t Pred = Saved[3].getOperand(2).getImm(); assert((ARM::SP == SpReg) && "Unexpected register at stack guard"); - Out.EmitBundleLock(); + Out.EmitBundleLock(false); EmitBICMask(Out, AddrReg, Pred, 0xC0000000); Out.EmitInstruction(Saved[2]); EmitBICMask(Out, SpReg, Pred, 0xC0000000); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index e7d3e7cb0b..00b3b223fd 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -12,7 +12,10 @@ //===----------------------------------------------------------------------===// #include "ARMMCTargetDesc.h" +#include "ARMELFStreamer.h" +#include "ARMMCAsmInfo.h" #include "ARMBaseInfo.h" +#include "ARMELFStreamer.h" #include "ARMMCAsmInfo.h" #include "InstPrinter/ARMInstPrinter.h" #include "llvm/MC/MCCodeGenInfo.h" @@ -144,7 +147,7 @@ static MCInstrInfo *createARMMCInstrInfo() { static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { MCRegisterInfo *X = new MCRegisterInfo(); - InitARMMCRegisterInfo(X, ARM::LR); + InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC); return X; } @@ -196,8 +199,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, if (TheTriple.isOSWindows()) { llvm_unreachable("ARM does not support Windows COFF format"); } - - return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack); + // @LOCALMOD-BEGIN + MCStreamer *Streamer = createARMELFStreamer(Ctx, MAB, OS, Emitter, false, + NoExecStack, TheTriple.getArch() == Triple::thumb); + if (TheTriple.isOSNaCl()) + Streamer->EmitBundleAlignMode(4); + return Streamer; + // @LOCALMOD-END } static MCInstPrinter *createARMMCInstPrinter(const Target &T, diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index 3ee853c822..e9f7682d6f 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMARMDesc ARMAsmBackend.cpp ARMELFObjectWriter.cpp + ARMELFStreamer.cpp ARMMCAsmInfo.cpp ARMMCCodeEmitter.cpp ARMMCExpr.cpp diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index 3e48ed1189..f069535ff3 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -16,7 +16,7 @@ BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \ ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \ ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ - ARMGenEDInfo.inc 
ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \ + ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \ ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp index 500e3de82d..fa5681fb12 100644 --- a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp +++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index edd73c20c0..123ada67e1 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -281,7 +281,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) .addReg(ARM::R3, RegState::Kill); AddDefaultPred(MIB); - MIB->copyImplicitOps(&*MBBI); + MIB.copyImplicitOps(&*MBBI); // erase the old tBX_RET instruction MBB.erase(MBBI); } @@ -352,7 +352,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - MIB->copyImplicitOps(&*MI); + MIB.copyImplicitOps(&*MI); MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 957a34d11d..57cc7d8604 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -24,10 +24,10 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -390,6 +390,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); DebugLoc dl = MI.getDebugLoc(); + MachineInstrBuilder MIB(*MBB.getParent(), &MI); unsigned Opcode = MI.getOpcode(); const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); @@ -417,7 +418,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); // Remove offset MI.RemoveOperand(FrameRegIdx+1); - MachineInstrBuilder MIB(&MI); return true; } @@ -428,7 +428,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, if (Opcode == ARM::tADDi3) { MI.setDesc(TII.get(Opcode)); removeOperands(MI, FrameRegIdx); - MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg) .addImm(Offset / Scale)); } else { @@ -457,7 +456,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, if (Opcode == ARM::tADDi3) { MI.setDesc(TII.get(Opcode)); removeOperands(MI, FrameRegIdx); - MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask)); } else { MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); @@ -603,6 +601,7 @@ 
Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc dl = MI.getDebugLoc(); + MachineInstrBuilder MIB(*MBB.getParent(), &MI); while (!MI.getOperand(i).isFI()) { ++i; @@ -719,8 +718,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Add predicate back if it's needed. - if (MI.isPredicable()) { - MachineInstrBuilder MIB(&MI); + if (MI.isPredicable()) AddDefaultPred(MIB); - } } diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 9fba8227f6..67e8ec7c5f 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -408,7 +408,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // Remove offset and remaining explicit predicate operands. do MI.RemoveOperand(FrameRegIdx+1); while (MI.getNumOperands() > FrameRegIdx+1); - MachineInstrBuilder MIB(&MI); + MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI); AddDefaultPred(MIB); return true; } diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 9c018bc37a..1a7a4d450c 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -19,9 +19,9 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" using namespace llvm; Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index d1cde1bf98..567bc05272 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Function.h" // To access Function attributes #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -52,81 +53,82 @@ namespace { unsigned PredCC2 : 2; unsigned PartFlag : 1; // 16-bit instruction does partial flag update unsigned Special : 1; // Needs to be dealt with specially + unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift) }; static const ReduceEntry ReduceTable[] = { - // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S - { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 }, - { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 }, - { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 }, - { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 }, - { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 }, - { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 }, - { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 }, - //FIXME: Disable CMN, as CCodes are backwards from compare expectations - //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 }, - { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 }, - 
// FIXME: adr.n immediate offset must be multiple of 4. - //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 }, - { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 }, - { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 }, - // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less - // likely to cause issue in the loop. As a size / performance workaround, - // they are not marked as such. - { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 }, - { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 }, - // FIXME: Do we need the 16-bit 'S' variant? - { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0 }, - { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 }, - { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 }, - { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 }, - { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 }, - { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 }, - { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 }, - { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1 }, - { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1 }, - { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1 }, - { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1 }, - - // FIXME: Clean this up after splitting each Thumb load / store opcode - // into multiple ones. 
- { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 }, - { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - - { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 }, - { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 }, - { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 }, - // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent - { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 }, - { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 }, + // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM + { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 }, + { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 }, + { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 }, + { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 }, + { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 }, + { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 }, + { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, + { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 }, + { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 }, + //FIXME: Disable CMN, as CCodes are backwards from compare expectations + //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 }, + { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 }, + // FIXME: adr.n immediate offset must be multiple of 4. + //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, + { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, + { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 }, + { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, + { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 }, + // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less + // likely to cause issue in the loop. As a size / performance workaround, + // they are not marked as such. + { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0,0 }, + { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1,0 }, + // FIXME: Do we need the 16-bit 'S' variant? 
+ { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 }, + { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 }, + { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 }, + { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 }, + { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, + { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, + { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, + { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 }, + { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 }, + { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 }, + { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 }, + { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 }, + { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 }, + { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, + { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, + { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, + { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 }, + + // FIXME: Clean this up after splitting each Thumb load / store opcode + // into multiple ones. + { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 }, + { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 }, + + { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, + { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 }, + { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 }, + // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent + { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, + { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 } }; class Thumb2SizeReduce : public MachineFunctionPass { @@ -175,13 +177,22 @@ namespace { bool LiveCPSR, MachineInstr *CPSRDef, bool IsSelfLoop); + /// ReduceMI - Attempt to reduce MI, return true on success. + bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop); + /// ReduceMBB - Reduce width of instructions in the specified basic block. 
bool ReduceMBB(MachineBasicBlock &MBB); + + bool OptimizeSize; + bool MinimizeSize; }; char Thumb2SizeReduce::ID = 0; } Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) { + OptimizeSize = MinimizeSize = false; for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { unsigned FromOpc = ReduceTable[i].WideOpc; if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) @@ -216,8 +227,8 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { bool Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use, bool FirstInSelfLoop) { - // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder). - if (!STI->avoidCPSRPartialUpdate()) + // Disable the check for -Oz (aka OptimizeForSizeHarder). + if (MinimizeSize || !STI->avoidCPSRPartialUpdate()) return false; if (!Def) @@ -578,7 +589,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, // are prioritized, but the table assumes a unique entry for each // source insn opcode. So for now, we hack a local entry record to use. static const ReduceEntry NarrowEntry = - { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 }; + { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 }; if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop)) return true; return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); @@ -596,6 +607,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) return false; + if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && + STI->avoidMOVsShifterOperand()) + // Don't issue movs with shifter operand for some CPUs unless we + // are optimizing / minimizing for size. + return false; + unsigned Reg0 = MI->getOperand(0).getReg(); unsigned Reg1 = MI->getOperand(1).getReg(); // t2MUL is "special". The tied source operand is second, not first. @@ -708,6 +725,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) return false; + if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && + STI->avoidMOVsShifterOperand()) + // Don't issue movs with shifter operand for some CPUs unless we + // are optimizing / minimizing for size. + return false; + unsigned Limit = ~0U; if (Entry.Imm1Limit) Limit = (1 << Entry.Imm1Limit) - 1; @@ -841,6 +864,32 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) { return LiveCPSR; } +bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop) { + unsigned Opcode = MI->getOpcode(); + DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode); + if (OPI == ReduceOpcodeMap.end()) + return false; + const ReduceEntry &Entry = ReduceTable[OPI->second]; + + // Don't attempt normal reductions on "special" cases for now. + if (Entry.Special) + return ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); + + // Try to transform to a 16-bit two-address instruction. + if (Entry.NarrowOpc2 && + ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) + return true; + + // Try to transform to a 16-bit non-two-address instruction. 
+ if (Entry.NarrowOpc1 && + ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) + return true; + + return false; +} + bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool Modified = false; @@ -865,40 +914,20 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); - unsigned Opcode = MI->getOpcode(); - DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode); - if (OPI != ReduceOpcodeMap.end()) { - const ReduceEntry &Entry = ReduceTable[OPI->second]; - // Ignore "special" cases for now. - if (Entry.Special) { - if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { - Modified = true; - MachineBasicBlock::instr_iterator I = prior(NextMII); - MI = &*I; - } - goto ProcessNext; - } - - // Try to transform to a 16-bit two-address instruction. - if (Entry.NarrowOpc2 && - ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { - Modified = true; - MachineBasicBlock::instr_iterator I = prior(NextMII); - MI = &*I; - goto ProcessNext; - } - - // Try to transform to a 16-bit non-two-address instruction. - if (Entry.NarrowOpc1 && - ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { - Modified = true; - MachineBasicBlock::instr_iterator I = prior(NextMII); - MI = &*I; - } + // Does NextMII belong to the same bundle as MI? + bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred(); + + if (ReduceMI(MBB, MI, LiveCPSR, CPSRDef, IsSelfLoop)) { + Modified = true; + MachineBasicBlock::instr_iterator I = prior(NextMII); + MI = &*I; + // Removing and reinserting the first instruction in a bundle will break + // up the bundle. Fix the bundling if it was broken. + if (NextInSameBundle && !NextMII->isBundledWithPred()) + NextMII->bundleWithPred(); } - ProcessNext: - if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) { + if (!NextInSameBundle && MI->isInsideBundle()) { // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill // marker is only on the BUNDLE instruction. Process the BUNDLE // instruction as we finish with the bundled instruction to work around @@ -931,6 +960,13 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); STI = &TM.getSubtarget<ARMSubtarget>(); + // Optimizing / minimizing size? 
+ AttributeSet FnAttrs = MF.getFunction()->getAttributes(); + OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize); + MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::MinSize); + bool Modified = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) Modified |= ReduceMBB(*I); diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index c87a33a8b9..02ac493b42 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -8,7 +8,6 @@ add_llvm_library(LLVMTarget TargetMachine.cpp TargetMachineC.cpp TargetSubtargetInfo.cpp - TargetTransformImpl.cpp ) foreach(t ${LLVM_TARGETS_TO_BUILD}) diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 788c6e6866..f468861434 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -15,17 +15,17 @@ #include "CPPTargetMachine.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/CallingConv.h" #include "llvm/Config/config.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/InlineAsm.h" -#include "llvm/Instruction.h" -#include "llvm/Instructions.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" @@ -479,9 +479,9 @@ void CppWriter::printAttributes(const AttributeSet &PAL, Out << " {\n AttrBuilder B;\n"; #define HANDLE_ATTR(X) \ - if (attrs.hasAttribute(Attributes::X)) \ - Out << " B.addAttribute(Attributes::" #X ");\n"; \ - attrs.removeAttribute(Attributes::X); + if (attrs.contains(Attribute::X)) \ + Out << " B.addAttribute(Attribute::" #X ");\n"; \ + attrs.removeAttribute(Attribute::X); HANDLE_ATTR(SExt); HANDLE_ATTR(ZExt); @@ -509,11 +509,11 @@ void CppWriter::printAttributes(const AttributeSet &PAL, HANDLE_ATTR(NonLazyBind); HANDLE_ATTR(MinSize); #undef HANDLE_ATTR - if (attrs.hasAttribute(Attributes::StackAlignment)) + if (attrs.contains(Attribute::StackAlignment)) Out << " B.addStackAlignmentAttr(" << attrs.getStackAlignment() << ")\n"; - attrs.removeAttribute(Attributes::StackAlignment); + attrs.removeAttribute(Attribute::StackAlignment); assert(!attrs.hasAttributes() && "Unhandled attribute!"); - Out << " PAWI.Attrs = Attributes::get(mod->getContext(), B);\n }"; + Out << " PAWI.Attrs = Attribute::get(mod->getContext(), B);\n }"; nl(Out); Out << "Attrs.push_back(PAWI);"; nl(Out); diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index cc6f1fb832..477e788ee2 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -14,7 +14,7 @@ #ifndef CPPTARGETMACHINE_H #define CPPTARGETMACHINE_H -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp index a8ac0a282c..1ca74a4895 100644 --- a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp +++ b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp @@ -8,7 +8,7 @@ 
//===----------------------------------------------------------------------===// #include "CPPTargetMachine.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index cedb3a8303..58b89d1283 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -31,9 +31,10 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -41,7 +42,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h index bc2af63612..bc2af63612 100755..100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.h +++ b/lib/Target/Hexagon/HexagonAsmPrinter.h diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp index 1b4b0206f7..2c93d04f98 100644 --- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp +++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp @@ -15,7 +15,7 @@ #include "HexagonCallingConvLower.h" #include "Hexagon.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 29bfc6b466..9043cf92d2 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -25,14 +25,14 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" using namespace llvm; diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index d875fea575..2a00a9f7bd 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -39,7 +39,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Constants.h" +#include "llvm/IR/Constants.h" #include "llvm/PassSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 275e135cd6..db292f2cd4 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -16,7 +16,7 @@ #include "HexagonISelLowering.h" #include "HexagonTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include 
"llvm/Intrinsics.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 10c54cba9f..16cec5cfe5 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -17,7 +17,6 @@ #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" #include "HexagonTargetObjectFile.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -26,12 +25,13 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/InlineAsm.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 14254ce742..5a415ebe54 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -16,8 +16,8 @@ #define Hexagon_ISELLOWERING_H #include "Hexagon.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/IR/CallingConv.h" #include "llvm/Target/TargetLowering.h" namespace llvm { diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td index 49741a3d1b..05f1e23f60 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -59,9 +59,6 @@ class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern> bits<6> imm6; } -class Immext<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypePREFIX> { - let isCodeGenOnly = 1; - - bits<26> imm26; -} +let isCodeGenOnly = 1 in +class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []> + : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypePREFIX>; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index dc93fb6b1b..8b183b967c 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -98,8 +98,8 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in multiclass ALU32_Pbase<string mnemonic, bit isNot, bit isPredNew> { - let PNewValue = #!if(isPredNew, "new", "") in - def #NAME# : ALU32_rr<(outs IntRegs:$dst), + let PNewValue = !if(isPredNew, "new", "") in + def NAME : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", ") $dst = ")#mnemonic#"($src2, $src3)", @@ -107,10 +107,10 @@ multiclass ALU32_Pbase<string mnemonic, bit isNot, } multiclass ALU32_Pred<string mnemonic, bit PredNot> { - let PredSense = #!if(PredNot, "false", "true") in { - defm _c#NAME# : ALU32_Pbase<mnemonic, PredNot, 0>; + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ALU32_Pbase<mnemonic, PredNot, 0>; // 
Predicate new - defm _cdn#NAME# : ALU32_Pbase<mnemonic, PredNot, 1>; + defm _cdn#NAME : ALU32_Pbase<mnemonic, PredNot, 1>; } } @@ -118,7 +118,7 @@ let InputType = "reg" in multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> { let CextOpcode = CextOp, BaseOpcode = CextOp#_rr in { let isPredicable = 1 in - def #NAME# : ALU32_rr<(outs IntRegs:$dst), + def NAME : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = "#mnemonic#"($src1, $src2)", [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1), @@ -144,8 +144,8 @@ defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel; // ALU32/ALU (ADD with register-immediate form) //===----------------------------------------------------------------------===// multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> { - let PNewValue = #!if(isPredNew, "new", "") in - def #NAME# : ALU32_ri<(outs IntRegs:$dst), + let PNewValue = !if(isPredNew, "new", "") in + def NAME : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, s8Ext: $src3), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", ") $dst = ")#mnemonic#"($src2, #$src3)", @@ -153,10 +153,10 @@ multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> { } multiclass ALU32ri_Pred<string mnemonic, bit PredNot> { - let PredSense = #!if(PredNot, "false", "true") in { - defm _c#NAME# : ALU32ri_Pbase<mnemonic, PredNot, 0>; + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ALU32ri_Pbase<mnemonic, PredNot, 0>; // Predicate new - defm _cdn#NAME# : ALU32ri_Pbase<mnemonic, PredNot, 1>; + defm _cdn#NAME : ALU32ri_Pbase<mnemonic, PredNot, 1>; } } @@ -165,7 +165,7 @@ multiclass ALU32ri_base<string mnemonic, string CextOp, SDNode OpNode> { let CextOpcode = CextOp, BaseOpcode = CextOp#_ri in { let opExtendable = 2, isExtentSigned = 1, opExtentBits = 16, isPredicable = 1 in - def #NAME# : ALU32_ri<(outs IntRegs:$dst), + def NAME : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Ext:$src2), "$dst = "#mnemonic#"($src1, #$src2)", [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1), @@ -222,15 +222,15 @@ def SUB_ri : ALU32_ri<(outs IntRegs:$dst), multiclass TFR_Pred<bit PredNot> { - let PredSense = #!if(PredNot, "false", "true") in { - def _c#NAME# : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), + let PredSense = !if(PredNot, "false", "true") in { + def _c#NAME : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2", []>; // Predicate new let PNewValue = "new" in - def _cdn#NAME# : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), + def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2", []>; } @@ -240,7 +240,7 @@ let InputType = "reg", neverHasSideEffects = 1 in multiclass TFR_base<string CextOp> { let CextOpcode = CextOp, BaseOpcode = CextOp in { let isPredicable = 1 in - def #NAME# : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), + def NAME : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), "$dst = $src1", []>; @@ -252,15 +252,15 @@ multiclass TFR_base<string CextOp> { } multiclass TFR64_Pred<bit PredNot> { - let PredSense = #!if(PredNot, "false", "true") in { - def _c#NAME# : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2), + let PredSense = !if(PredNot, "false", "true") in { + def _c#NAME : ALU32_rr<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, 
DoubleRegs:$src2), !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2", []>; // Predicate new let PNewValue = "new" in - def _cdn#NAME# : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2), + def _cdn#NAME : ALU32_rr<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, DoubleRegs:$src2), !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2", []>; } @@ -270,7 +270,7 @@ let InputType = "reg", neverHasSideEffects = 1 in multiclass TFR64_base<string CextOp> { let CextOpcode = CextOp, BaseOpcode = CextOp in { let isPredicable = 1 in - def #NAME# : ALU32_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), + def NAME : ALU32_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), "$dst = $src1", []>; @@ -283,16 +283,16 @@ multiclass TFR64_base<string CextOp> { multiclass TFRI_Pred<bit PredNot> { - let PredSense = #!if(PredNot, "false", "true") in { - def _c#NAME# : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, s12Ext:$src2), + let PredSense = !if(PredNot, "false", "true") in { + def _c#NAME : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, s12Ext:$src2), !if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2", []>; // Predicate new let PNewValue = "new" in - def _cdn#NAME# : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, s12Ext:$src2), + def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, s12Ext:$src2), !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = #$src2", []>; } @@ -303,7 +303,7 @@ multiclass TFRI_base<string CextOp> { let CextOpcode = CextOp, BaseOpcode = CextOp#I in { let opExtendable = 1, opExtentBits = 16, isMoveImm = 1, isPredicable = 1, isReMaterializable = 1 in - def #NAME# : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1), + def NAME : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1), "$dst = #$src1", [(set (i32 IntRegs:$dst), s16ExtPred:$src1)]>; @@ -845,19 +845,19 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1, // Load -- MEMri operand multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = #!if(isPredNew, "new", "") in - def #NAME# : LDInst2<(outs RC:$dst), + let PNewValue = !if(isPredNew, "new", "") in + def NAME : LDInst2<(outs RC:$dst), (ins PredRegs:$src1, MEMri:$addr), - #!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#"$dst = "#mnemonic#"($addr)", []>; } multiclass LD_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = #!if(PredNot, "false", "true") in { - defm _c#NAME# : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>; + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>; // Predicate new - defm _cdn#NAME# : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>; + defm _cdn#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>; } } @@ -868,7 +868,7 @@ multiclass LD_MEMri<string mnemonic, string CextOp, RegisterClass RC, let CextOpcode = CextOp, BaseOpcode = CextOp in { let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits, isPredicable = 1 in - def #NAME# : LDInst2<(outs RC:$dst), (ins MEMri:$addr), + def NAME : LDInst2<(outs RC:$dst), (ins MEMri:$addr), "$dst = "#mnemonic#"($addr)", []>; @@ -911,20 +911,20 @@ def : Pat < (i64 (load ADDRriS11_3:$addr)), // Load - Base with Immediate offset addressing mode multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, bit isNot, bit isPredNew> { - let PNewValue = #!if(isPredNew, "new", "") in - def #NAME# : 
LDInst2<(outs RC:$dst), - (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3), - #!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + let PNewValue = !if(isPredNew, "new", "") in + def NAME : LDInst2<(outs RC:$dst), + (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#"$dst = "#mnemonic#"($src2+#$src3)", []>; } multiclass LD_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp, bit PredNot> { - let PredSense = #!if(PredNot, "false", "true") in { - defm _c#NAME# : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>; + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>; // Predicate new - defm _cdn#NAME# : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>; + defm _cdn#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>; } } @@ -936,7 +936,7 @@ multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC, let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits, isPredicable = 1, AddedComplexity = 20 in - def #NAME# : LDInst2<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset), + def NAME : LDInst2<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset), "$dst = "#mnemonic#"($src1+#$offset)", []>; @@ -990,7 +990,7 @@ def LDrid_GP : LDInst2<(outs DoubleRegs:$dst), []>, Requires<[NoV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, validSubTargets = NoV4SubT in def LDd_GP : LDInst2<(outs DoubleRegs:$dst), (ins globaladdress:$global), "$dst = memd(#$global)", @@ -1005,10 +1005,10 @@ def LDd_GP : LDInst2<(outs DoubleRegs:$dst), multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp, bit isNot, bit isPredNew> { - let PNewValue = #!if(isPredNew, "new", "") in - def #NAME# : LDInst2PI<(outs RC:$dst, IntRegs:$dst2), + let PNewValue = !if(isPredNew, "new", "") in + def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset), - #!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#"$dst = "#mnemonic#"($src2++#$offset)", [], "$src2 = $dst2">; @@ -1016,8 +1016,8 @@ multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp, multiclass LD_PostInc_Pred<string mnemonic, RegisterClass RC, Operand ImmOp, bit PredNot> { - let PredSense = #!if(PredNot, "false", "true") in { - defm _c#NAME# : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>; + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>; // Predicate new let Predicates = [HasV4T], validSubTargets = HasV4SubT in defm _cdn#NAME#_V4 : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 1>; @@ -1029,8 +1029,8 @@ multiclass LD_PostInc<string mnemonic, string BaseOp, RegisterClass RC, let BaseOpcode = "POST_"#BaseOp in { let isPredicable = 1 in - def #NAME# : LDInst2PI<(outs RC:$dst, IntRegs:$dst2), - (ins IntRegs:$src1, ImmOp:$offset), + def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, ImmOp:$offset), "$dst = "#mnemonic#"($src1++#$offset)", [], "$src1 = $dst2">; @@ -1057,6 +1057,9 @@ let hasCtrlDep = 1, neverHasSideEffects = 1 in { PredNewRel; } +def : Pat< (i32 (extloadi1 ADDRriS11_0:$addr)), + (i32 (LDrib ADDRriS11_0:$addr)) >; + // Load byte any-extend. 
def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)), (i32 (LDrib ADDRriS11_0:$addr)) >; @@ -1073,14 +1076,14 @@ def LDrib_GP : LDInst2<(outs IntRegs:$dst), []>, Requires<[NoV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, validSubTargets = NoV4SubT in def LDb_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst = memb(#$global)", []>, Requires<[NoV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, validSubTargets = NoV4SubT in def LDub_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst = memub(#$global)", @@ -1101,20 +1104,21 @@ def LDrih_GP : LDInst2<(outs IntRegs:$dst), []>, Requires<[NoV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, validSubTargets = NoV4SubT in def LDh_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst = memh(#$global)", []>, Requires<[NoV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, validSubTargets = NoV4SubT in def LDuh_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst = memuh(#$global)", []>, Requires<[NoV4T]>; +let AddedComplexity = 10 in def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)), (i32 (LDriub ADDRriS11_0:$addr))>; @@ -1138,8 +1142,9 @@ def LDriuh_GP : LDInst2<(outs IntRegs:$dst), Requires<[NoV4T]>; // Load predicate. -let Defs = [R10,R11,D5], neverHasSideEffects = 1 in -def LDriw_pred : LDInst<(outs PredRegs:$dst), +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, +isPseudo = 1, Defs = [R10,R11,D5], neverHasSideEffects = 1 in +def LDriw_pred : LDInst2<(outs PredRegs:$dst), (ins MEMri:$addr), "Error; should not emit", []>; @@ -1152,7 +1157,7 @@ def LDriw_GP : LDInst2<(outs IntRegs:$dst), []>, Requires<[NoV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, validSubTargets = NoV4SubT in def LDw_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst = memw(#$global)", @@ -1161,7 +1166,7 @@ def LDw_GP : LDInst2<(outs IntRegs:$dst), // Deallocate stack frame. 
let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in { - def DEALLOCFRAME : LDInst2<(outs), (ins i32imm:$amt1), + def DEALLOCFRAME : LDInst2<(outs), (ins), "deallocframe", []>; } @@ -1393,7 +1398,7 @@ def STrid_GP : STInst2<(outs), []>, Requires<[NoV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, validSubTargets = NoV4SubT in def STd_GP : STInst2<(outs), (ins globaladdress:$global, DoubleRegs:$src), "memd(#$global) = $src", @@ -1435,8 +1440,8 @@ def POST_STdri_cNotPt : STInst2PI<(outs IntRegs:$dst), //===----------------------------------------------------------------------===// multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = #!if(isPredNew, "new", "") in - def #NAME# : STInst2<(outs), + let PNewValue = !if(isPredNew, "new", "") in + def NAME : STInst2<(outs), (ins PredRegs:$src1, MEMri:$addr, RC: $src2), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#mnemonic#"($addr) = $src2", @@ -1444,8 +1449,8 @@ multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = #!if(PredNot, "false", "true") in { - defm _c#NAME# : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>; + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>; // Predicate new let validSubTargets = HasV4SubT, Predicates = [HasV4T] in @@ -1460,9 +1465,9 @@ multiclass ST_MEMri<string mnemonic, string CextOp, RegisterClass RC, let CextOpcode = CextOp, BaseOpcode = CextOp in { let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits, isPredicable = 1 in - def #NAME# : STInst2<(outs), + def NAME : STInst2<(outs), (ins MEMri:$addr, RC:$src), - #mnemonic#"($addr) = $src", + mnemonic#"($addr) = $src", []>; let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits, @@ -1501,8 +1506,8 @@ def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr), //===----------------------------------------------------------------------===// multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, bit isNot, bit isPredNew> { - let PNewValue = #!if(isPredNew, "new", "") in - def #NAME# : STInst2<(outs), + let PNewValue = !if(isPredNew, "new", "") in + def NAME : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#mnemonic#"($src2+#$src3) = $src4", @@ -1511,8 +1516,8 @@ multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp, multiclass ST_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp, bit PredNot> { - let PredSense = #!if(PredNot, "false", "true"), isPredicated = 1 in { - defm _c#NAME# : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>; + let PredSense = !if(PredNot, "false", "true"), isPredicated = 1 in { + defm _c#NAME : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>; // Predicate new let validSubTargets = HasV4SubT, Predicates = [HasV4T] in @@ -1528,9 +1533,9 @@ multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC, let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits, isPredicable = 1 in - def #NAME# : STInst2<(outs), + def NAME : STInst2<(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), - #mnemonic#"($src1+#$src2) = $src3", + mnemonic#"($src1+#$src2) = $src3", []>; let opExtendable = 2, isExtentSigned = 0, opExtentBits = 
PredImmBits in { @@ -1583,7 +1588,7 @@ def STrib_GP : STInst2<(outs), Requires<[NoV4T]>; // memb(#global)=Rt -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, validSubTargets = NoV4SubT in def STb_GP : STInst2<(outs), (ins globaladdress:$global, IntRegs:$src), "memb(#$global) = $src", @@ -1623,7 +1628,7 @@ def STrih_GP : STInst2<(outs), []>, Requires<[NoV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, validSubTargets = NoV4SubT in def STh_GP : STInst2<(outs), (ins globaladdress:$global, IntRegs:$src), "memh(#$global) = $src", @@ -1672,7 +1677,7 @@ def STriw_GP : STInst2<(outs), []>, Requires<[NoV4T]>; -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1, validSubTargets = NoV4SubT in def STw_GP : STInst2<(outs), (ins globaladdress:$global, IntRegs:$src), "memw(#$global) = $src", diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index ae407db029..372de9a8b2 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -12,10 +12,14 @@ //===----------------------------------------------------------------------===// let neverHasSideEffects = 1 in -def IMMEXT : Immext<(outs), (ins), - "/* immext #... */", - []>, - Requires<[HasV4T]>; +class T_Immext<dag ins> : + EXTENDERInst<(outs), ins, "immext(#$imm)", []>, + Requires<[HasV4T]>; + +def IMMEXT_b : T_Immext<(ins brtarget:$imm)>; +def IMMEXT_c : T_Immext<(ins calltarget:$imm)>; +def IMMEXT_g : T_Immext<(ins globaladdress:$imm)>; +def IMMEXT_i : T_Immext<(ins u26_6Imm:$imm)>; // Hexagon V4 Architecture spec defines 8 instruction classes: // LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the @@ -83,86 +87,77 @@ def IMMEXT : Immext<(outs), (ins), // Shift halfword. -let isPredicated = 1 in +let isPredicated = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in { def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1) $dst = aslh($src2)", []>, Requires<[HasV4T]>; -let isPredicated = 1 in def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1) $dst = aslh($src2)", []>, Requires<[HasV4T]>; -let isPredicated = 1 in def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) $dst = aslh($src2)", []>, Requires<[HasV4T]>; -let isPredicated = 1 in def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) $dst = aslh($src2)", []>, Requires<[HasV4T]>; -let isPredicated = 1 in def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1) $dst = asrh($src2)", []>, Requires<[HasV4T]>; -let isPredicated = 1 in def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1) $dst = asrh($src2)", []>, Requires<[HasV4T]>; -let isPredicated = 1 in def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) $dst = asrh($src2)", []>, Requires<[HasV4T]>; -let isPredicated = 1 in def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) $dst = asrh($src2)", []>, Requires<[HasV4T]>; +} // Sign extend. 
-let isPredicated = 1 in +let isPredicated = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in { def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1) $dst = sxtb($src2)", []>, Requires<[HasV4T]>; -let isPredicated = 1 in def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1) $dst = sxtb($src2)", []>, Requires<[HasV4T]>; -let isPredicated = 1 in def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) $dst = sxtb($src2)", []>, Requires<[HasV4T]>; -let isPredicated = 1 in def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) $dst = sxtb($src2)", @@ -170,94 +165,86 @@ def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), Requires<[HasV4T]>; -let isPredicated = 1 in def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1) $dst = sxth($src2)", []>, Requires<[HasV4T]>; -let isPredicated = 1 in def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1) $dst = sxth($src2)", []>, Requires<[HasV4T]>; -let isPredicated = 1 in def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) $dst = sxth($src2)", []>, Requires<[HasV4T]>; -let isPredicated = 1 in def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) $dst = sxth($src2)", []>, Requires<[HasV4T]>; +} // Zero exten. -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in { def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1) $dst = zxtb($src2)", []>, Requires<[HasV4T]>; -let neverHasSideEffects = 1, isPredicated = 1 in def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1) $dst = zxtb($src2)", []>, Requires<[HasV4T]>; -let neverHasSideEffects = 1, isPredicated = 1 in def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) $dst = zxtb($src2)", []>, Requires<[HasV4T]>; -let neverHasSideEffects = 1, isPredicated = 1 in def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) $dst = zxtb($src2)", []>, Requires<[HasV4T]>; -let neverHasSideEffects = 1, isPredicated = 1 in def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1) $dst = zxth($src2)", []>, Requires<[HasV4T]>; -let neverHasSideEffects = 1, isPredicated = 1 in def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1) $dst = zxth($src2)", []>, Requires<[HasV4T]>; -let neverHasSideEffects = 1, isPredicated = 1 in def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) $dst = zxth($src2)", []>, Requires<[HasV4T]>; -let neverHasSideEffects = 1, isPredicated = 1 in def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) $dst = zxth($src2)", []>, Requires<[HasV4T]>; +} // Generate frame index addresses. 
-let neverHasSideEffects = 1, isReMaterializable = 1 in +let neverHasSideEffects = 1, isReMaterializable = 1, +isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1, s32Imm:$offset), "$dst = add($src1, ##$offset)", @@ -431,8 +418,8 @@ def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst), // addressing mode multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = #!if(isPredNew, "new", "") in - def #NAME# : LDInst2<(outs RC:$dst), + let PNewValue = !if(isPredNew, "new", "") in + def NAME : LDInst2<(outs RC:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$offset)", @@ -440,10 +427,10 @@ multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot, } multiclass ld_idxd_shl_pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = #!if(PredNot, "false", "true") in { - defm _c#NAME# : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>; + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>; // Predicate new - defm _cdn#NAME# : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>; + defm _cdn#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>; } } @@ -451,7 +438,7 @@ let neverHasSideEffects = 1 in multiclass ld_idxd_shl<string mnemonic, string CextOp, RegisterClass RC> { let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in { let isPredicable = 1 in - def #NAME#_V4 : LDInst2<(outs RC:$dst), + def NAME#_V4 : LDInst2<(outs RC:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), "$dst = "#mnemonic#"($src1+$src2<<#$offset)", []>, Requires<[HasV4T]>; @@ -1054,7 +1041,7 @@ def LDriw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; -let isPredicable = 1, neverHasSideEffects = 1 in +let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst), (ins globaladdress:$global), "$dst=memd(#$global)", @@ -1062,7 +1049,8 @@ def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rtt=memd(##global) -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, +validSubTargets = HasV4SubT in { def LDd_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if ($src1) $dst=memd(##$global)", @@ -1071,7 +1059,6 @@ def LDd_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), // if (!Pv) Rtt=memd(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDd_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if (!$src1) $dst=memd(##$global)", @@ -1079,7 +1066,6 @@ def LDd_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rtt=memd(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDd_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if ($src1.new) $dst=memd(##$global)", @@ -1088,14 +1074,14 @@ def LDd_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), // if (!Pv) Rtt=memd(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDd_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if (!$src1.new) $dst=memd(##$global)", []>, Requires<[HasV4T]>; +} -let isPredicable = 1, neverHasSideEffects = 1 in +let 
isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in def LDb_GP_V4 : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst=memb(#$global)", @@ -1103,7 +1089,8 @@ def LDb_GP_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rt=memb(##global) -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, +validSubTargets = HasV4SubT in { def LDb_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if ($src1) $dst=memb(##$global)", @@ -1111,7 +1098,6 @@ def LDb_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rt=memb(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDb_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if (!$src1) $dst=memb(##$global)", @@ -1119,7 +1105,6 @@ def LDb_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rt=memb(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDb_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if ($src1.new) $dst=memb(##$global)", @@ -1127,14 +1112,14 @@ def LDb_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rt=memb(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDb_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if (!$src1.new) $dst=memb(##$global)", []>, Requires<[HasV4T]>; +} -let isPredicable = 1, neverHasSideEffects = 1 in +let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in def LDub_GP_V4 : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst=memub(#$global)", @@ -1142,7 +1127,8 @@ def LDub_GP_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rt=memub(##global) -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, +validSubTargets = HasV4SubT in { def LDub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if ($src1) $dst=memub(##$global)", @@ -1151,7 +1137,6 @@ def LDub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), // if (!Pv) Rt=memub(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if (!$src1) $dst=memub(##$global)", @@ -1159,7 +1144,6 @@ def LDub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rt=memub(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if ($src1.new) $dst=memub(##$global)", @@ -1168,14 +1152,14 @@ def LDub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), // if (!Pv) Rt=memub(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if (!$src1.new) $dst=memub(##$global)", []>, Requires<[HasV4T]>; +} -let isPredicable = 1, neverHasSideEffects = 1 in +let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in def LDh_GP_V4 : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst=memh(#$global)", @@ -1183,7 +1167,8 @@ def LDh_GP_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rt=memh(##global) -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1, 
isExtended = 1, opExtendable = 2, +validSubTargets = HasV4SubT in { def LDh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if ($src1) $dst=memh(##$global)", @@ -1191,7 +1176,6 @@ def LDh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rt=memh(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if (!$src1) $dst=memh(##$global)", @@ -1199,7 +1183,6 @@ def LDh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rt=memh(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if ($src1.new) $dst=memh(##$global)", @@ -1207,14 +1190,14 @@ def LDh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rt=memh(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if (!$src1.new) $dst=memh(##$global)", []>, Requires<[HasV4T]>; +} -let isPredicable = 1, neverHasSideEffects = 1 in +let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in def LDuh_GP_V4 : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst=memuh(#$global)", @@ -1222,7 +1205,8 @@ def LDuh_GP_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rt=memuh(##global) -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, +validSubTargets = HasV4SubT in { def LDuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if ($src1) $dst=memuh(##$global)", @@ -1230,7 +1214,6 @@ def LDuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rt=memuh(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if (!$src1) $dst=memuh(##$global)", @@ -1238,7 +1221,6 @@ def LDuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rt=memuh(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if ($src1.new) $dst=memuh(##$global)", @@ -1246,14 +1228,14 @@ def LDuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rt=memuh(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if (!$src1.new) $dst=memuh(##$global)", []>, Requires<[HasV4T]>; +} -let isPredicable = 1, neverHasSideEffects = 1 in +let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in def LDw_GP_V4 : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst=memw(#$global)", @@ -1261,7 +1243,8 @@ def LDw_GP_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rt=memw(##global) -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, +validSubTargets = HasV4SubT in { def LDw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if ($src1) $dst=memw(##$global)", @@ -1270,7 +1253,6 @@ def LDw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), // if (!Pv) Rt=memw(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def 
LDw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if (!$src1) $dst=memw(##$global)", @@ -1278,7 +1260,6 @@ def LDw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv) Rt=memw(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if ($src1.new) $dst=memw(##$global)", @@ -1287,13 +1268,12 @@ def LDw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), // if (!Pv) Rt=memw(##global) -let neverHasSideEffects = 1, isPredicated = 1 in def LDw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$global), "if (!$src1.new) $dst=memw(##$global)", []>, Requires<[HasV4T]>; - +} def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)), @@ -1538,8 +1518,8 @@ def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1), // mode multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = #!if(isPredNew, "new", "") in - def #NAME# : STInst2<(outs), + let PNewValue = !if(isPredNew, "new", "") in + def NAME : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, RC:$src5), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -1549,10 +1529,10 @@ multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_Idxd_shl_Pred<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = #!if(PredNot, "false", "true") in { - defm _c#NAME# : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>; + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>; // Predicate new - defm _cdn#NAME# : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>; + defm _cdn#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>; } } @@ -1560,9 +1540,9 @@ let isNVStorable = 1 in multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> { let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in { let isPredicable = 1 in - def #NAME#_V4 : STInst2<(outs), + def NAME#_V4 : STInst2<(outs), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4), - #mnemonic#"($src1+$src2<<#$src3) = $src4", + mnemonic#"($src1+$src2<<#$src3) = $src4", []>, Requires<[HasV4T]>; @@ -1577,8 +1557,8 @@ multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> { // addressing mode. 
multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { - let PNewValue = #!if(isPredNew, "new", "") in - def #NAME#_nv_V4 : NVInst_V4<(outs), + let PNewValue = !if(isPredNew, "new", "") in + def NAME#_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, RC:$src5), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -1588,10 +1568,10 @@ multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, } multiclass ST_Idxd_shl_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { - let PredSense = #!if(PredNot, "false", "true") in { - defm _c#NAME# : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>; + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>; // Predicate new - defm _cdn#NAME# : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>; + defm _cdn#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>; } } @@ -1599,9 +1579,9 @@ let mayStore = 1, isNVStore = 1 in multiclass ST_Idxd_shl_nv<string mnemonic, string CextOp, RegisterClass RC> { let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in { let isPredicable = 1 in - def #NAME#_nv_V4 : NVInst_V4<(outs), + def NAME#_nv_V4 : NVInst_V4<(outs), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4), - #mnemonic#"($src1+$src2<<#$src3) = $src4.new", + mnemonic#"($src1+$src2<<#$src3) = $src4.new", []>, Requires<[HasV4T]>; @@ -1702,20 +1682,20 @@ def POST_STdri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst), // addressing mode and immediate stored value. multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot, bit isPredNew> { - let PNewValue = #!if(isPredNew, "new", "") in - def #NAME# : STInst2<(outs), + let PNewValue = !if(isPredNew, "new", "") in + def NAME : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, OffsetOp:$src3, s6Ext:$src4), - #!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#mnemonic#"($src2+#$src3) = #$src4", []>, Requires<[HasV4T]>; } multiclass ST_Imm_Pred<string mnemonic, Operand OffsetOp, bit PredNot> { - let PredSense = #!if(PredNot, "false", "true") in { - defm _c#NAME# : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>; + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>; // Predicate new - defm _cdn#NAME# : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>; + defm _cdn#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>; } } @@ -1723,9 +1703,9 @@ let isExtendable = 1, isExtentSigned = 1, neverHasSideEffects = 1 in multiclass ST_Imm<string mnemonic, string CextOp, Operand OffsetOp> { let CextOpcode = CextOp, BaseOpcode = CextOp#_imm in { let opExtendable = 2, opExtentBits = 8, isPredicable = 1 in - def #NAME#_V4 : STInst2<(outs), + def NAME#_V4 : STInst2<(outs), (ins IntRegs:$src1, OffsetOp:$src2, s8Ext:$src3), - #mnemonic#"($src1+#$src2) = #$src3", + mnemonic#"($src1+#$src2) = #$src3", []>, Requires<[HasV4T]>; @@ -2376,8 +2356,8 @@ def : Pat<(store (i32 IntRegs:$src1), // multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC, Operand predImmOp, bit isNot, bit isPredNew> { - let PNewValue = #!if(isPredNew, "new", "") in - def #NAME#_nv_V4 : NVInst_V4<(outs), + let PNewValue = !if(isPredNew, "new", "") in + def NAME#_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") 
")#mnemonic#"($src2+#$src3) = $src4.new", @@ -2387,10 +2367,10 @@ multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC, multiclass ST_Idxd_Pred_nv<string mnemonic, RegisterClass RC, Operand predImmOp, bit PredNot> { - let PredSense = #!if(PredNot, "false", "true") in { - defm _c#NAME# : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>; + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>; // Predicate new - defm _cdn#NAME# : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>; + defm _cdn#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>; } } @@ -2402,9 +2382,9 @@ multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC, let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits, isPredicable = 1 in - def #NAME#_nv_V4 : NVInst_V4<(outs), + def NAME#_nv_V4 : NVInst_V4<(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), - #mnemonic#"($src1+#$src2) = $src3.new", + mnemonic#"($src1+#$src2) = $src3.new", []>, Requires<[HasV4T]>; @@ -2425,16 +2405,56 @@ let addrMode = BaseImmOffset, validSubTargets = HasV4SubT in { u6_2Ext, 13, 8>, AddrModeRel; } -// Store new-value byte. +// multiclass for new-value store instructions with base + immediate offset. +// and MEMri operand. +multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot, + bit isPredNew> { + let PNewValue = !if(isPredNew, "new", "") in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, RC: $src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", + ") ")#mnemonic#"($addr) = $src2.new", + []>, + Requires<[HasV4T]>; +} + +multiclass ST_MEMri_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> { + let PredSense = !if(PredNot, "false", "true") in { + defm _c#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 0>; + + // Predicate new + defm _cdn#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 1>; + } +} + +let mayStore = 1, isNVStore = 1, isExtendable = 1, neverHasSideEffects = 1 in +multiclass ST_MEMri_nv<string mnemonic, string CextOp, RegisterClass RC, + bits<5> ImmBits, bits<5> PredImmBits> { -// memb(Re=#U6)=Nt.new -// memb(Rs+#s11:0)=Nt.new -let mayStore = 1, isPredicable = 1 in -def STrib_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1), - "memb($addr) = $src1.new", + let CextOpcode = CextOp, BaseOpcode = CextOp in { + let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits, + isPredicable = 1 in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins MEMri:$addr, RC:$src), + mnemonic#"($addr) = $src.new", []>, Requires<[HasV4T]>; + let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits, + neverHasSideEffects = 1, isPredicated = 1 in { + defm Pt : ST_MEMri_Pred_nv<mnemonic, RC, 0>; + defm NotPt : ST_MEMri_Pred_nv<mnemonic, RC, 1>; + } + } +} + +let addrMode = BaseImmOffset, isMEMri = "true", validSubTargets = HasV4SubT, +mayStore = 1 in { + defm STrib: ST_MEMri_nv<"memb", "STrib", IntRegs, 11, 6>, AddrModeRel; + defm STrih: ST_MEMri_nv<"memh", "STrih", IntRegs, 12, 7>, AddrModeRel; + defm STriw: ST_MEMri_nv<"memw", "STriw", IntRegs, 13, 8>, AddrModeRel; +} + // memb(Ru<<#u2+#U6)=Nt.new let mayStore = 1, AddedComplexity = 10 in def STrib_shl_nv_V4 : NVInst_V4<(outs), @@ -2473,44 +2493,6 @@ def STb_GP_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; -// Store new-value byte conditionally. 
-// if ([!]Pv[.new]) memb(#u6)=Nt.new -// if (Pv) memb(Rs+#u6:0)=Nt.new -let mayStore = 1, neverHasSideEffects = 1, - isPredicated = 1 in -def STrib_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), - "if ($src1) memb($addr) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) memb(Rs+#u6:0)=Nt.new -let mayStore = 1, neverHasSideEffects = 1, - isPredicated = 1 in -def STrib_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), - "if ($src1.new) memb($addr) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memb(Rs+#u6:0)=Nt.new -let mayStore = 1, neverHasSideEffects = 1, - isPredicated = 1 in -def STrib_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), - "if (!$src1) memb($addr) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) memb(Rs+#u6:0)=Nt.new -let mayStore = 1, neverHasSideEffects = 1, - isPredicated = 1 in -def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), - "if (!$src1.new) memb($addr) = $src2.new", - []>, - Requires<[HasV4T]>; - // if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new // if (Pv) memb(Rx++#s4:0)=Nt.new let mayStore = 1, hasCtrlDep = 1, @@ -2548,16 +2530,6 @@ def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), [],"$src3 = $dst">, Requires<[HasV4T]>; - -// Store new-value halfword. -// memh(Re=#U6)=Nt.new -// memh(Rs+#s11:1)=Nt.new -let mayStore = 1, isPredicable = 1 in -def STrih_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1), - "memh($addr) = $src1.new", - []>, - Requires<[HasV4T]>; - // memh(Ru<<#u2+#U6)=Nt.new let mayStore = 1, AddedComplexity = 10 in def STrih_shl_nv_V4 : NVInst_V4<(outs), @@ -2597,47 +2569,6 @@ def STh_GP_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; -// Store new-value halfword conditionally. - -// if ([!]Pv[.new]) memh(#u6)=Nt.new - -// if ([!]Pv[.new]) memh(Rs+#u6:1)=Nt.new -// if (Pv) memh(Rs+#u6:1)=Nt.new -let mayStore = 1, neverHasSideEffects = 1, - isPredicated = 1 in -def STrih_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), - "if ($src1) memh($addr) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) memh(Rs+#u6:1)=Nt.new -let mayStore = 1, neverHasSideEffects = 1, - isPredicated = 1 in -def STrih_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), - "if ($src1.new) memh($addr) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memh(Rs+#u6:1)=Nt.new -let mayStore = 1, neverHasSideEffects = 1, - isPredicated = 1 in -def STrih_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), - "if (!$src1) memh($addr) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) memh(Rs+#u6:1)=Nt.new -let mayStore = 1, neverHasSideEffects = 1, - isPredicated = 1 in -def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), - "if (!$src1.new) memh($addr) = $src2.new", - []>, - Requires<[HasV4T]>; - // if ([!]Pv[]) memh(Rx++#s4:1)=Nt.new // if (Pv) memh(Rx++#s4:1)=Nt.new let mayStore = 1, hasCtrlDep = 1, @@ -2675,18 +2606,6 @@ def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), [],"$src3 = $dst">, Requires<[HasV4T]>; - -// Store new-value word. 
- -// memw(Re=#U6)=Nt.new -// memw(Rs+#s11:2)=Nt.new -let mayStore = 1, isPredicable = 1 in -def STriw_nv_V4 : NVInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$src1), - "memw($addr) = $src1.new", - []>, - Requires<[HasV4T]>; - // memw(Ru<<#u2+#U6)=Nt.new let mayStore = 1, AddedComplexity = 10 in def STriw_shl_nv_V4 : NVInst_V4<(outs), @@ -2723,47 +2642,6 @@ def STw_GP_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; -// Store new-value word conditionally. - -// if ([!]Pv[.new]) memw(#u6)=Nt.new - -// if ([!]Pv[.new]) memw(Rs+#u6:2)=Nt.new -// if (Pv) memw(Rs+#u6:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1, - isPredicated = 1 in -def STriw_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), - "if ($src1) memw($addr) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (Pv.new) memw(Rs+#u6:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1, - isPredicated = 1 in -def STriw_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), - "if ($src1.new) memw($addr) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memw(Rs+#u6:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1, - isPredicated = 1 in -def STriw_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), - "if (!$src1) memw($addr) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv.new) memw(Rs+#u6:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1, - isPredicated = 1 in -def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), - "if (!$src1.new) memw($addr) = $src2.new", - []>, - Requires<[HasV4T]>; - // if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new // if (Pv) memw(Rx++#s4:2)=Nt.new let mayStore = 1, hasCtrlDep = 1, diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp index dc644ad213..db36ac0110 100644 --- a/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -16,7 +16,7 @@ #include "HexagonAsmPrinter.h" #include "HexagonMachineFunctionInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/Constants.h" +#include "llvm/IR/Constants.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/Mangler.h" diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index 7195c4a8d3..576f1d7d07 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -45,7 +45,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Constants.h" +#include "llvm/IR/Constants.h" #include "llvm/PassSupport.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 2985a5667b..d1882de432 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -25,14 +25,14 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" using namespace llvm; diff --git 
a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp index a4445cb4b1..34bf4eacfd 100644 --- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp +++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp @@ -15,8 +15,8 @@ #include "Hexagon.h" #include "HexagonTargetMachine.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include "llvm/Transforms/Scalar.h" @@ -51,7 +51,7 @@ bool HexagonRemoveExtendArgs::runOnFunction(Function &F) { unsigned Idx = 1; for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI, ++Idx) { - if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt)) { + if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) { Argument* Arg = AI; if (!isa<PointerType>(Arg->getType())) { for (Instruction::use_iterator UI = Arg->use_begin(); diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 8fc836b458..287b3d615b 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -16,7 +16,7 @@ #include "HexagonISelLowering.h" #include "HexagonMachineScheduler.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" @@ -74,8 +74,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget), - InstrItins(&Subtarget.getInstrItineraryData()), - STTI(&TLInfo), VTTI(&TLInfo) { + InstrItins(&Subtarget.getInstrItineraryData()) { setMCUseCFI(false); } diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index fa669a2719..cf8f9aa361 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -19,9 +19,8 @@ #include "HexagonInstrInfo.h" #include "HexagonSelectionDAGInfo.h" #include "HexagonSubtarget.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -35,8 +34,6 @@ class HexagonTargetMachine : public LLVMTargetMachine { HexagonSelectionDAGInfo TSInfo; HexagonFrameLowering FrameLowering; const InstrItineraryData* InstrItins; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU, @@ -71,14 +68,6 @@ public: return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } - virtual const DataLayout *getDataLayout() const { return &DL; } static unsigned getModuleMatchQuality(const Module &M); diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index 2c1145f171..993fcfaed4 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -14,10 +14,10 @@ #include "HexagonTargetObjectFile.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" 
+#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index ab6f6cf899..409a243f1c 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -241,8 +241,9 @@ static bool IsIndirectCall(MachineInstr* MI) { // reservation fail. void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) { const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr( - QII->get(Hexagon::IMMEXT), MI->getDebugLoc()); + MachineFunction *MF = MI->getParent()->getParent(); + MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i), + MI->getDebugLoc()); if (ResourceTracker->canReserveResources(PseudoMI)) { ResourceTracker->reserveResources(PseudoMI); @@ -259,7 +260,7 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) { assert(QII->isExtended(MI) && "Should only be called for constant extended instructions"); MachineFunction *MF = MI->getParent()->getParent(); - MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT), + MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i), MI->getDebugLoc()); bool CanReserve = ResourceTracker->canReserveResources(PseudoMI); MF->DeleteMachineInstr(PseudoMI); @@ -270,8 +271,9 @@ bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) { // true, otherwise, return false. bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) { const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr( - QII->get(Hexagon::IMMEXT), MI->getDebugLoc()); + MachineFunction *MF = MI->getParent()->getParent(); + MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i), + MI->getDebugLoc()); if (ResourceTracker->canReserveResources(PseudoMI)) { ResourceTracker->reserveResources(PseudoMI); diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp index 7aa5dd3b89..40f6c8d23e 100644 --- a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp +++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "Hexagon.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index eb6c779f45..f3a9c1c3e7 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore +subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the diff --git a/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/lib/Target/MBlaze/AsmParser/CMakeLists.txt index 813767ba6d..4a7d8e8d88 100644 --- 
a/lib/Target/MBlaze/AsmParser/CMakeLists.txt +++ b/lib/Target/MBlaze/AsmParser/CMakeLists.txt @@ -2,7 +2,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) add_llvm_library(LLVMMBlazeAsmParser - MBlazeAsmLexer.cpp MBlazeAsmParser.cpp ) diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp deleted file mode 100644 index 480e79f9f5..0000000000 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp +++ /dev/null @@ -1,112 +0,0 @@ -//===-- MBlazeAsmLexer.cpp - Tokenize MBlaze assembly to AsmTokens --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/MBlazeBaseInfo.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCTargetAsmLexer.h" -#include "llvm/Support/TargetRegistry.h" -#include <map> -#include <string> - -using namespace llvm; - -namespace { - - class MBlazeBaseAsmLexer : public MCTargetAsmLexer { - const MCAsmInfo &AsmInfo; - - const AsmToken &lexDefinite() { - return getLexer()->Lex(); - } - - AsmToken LexTokenUAL(); - protected: - typedef std::map <std::string, unsigned> rmap_ty; - - rmap_ty RegisterMap; - - void InitRegisterMap(const MCRegisterInfo *info) { - unsigned numRegs = info->getNumRegs(); - - for (unsigned i = 0; i < numRegs; ++i) { - const char *regName = info->getName(i); - if (regName) - RegisterMap[regName] = i; - } - } - - unsigned MatchRegisterName(StringRef Name) { - rmap_ty::iterator iter = RegisterMap.find(Name.str()); - if (iter != RegisterMap.end()) - return iter->second; - else - return 0; - } - - AsmToken LexToken() { - if (!Lexer) { - SetError(SMLoc(), "No MCAsmLexer installed"); - return AsmToken(AsmToken::Error, "", 0); - } - - switch (AsmInfo.getAssemblerDialect()) { - default: - SetError(SMLoc(), "Unhandled dialect"); - return AsmToken(AsmToken::Error, "", 0); - case 0: - return LexTokenUAL(); - } - } - public: - MBlazeBaseAsmLexer(const Target &T, const MCAsmInfo &MAI) - : MCTargetAsmLexer(T), AsmInfo(MAI) { - } - }; - - class MBlazeAsmLexer : public MBlazeBaseAsmLexer { - public: - MBlazeAsmLexer(const Target &T, const MCRegisterInfo &MRI, - const MCAsmInfo &MAI) - : MBlazeBaseAsmLexer(T, MAI) { - InitRegisterMap(&MRI); - } - }; -} - -AsmToken MBlazeBaseAsmLexer::LexTokenUAL() { - const AsmToken &lexedToken = lexDefinite(); - - switch (lexedToken.getKind()) { - default: - return AsmToken(lexedToken); - case AsmToken::Error: - SetError(Lexer->getErrLoc(), Lexer->getErr()); - return AsmToken(lexedToken); - case AsmToken::Identifier: - { - unsigned regID = MatchRegisterName(lexedToken.getString().lower()); - - if (regID) { - return AsmToken(AsmToken::Register, - lexedToken.getString(), - static_cast<int64_t>(regID)); - } else { - return AsmToken(lexedToken); - } - } - } -} - -extern "C" void LLVMInitializeMBlazeAsmLexer() { - RegisterMCAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget); -} - diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index 2016600b9d..d73c20f197 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -35,7 +35,8 @@ class MBlazeAsmParser : public MCTargetAsmParser { bool 
Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } MBlazeOperand *ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands); - MBlazeOperand *ParseRegister(unsigned &RegNo); + MBlazeOperand *ParseRegister(); + MBlazeOperand *ParseRegister(SMLoc &StartLoc, SMLoc &EndLoc); MBlazeOperand *ParseImmediate(); MBlazeOperand *ParseFsl(); MBlazeOperand* ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); @@ -383,23 +384,31 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { bool MBlazeAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { - return (ParseRegister(RegNo) == 0); + MBlazeOperand *Reg = ParseRegister(StartLoc, EndLoc); + if (!Reg) + return true; + RegNo = Reg->getReg(); + return false; } -MBlazeOperand *MBlazeAsmParser::ParseRegister(unsigned &RegNo) { - SMLoc S = Parser.getTok().getLoc(); - SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); +MBlazeOperand *MBlazeAsmParser::ParseRegister() { + SMLoc S, E; + return ParseRegister(S, E); +} - switch (getLexer().getKind()) { - default: return 0; - case AsmToken::Identifier: - RegNo = MatchRegisterName(getLexer().getTok().getIdentifier()); - if (RegNo == 0) - return 0; +MBlazeOperand *MBlazeAsmParser::ParseRegister(SMLoc &StartLoc, SMLoc &EndLoc) { + StartLoc = Parser.getTok().getLoc(); + EndLoc = Parser.getTok().getEndLoc(); - getLexer().Lex(); - return MBlazeOperand::CreateReg(RegNo, S, E); - } + if (getLexer().getKind() != AsmToken::Identifier) + return 0; + + unsigned RegNo = MatchRegisterName(getLexer().getTok().getIdentifier()); + if (RegNo == 0) + return 0; + + getLexer().Lex(); + return MBlazeOperand::CreateReg(RegNo, StartLoc, EndLoc); } static unsigned MatchFslRegister(StringRef String) { @@ -415,7 +424,7 @@ static unsigned MatchFslRegister(StringRef String) { MBlazeOperand *MBlazeAsmParser::ParseFsl() { SMLoc S = Parser.getTok().getLoc(); - SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + SMLoc E = Parser.getTok().getEndLoc(); switch (getLexer().getKind()) { default: return 0; @@ -432,7 +441,7 @@ MBlazeOperand *MBlazeAsmParser::ParseFsl() { MBlazeOperand *MBlazeAsmParser::ParseImmediate() { SMLoc S = Parser.getTok().getLoc(); - SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + SMLoc E = Parser.getTok().getEndLoc(); const MCExpr *EVal; switch (getLexer().getKind()) { @@ -454,8 +463,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { MBlazeOperand *Op; // Attempt to parse the next token as a register name - unsigned RegNo; - Op = ParseRegister(RegNo); + Op = ParseRegister(); // Attempt to parse the next token as an FSL immediate if (!Op) @@ -532,7 +540,7 @@ bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getParser().ParseExpression(Value)) return true; - getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); + getParser().getStreamer().EmitValue(Value, Size); if (getLexer().is(AsmToken::EndOfStatement)) break; @@ -548,12 +556,9 @@ bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } -extern "C" void LLVMInitializeMBlazeAsmLexer(); - /// Force static initialization. 
extern "C" void LLVMInitializeMBlazeAsmParser() { RegisterMCAsmParser<MBlazeAsmParser> X(TheMBlazeTarget); - LLVMInitializeMBlazeAsmLexer(); } #define GET_REGISTER_MATCHER diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt index 0bf93d71da..91a41f39b5 100644 --- a/lib/Target/MBlaze/CMakeLists.txt +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -9,7 +9,6 @@ tablegen(LLVM MBlazeGenDAGISel.inc -gen-dag-isel) tablegen(LLVM MBlazeGenCallingConv.inc -gen-callingconv) tablegen(LLVM MBlazeGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM MBlazeGenIntrinsics.inc -gen-tgt-intrinsic) -tablegen(LLVM MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info) add_public_tablegen_target(MBlazeCommonTableGen) add_llvm_target(MBlazeCodeGen diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index c9a46a7361..c03ab3803b 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -14,7 +14,6 @@ #include "MBlazeDisassembler.h" #include "MBlaze.h" -#include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" @@ -25,7 +24,6 @@ // #include "MBlazeGenDecoderTables.inc" // #include "MBlazeGenRegisterNames.inc" -#include "MBlazeGenEDInfo.inc" namespace llvm { extern const MCInstrDesc MBlazeInsts[]; @@ -491,10 +489,6 @@ static unsigned getOPCODE(uint32_t insn) { } } -const EDInstInfo *MBlazeDisassembler::getEDInfo() const { - return instInfoMBlaze; -} - // // Public interface for the disassembler // diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h index 5c4ae3b1ac..b8ff8f6072 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h @@ -23,8 +23,6 @@ class MCInst; class MemoryObject; class raw_ostream; -struct EDInstInfo; - /// MBlazeDisassembler - Disassembler for all MBlaze platforms. class MBlazeDisassembler : public MCDisassembler { public: @@ -44,9 +42,6 @@ public: uint64_t address, raw_ostream &vStream, raw_ostream &cStream) const; - - /// getEDInfo - See MCDisassembler. 
- const EDInstInfo *getEDInfo() const; }; } // namespace llvm diff --git a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt index bb2c31a33a..586e2d3eef 100644 --- a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt +++ b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt @@ -5,4 +5,4 @@ add_llvm_library(LLVMMBlazeAsmPrinter MBlazeInstPrinter.cpp ) -add_dependencies(LLVMMBlazeAsmPrinter intrinsics_gen MBlazeCommonTableGen) +add_dependencies(LLVMMBlazeAsmPrinter MBlazeCommonTableGen) diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp index 0f6f108aea..7dafaef0af 100644 --- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp @@ -26,14 +26,14 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Module.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp index 99ffa413e6..b6edbba2a7 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp +++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp @@ -22,8 +22,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Function.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp index 98257a08d3..78ad24debb 100644 --- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp +++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp @@ -23,15 +23,15 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/GlobalValue.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Type.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 06c150571d..8a9f0922b1 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -18,7 +18,6 @@ #include "MBlazeSubtarget.h" #include "MBlazeTargetMachine.h" #include "MBlazeTargetObjectFile.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -26,10 +25,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" 
-#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp index 5238ad0040..8d262a01e7 100644 --- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "MBlazeIntrinsicInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Type.h" #include <cstring> using namespace llvm; diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp index 457c206fd0..ad414ac40f 100644 --- a/lib/Target/MBlaze/MBlazeMCInstLower.cpp +++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp @@ -18,7 +18,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Constants.h" +#include "llvm/IR/Constants.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index 7fa3b2f785..ed06cc4b72 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -24,8 +24,9 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -34,7 +35,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" #define GET_REGINFO_TARGET_DESC #include "MBlazeGenRegisterInfo.inc" diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 70eb9bac6b..bcdd32fed9 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -42,8 +42,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT, InstrInfo(*this), FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()), - STTI(&TLInfo), VTTI(&TLInfo) { + InstrItins(Subtarget.getInstrItineraryData()) { } namespace { diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 891a57bef5..956794ddda 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -20,11 +20,10 @@ #include "MBlazeIntrinsicInfo.h" #include "MBlazeSelectionDAGInfo.h" #include "MBlazeSubtarget.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetFrameLowering.h" 
#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { class formatted_raw_ostream; @@ -38,8 +37,6 @@ namespace llvm { MBlazeSelectionDAGInfo TSInfo; MBlazeIntrinsicInfo IntrinsicInfo; InstrItineraryData InstrItins; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: MBlazeTargetMachine(const Target &T, StringRef TT, @@ -75,11 +72,6 @@ namespace llvm { const TargetIntrinsicInfo *getIntrinsicInfo() const { return &IntrinsicInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const - { return &STTI; } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const - { return &VTTI; } - // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); }; diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp index 8a5c13cb1b..a7a0a68b16 100644 --- a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp +++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp @@ -9,9 +9,9 @@ #include "MBlazeTargetObjectFile.h" #include "MBlazeSubtarget.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalVariable.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp index bd2013dee3..6f9752c429 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp @@ -54,7 +54,7 @@ public: bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const; void relaxInstruction(const MCInst &Inst, MCInst &Res) const; @@ -88,7 +88,7 @@ bool MBlazeAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { bool MBlazeAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { // FIXME: Is this right? It's what the "generic" code was doing before, // but is X86 specific. 
Is it actually true for MBlaze also, or was it diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile index 83c2a7d34d..512ce9a081 100644 --- a/lib/Target/MBlaze/Makefile +++ b/lib/Target/MBlaze/Makefile @@ -15,8 +15,7 @@ BUILT_SOURCES = MBlazeGenRegisterInfo.inc MBlazeGenInstrInfo.inc \ MBlazeGenAsmWriter.inc \ MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \ MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \ - MBlazeGenSubtargetInfo.inc MBlazeGenIntrinsics.inc \ - MBlazeGenEDInfo.inc + MBlazeGenSubtargetInfo.inc MBlazeGenIntrinsics.inc DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc diff --git a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp index 71210d8db4..323a7f647d 100644 --- a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp +++ b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "MBlaze.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/lib/Target/MSP430/InstPrinter/CMakeLists.txt index 99457b924c..64ac994b7f 100644 --- a/lib/Target/MSP430/InstPrinter/CMakeLists.txt +++ b/lib/Target/MSP430/InstPrinter/CMakeLists.txt @@ -4,4 +4,4 @@ add_llvm_library(LLVMMSP430AsmPrinter MSP430InstPrinter.cpp ) -add_dependencies(LLVMMSP430AsmPrinter intrinsics_gen MSP430CommonTableGen) +add_dependencies(LLVMMSP430AsmPrinter MSP430CommonTableGen) diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index 4a08a1ff9f..0a04e5ddb7 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -24,13 +24,13 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Module.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/Mangler.h" diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp index 1b32688489..aef45d8141 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -19,8 +19,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Function.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index ff109615b1..1566c09603 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -13,17 +13,17 @@ #include "MSP430.h" #include "MSP430TargetMachine.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include 
"llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 8a1bd0958b..5a156c15c7 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -18,7 +18,6 @@ #include "MSP430MachineFunctionInfo.h" #include "MSP430Subtarget.h" #include "MSP430TargetMachine.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -27,11 +26,12 @@ #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -1063,6 +1063,10 @@ bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { return 0 && VT1 == MVT::i8 && VT2 == MVT::i16; } +bool MSP430TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { + return isZExtFree(Val.getValueType(), VT2); +} + //===----------------------------------------------------------------------===// // Other Lowering Code //===----------------------------------------------------------------------===// diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index bf021eaac5..ab4e64e921 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -116,6 +116,7 @@ namespace llvm { /// out to 16 bits. 
virtual bool isZExtFree(Type *Ty1, Type *Ty2) const; virtual bool isZExtFree(EVT VT1, EVT VT2) const; + virtual bool isZExtFree(SDValue Val, EVT VT2) const; MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const; diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index c99f1f895e..a6b5f2f6d0 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -18,7 +18,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index a2782e39d9..8f7813ad46 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 062c119410..164e351df9 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -36,7 +36,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, // FIXME: Check DataLayout string. DL("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) { } + FrameLowering(Subtarget) { } namespace { /// MSP430 Code Generator Pass Configuration Options. diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index 8a94d746de..be695a2111 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -21,10 +21,9 @@ #include "MSP430RegisterInfo.h" #include "MSP430SelectionDAGInfo.h" #include "MSP430Subtarget.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -37,8 +36,6 @@ class MSP430TargetMachine : public LLVMTargetMachine { MSP430TargetLowering TLInfo; MSP430SelectionDAGInfo TSInfo; MSP430FrameLowering FrameLowering; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: MSP430TargetMachine(const Target &T, StringRef TT, @@ -64,12 +61,6 @@ public: virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const { return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); }; // MSP430TargetMachine. 
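Note on the MSP430 isZExtFree change above: the patch adds an SDValue-based overload that does nothing more than forward to the existing EVT-based query, so value-level and type-level callers share one predicate. Below is a minimal standalone sketch of that delegation pattern, assuming nothing from LLVM itself; SimpleVT, Val, isZExtFreeVT, isZExtFreeVal and the i8-to-i16 condition are illustrative stand-ins, not the in-tree MSP430 code (whose EVT predicate is deliberately kept disabled in this patch).

// Standalone model of the delegation pattern: the value-based query only
// inspects the value's type and forwards to the type-based query.
#include <cassert>

enum class SimpleVT { i8, i16, i32 };

struct Val { SimpleVT Ty; };

// Type-based query (shape of the EVT overload).
bool isZExtFreeVT(SimpleVT From, SimpleVT To) {
  // Example condition only: treat an i8 -> i16 zero-extension as free.
  return From == SimpleVT::i8 && To == SimpleVT::i16;
}

// Value-based query (shape of the new SDValue overload): delegate on type.
bool isZExtFreeVal(const Val &V, SimpleVT To) {
  return isZExtFreeVT(V.Ty, To);
}

int main() {
  assert(isZExtFreeVal(Val{SimpleVT::i8}, SimpleVT::i16));
  assert(!isZExtFreeVal(Val{SimpleVT::i16}, SimpleVT::i32));
  return 0;
}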
diff --git a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp index 8b3e01ecf5..0d71d04ebe 100644 --- a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp +++ b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "MSP430.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index c3e83725d6..edfd421d85 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -14,9 +14,9 @@ #include "llvm/Target/Mangler.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 59fefa1268..57338df53c 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -84,15 +84,30 @@ class MipsAsmParser : public MCTargetAsmParser { bool ParseDirective(AsmToken DirectiveID); MipsAsmParser::OperandMatchResultTy - parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&); + parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + MipsAsmParser::OperandMatchResultTy + parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + MipsAsmParser::OperandMatchResultTy + parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + MipsAsmParser::OperandMatchResultTy + parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + MipsAsmParser::OperandMatchResultTy + parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + MipsAsmParser::OperandMatchResultTy + parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands); bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); - int tryParseRegister(StringRef Mnemonic); + int tryParseRegister(bool is64BitReg); bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - StringRef Mnemonic); + bool is64BitReg); bool needsExpansion(MCInst &Inst); @@ -128,9 +143,9 @@ class MipsAsmParser : public MCTargetAsmParser { return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0; } - int matchRegisterName(StringRef Symbol); + int matchRegisterName(StringRef Symbol, bool is64BitReg); - int matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic); + int matchRegisterByNumber(unsigned RegNum, unsigned RegClass); void setFpFormat(FpFormatTy Format) { FpFormat = Format; @@ -166,6 +181,20 @@ namespace { /// instruction. 
class MipsOperand : public MCParsedAsmOperand { +public: + enum RegisterKind { + Kind_None, + Kind_CPURegs, + Kind_CPU64Regs, + Kind_HWRegs, + Kind_HW64Regs, + Kind_FGR32Regs, + Kind_FGR64Regs, + Kind_AFGR32Regs, + Kind_CCRRegs + }; + +private: enum KindTy { k_CondCode, k_CoprocNum, @@ -186,6 +215,7 @@ class MipsOperand : public MCParsedAsmOperand { struct { unsigned RegNum; + RegisterKind Kind; } Reg; struct { @@ -246,6 +276,11 @@ public: return Reg.RegNum; } + void setRegKind(RegisterKind RegKind) { + assert((Kind == k_Register) && "Invalid access!"); + Reg.Kind = RegKind; + } + const MCExpr *getImm() const { assert((Kind == k_Immediate) && "Invalid access!"); return Imm.Val; @@ -296,6 +331,45 @@ public: return Op; } + bool isCPURegsAsm() const { + return Kind == k_Register && Reg.Kind == Kind_CPURegs; + } + void addCPURegsAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + bool isCPU64RegsAsm() const { + return Kind == k_Register && Reg.Kind == Kind_CPU64Regs; + } + void addCPU64RegsAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + bool isHWRegsAsm() const { + assert((Kind == k_Register) && "Invalid access!"); + return Reg.Kind == Kind_HWRegs; + } + void addHWRegsAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + bool isHW64RegsAsm() const { + assert((Kind == k_Register) && "Invalid access!"); + return Reg.Kind == Kind_HW64Regs; + } + void addHW64RegsAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + void addCCRAsmOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(Reg.RegNum)); + } + + bool isCCRAsm() const { + assert((Kind == k_Register) && "Invalid access!"); + return Reg.Kind == Kind_CCRRegs; + } + /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. @@ -344,31 +418,31 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, if ( 0 <= ImmValue && ImmValue <= 65535) { // for 0 <= j <= 65535. // li d,j => ori d,$zero,j - tmpInst.setOpcode(isMips64() ? Mips::ORi64 : Mips::ORi); + tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand( - MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO)); + MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else if ( ImmValue < 0 && ImmValue >= -32768) { // for -32768 <= j < 0. // li d,j => addiu d,$zero,j - tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files? + tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand( - MCOperand::CreateReg(isMips64() ? Mips::ZERO_64 : Mips::ZERO)); + MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else { // for any other value of j that is representable as a 32-bit integer. // li d,j => lui d,hi16(j) // ori d,d,lo16(j) - tmpInst.setOpcode(isMips64() ? Mips::LUi64 : Mips::LUi); + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); Instructions.push_back(tmpInst); tmpInst.clear(); - tmpInst.setOpcode(isMips64() ? 
Mips::ORi64 : Mips::ORi); + tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); @@ -390,7 +464,7 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, if ( -32768 <= ImmValue && ImmValue <= 65535) { //for -32768 <= j <= 65535. //la d,j(s) => addiu d,s,j - tmpInst.setOpcode(Mips::ADDiu); //TODO:no ADDiu64 in td files? + tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); @@ -400,12 +474,12 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, //la d,j(s) => lui d,hi16(j) // ori d,d,lo16(j) // addu d,d,s - tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi); + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); Instructions.push_back(tmpInst); tmpInst.clear(); - tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi); + tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); @@ -433,19 +507,19 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, tmpInst.setOpcode(Mips::ADDiu); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand( - MCOperand::CreateReg(isMips64()?Mips::ZERO_64:Mips::ZERO)); + MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); } else { //for any other value of j that is representable as a 32-bit integer. 
//la d,j => lui d,hi16(j) // ori d,d,lo16(j) - tmpInst.setOpcode(isMips64()?Mips::LUi64:Mips::LUi); + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); Instructions.push_back(tmpInst); tmpInst.clear(); - tmpInst.setOpcode(isMips64()?Mips::ORi64:Mips::ORi); + tmpInst.setOpcode(Mips::ORi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); @@ -498,10 +572,10 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return true; } -int MipsAsmParser::matchRegisterName(StringRef Name) { +int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) { int CC; - if (!isMips64()) + if (!is64BitReg) CC = StringSwitch<unsigned>(Name) .Case("zero", Mips::ZERO) .Case("a0", Mips::A0) @@ -643,7 +717,7 @@ unsigned MipsAsmParser::getATReg() { unsigned Reg = Options.getATRegNum(); if (isMips64()) return getReg(Mips::CPU64RegsRegClassID,Reg); - + return getReg(Mips::CPURegsRegClassID,Reg); } @@ -651,58 +725,36 @@ unsigned MipsAsmParser::getReg(int RC,int RegNo) { return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo); } -int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, StringRef Mnemonic) { - - if (Mnemonic.lower() == "rdhwr") { - // at the moment only hwreg29 is supported - if (RegNum != 29) - return -1; - return Mips::HWR29; - } +int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) { if (RegNum > 31) return -1; - // MIPS64 registers are numbered 1 after the 32-bit equivalents - return getReg(Mips::CPURegsRegClassID, RegNum) + isMips64(); + return getReg(RegClass, RegNum); } -int MipsAsmParser::tryParseRegister(StringRef Mnemonic) { +int MipsAsmParser::tryParseRegister(bool is64BitReg) { const AsmToken &Tok = Parser.getTok(); int RegNum = -1; if (Tok.is(AsmToken::Identifier)) { std::string lowerCase = Tok.getString().lower(); - RegNum = matchRegisterName(lowerCase); + RegNum = matchRegisterName(lowerCase, is64BitReg); } else if (Tok.is(AsmToken::Integer)) RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()), - Mnemonic.lower()); - else - return RegNum; //error - // 64 bit div operations require Mips::ZERO instead of MIPS::ZERO_64 - if (isMips64() && RegNum == Mips::ZERO_64) { - if (Mnemonic.find("ddiv") != StringRef::npos) - RegNum = Mips::ZERO; - } + is64BitReg ? Mips::CPU64RegsRegClassID + : Mips::CPURegsRegClassID); return RegNum; } bool MipsAsmParser:: tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - StringRef Mnemonic){ + bool is64BitReg){ SMLoc S = Parser.getTok().getLoc(); int RegNo = -1; - // FIXME: we should make a more generic method for CCR - if ((Mnemonic == "cfc1" || Mnemonic == "ctc1") - && Operands.size() == 2 && Parser.getTok().is(AsmToken::Integer)){ - RegNo = Parser.getTok().getIntVal(); // get the int value - // at the moment only fcc0 is supported - if (RegNo == 0) - RegNo = Mips::FCC0; - } else - RegNo = tryParseRegister(Mnemonic); + RegNo = tryParseRegister(is64BitReg); if (RegNo == -1) return true; @@ -734,7 +786,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, SMLoc S = Parser.getTok().getLoc(); Parser.Lex(); // Eat dollar token. 
// parse register operand - if (!tryParseRegisterOperand(Operands, Mnemonic)) { + if (!tryParseRegisterOperand(Operands, isMips64())) { if (getLexer().is(AsmToken::LParen)) { // check if it is indexed addressing operand Operands.push_back(MipsOperand::CreateToken("(", S)); @@ -743,7 +795,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, return true; Parser.Lex(); // eat dollar - if (tryParseRegisterOperand(Operands, Mnemonic)) + if (tryParseRegisterOperand(Operands, isMips64())) return true; if (!getLexer().is(AsmToken::RParen)) @@ -868,7 +920,7 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { StartLoc = Parser.getTok().getLoc(); - RegNo = tryParseRegister(""); + RegNo = tryParseRegister(isMips64()); EndLoc = Parser.getTok().getLoc(); return (RegNo == (unsigned)-1); } @@ -907,7 +959,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( if (Tok.isNot(AsmToken::LParen)) { MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]); if (Mnemonic->getToken() == "la") { - SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer()-1); + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() -1); Operands.push_back(MipsOperand::CreateImm(IdVal, S, E)); return MatchOperand_Success; } @@ -920,7 +972,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( const AsmToken &Tok1 = Parser.getTok(); // get next token if (Tok1.is(AsmToken::Dollar)) { Parser.Lex(); // Eat '$' token. - if (tryParseRegisterOperand(Operands,"")) { + if (tryParseRegisterOperand(Operands, isMips64())) { Error(Parser.getTok().getLoc(), "unexpected token in operand"); return MatchOperand_ParseFail; } @@ -954,6 +1006,126 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( return MatchOperand_Success; } +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + + if (!isMips64()) + return MatchOperand_NoMatch; + // if the first token is not '$' we have an error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat $ + if(!tryParseRegisterOperand(Operands, true)) { + // set the proper register kind + MipsOperand* op = static_cast<MipsOperand*>(Operands.back()); + op->setRegKind(MipsOperand::Kind_CPU64Regs); + return MatchOperand_Success; + } + return MatchOperand_NoMatch; +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + + // if the first token is not '$' we have an error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat $ + if(!tryParseRegisterOperand(Operands, false)) { + // set the propper register kind + MipsOperand* op = static_cast<MipsOperand*>(Operands.back()); + op->setRegKind(MipsOperand::Kind_CPURegs); + return MatchOperand_Success; + } + return MatchOperand_NoMatch; +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + + // if the first token is not '$' we have error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat $ + + const AsmToken &Tok = Parser.getTok(); // get next token + if (Tok.isNot(AsmToken::Integer)) + return MatchOperand_NoMatch; + + unsigned RegNum = Tok.getIntVal(); + // at the moment only hwreg29 is supported + if (RegNum != 29) + return MatchOperand_ParseFail; + 
+ MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S, + Parser.getTok().getLoc()); + op->setRegKind(MipsOperand::Kind_HWRegs); + Operands.push_back(op); + + Parser.Lex(); // Eat reg number + return MatchOperand_Success; +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + //if the first token is not '$' we have error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat $ + + const AsmToken &Tok = Parser.getTok(); // get next token + if (Tok.isNot(AsmToken::Integer)) + return MatchOperand_NoMatch; + + unsigned RegNum = Tok.getIntVal(); + // at the moment only hwreg29 is supported + if (RegNum != 29) + return MatchOperand_ParseFail; + + MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S, + Parser.getTok().getLoc()); + op->setRegKind(MipsOperand::Kind_HWRegs); + Operands.push_back(op); + + Parser.Lex(); // Eat reg number + return MatchOperand_Success; +} + +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + unsigned RegNum; + //if the first token is not '$' we have error + if (Parser.getTok().isNot(AsmToken::Dollar)) + return MatchOperand_NoMatch; + SMLoc S = Parser.getTok().getLoc(); + Parser.Lex(); // Eat $ + + const AsmToken &Tok = Parser.getTok(); // get next token + if (Tok.is(AsmToken::Integer)) { + RegNum = Tok.getIntVal(); + // at the moment only fcc0 is supported + if (RegNum != 0) + return MatchOperand_ParseFail; + } else if (Tok.is(AsmToken::Identifier)) { + // at the moment only fcc0 is supported + if (Tok.getIdentifier() != "fcc0") + return MatchOperand_ParseFail; + } else + return MatchOperand_NoMatch; + + MipsOperand *op = MipsOperand::CreateReg(Mips::FCC0, S, + Parser.getTok().getLoc()); + op->setRegKind(MipsOperand::Kind_CCRRegs); + Operands.push_back(op); + + Parser.Lex(); // Eat reg number + return MatchOperand_Success; +} + MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) { MCSymbolRefExpr::VariantKind VK diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index ef56e752b2..d6fac0ce59 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -9,7 +9,6 @@ tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel) tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv) tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info) tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM MipsGenMCPseudoLowering.inc -gen-pseudo-lowering) add_public_tablegen_target(MipsCommonTableGen) diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 099a1e79a5..1efeffd328 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -14,7 +14,6 @@ #include "Mips.h" #include "MipsRegisterInfo.h" #include "MipsSubtarget.h" -#include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" @@ -23,9 +22,6 @@ #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" -// Not a normal header, this must come last. 
-#include "MipsGenEDInfo.inc" - using namespace llvm; typedef MCDisassembler::DecodeStatus DecodeStatus; @@ -43,9 +39,6 @@ public: virtual ~MipsDisassemblerBase() {} - /// getEDInfo - See MCDisassembler. - const EDInstInfo *getEDInfo() const; - const MCRegisterInfo *getRegInfo() const { return RegInfo; } private: @@ -93,10 +86,6 @@ public: } // end anonymous namespace -const EDInstInfo *MipsDisassemblerBase::getEDInfo() const { - return instInfoMips; -} - // Forward declare these because the autogenerated code will reference them. // Definitions are further down. static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst, @@ -139,11 +128,6 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); - static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -470,17 +454,6 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { - //Currently only hardware register 29 is supported - if (RegNo != 29) - return MCDisassembler::Fail; - Inst.addOperand(MCOperand::CreateReg(Mips::HWR29_64)); - return MCDisassembler::Success; -} - static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, diff --git a/lib/Target/Mips/InstPrinter/CMakeLists.txt b/lib/Target/Mips/InstPrinter/CMakeLists.txt index c3f4a6e1be..3e9fbf1c55 100644 --- a/lib/Target/Mips/InstPrinter/CMakeLists.txt +++ b/lib/Target/Mips/InstPrinter/CMakeLists.txt @@ -4,4 +4,4 @@ add_llvm_library(LLVMMipsAsmPrinter MipsInstPrinter.cpp ) -add_dependencies(LLVMMipsAsmPrinter intrinsics_gen MipsCommonTableGen) +add_dependencies(LLVMMipsAsmPrinter MipsCommonTableGen) diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 68d3ac5f3b..97c367fbf1 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -149,6 +149,11 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { OS << ')'; } +void MipsInstPrinter::printCPURegs(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printRegName(O, MI->getOperand(OpNo).getReg()); +} + void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 3d8a6f918f..38cac68801 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -87,6 +87,7 @@ public: virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + void printCPURegs(const MCInst *MI, unsigned OpNo, raw_ostream &O); private: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 9242a0bcc9..8e97d527d7 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -38,6 +38,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case FK_Data_4: case FK_Data_8: case Mips::fixup_Mips_LO16: + case 
Mips::fixup_Mips_GPREL16: case Mips::fixup_Mips_GPOFF_HI: case Mips::fixup_Mips_GPOFF_LO: case Mips::fixup_Mips_GOT_PAGE: @@ -214,7 +215,7 @@ public: /// fixup requires the associated instruction to be relaxed. bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { // FIXME. assert(0 && "RelaxInstruction() unimplemented"); diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp index 9d67aa1856..a6797492d8 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp @@ -24,6 +24,10 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) { (TheTriple.getArch() == Triple::mips64)) IsLittleEndian = false; + if ((TheTriple.getArch() == Triple::mips64el) || + (TheTriple.getArch() == Triple::mips64)) + PointerSize = 8; + AlignmentIsInBytes = false; Data16bitsDirective = "\t.2byte\t"; Data32bitsDirective = "\t.4byte\t"; @@ -34,7 +38,7 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) { GPRel32Directive = "\t.gpword\t"; GPRel64Directive = "\t.gpdword\t"; WeakRefDirective = "\t.weak\t"; - + DebugLabelSuffix = "=."; SupportsDebugInformation = true; ExceptionsType = ExceptionHandling::DwarfCFI; HasLEB128 = true; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp index d39a60d41c..fe0cabc923 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp @@ -48,7 +48,7 @@ static void EmitDataMask(int I, MCInst Saved[], MCStreamer &Out) { unsigned Mask = Saved[2].getOperand(2).getReg(); assert((Mips::SP == Addr) && "Unexpected register at stack guard"); - Out.EmitBundleLock(); + Out.EmitBundleLock(false); Out.EmitInstruction(Saved[1]); EmitMask(Out, Addr, Mask); Out.EmitBundleUnlock(); @@ -60,8 +60,7 @@ static void EmitDirectGuardCall(int I, MCInst Saved[], // sfi_nops_to_force_slot2 assert(I == 3 && (Mips::SFI_GUARD_CALL == Saved[0].getOpcode()) && "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL"); - Out.EmitBundleAlignEnd(); - Out.EmitBundleLock(); + Out.EmitBundleLock(true); Out.EmitInstruction(Saved[1]); Out.EmitInstruction(Saved[2]); Out.EmitBundleUnlock(); @@ -78,8 +77,7 @@ static void EmitIndirectGuardCall(int I, MCInst Saved[], unsigned Addr = Saved[0].getOperand(0).getReg(); unsigned Mask = Saved[0].getOperand(2).getReg(); - Out.EmitBundleAlignEnd(); - Out.EmitBundleLock(); + Out.EmitBundleLock(true); EmitMask(Out, Addr, Mask); Out.EmitInstruction(Saved[1]); Out.EmitInstruction(Saved[2]); @@ -95,7 +93,7 @@ static void EmitIndirectGuardJmp(int I, MCInst Saved[], MCStreamer &Out) { unsigned Addr = Saved[0].getOperand(0).getReg(); unsigned Mask = Saved[0].getOperand(2).getReg(); - Out.EmitBundleLock(); + Out.EmitBundleLock(false); EmitMask(Out, Addr, Mask); Out.EmitInstruction(Saved[1]); Out.EmitBundleUnlock(); @@ -110,7 +108,7 @@ static void EmitGuardReturn(int I, MCInst Saved[], MCStreamer &Out) { unsigned Reg = Saved[0].getOperand(0).getReg(); unsigned Mask = Saved[0].getOperand(2).getReg(); - Out.EmitBundleLock(); + Out.EmitBundleLock(false); EmitMask(Out, Reg, Mask); Out.EmitInstruction(Saved[1]); Out.EmitBundleUnlock(); @@ -125,7 +123,7 @@ static void EmitGuardLoadOrStore(int I, MCInst Saved[], MCStreamer &Out) { unsigned Reg = Saved[0].getOperand(0).getReg(); unsigned Mask = Saved[0].getOperand(2).getReg(); - Out.EmitBundleLock(); + Out.EmitBundleLock(false); 
EmitMask(Out, Reg, Mask); Out.EmitInstruction(Saved[1]); Out.EmitBundleUnlock(); diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 936097137d..7237a1d3fe 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -131,7 +131,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, bool NoExecStack) { Triple TheTriple(TT); - return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); + // @LOCALMOD-BEGIN + MCStreamer *Streamer = createELFStreamer(Ctx, MAB, _OS, _Emitter, + RelaxAll, NoExecStack); + if (TheTriple.isOSNaCl()) + Streamer->EmitBundleAlignMode(4); + return Streamer; + // @LOCALMOD-END } extern "C" void LLVMInitializeMipsTargetMC() { diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index bd8c517345..bcf951e861 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -16,7 +16,7 @@ BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \ - MipsGenEDInfo.inc MipsGenDisassemblerTables.inc \ + MipsGenDisassemblerTables.inc \ MipsGenMCPseudoLowering.inc MipsGenAsmMatcher.inc DIRS = InstPrinter Disassembler AsmParser TargetInfo MCTargetDesc diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index d17c1259e5..127fcf28a5 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -13,14 +13,15 @@ #include "Mips16FrameLowering.h" #include "MCTargetDesc/MipsBaseInfo.h" +#include "Mips16InstrInfo.h" #include "MipsInstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Function.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" @@ -29,8 +30,8 @@ using namespace llvm; void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); - const MipsInstrInfo &TII = - *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); + const Mips16InstrInfo &TII = + *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); uint64_t StackSize = MFI->getStackSize(); @@ -38,9 +39,35 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const { // No need to allocate space on the stack. if (StackSize == 0 && !MFI->adjustsStack()) return; + MachineModuleInfo &MMI = MF.getMMI(); + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + MachineLocation DstML, SrcML; + // Adjust stack. 
- if (isInt<16>(-StackSize)) - BuildMI(MBB, MBBI, dl, TII.get(Mips::SaveRaF16)).addImm(StackSize); + TII.makeFrame(Mips::SP, StackSize, MBB, MBBI); + + // emit ".cfi_def_cfa_offset StackSize" + MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel); + DstML = MachineLocation(MachineLocation::VirtualFP); + SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize); + Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML)); + + MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); + DstML = MachineLocation(MachineLocation::VirtualFP, -8); + SrcML = MachineLocation(Mips::S1); + Moves.push_back(MachineMove(CSLabel, DstML, SrcML)); + + DstML = MachineLocation(MachineLocation::VirtualFP, -12); + SrcML = MachineLocation(Mips::S0); + Moves.push_back(MachineMove(CSLabel, DstML, SrcML)); + + DstML = MachineLocation(MachineLocation::VirtualFP, -4); + SrcML = MachineLocation(Mips::RA); + Moves.push_back(MachineMove(CSLabel, DstML, SrcML)); if (hasFP(MF)) BuildMI(MBB, MBBI, dl, TII.get(Mips::MoveR3216), Mips::S0) @@ -52,8 +79,8 @@ void Mips16FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); MachineFrameInfo *MFI = MF.getFrameInfo(); - const MipsInstrInfo &TII = - *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); + const Mips16InstrInfo &TII = + *static_cast<const Mips16InstrInfo*>(MF.getTarget().getInstrInfo()); DebugLoc dl = MBBI->getDebugLoc(); uint64_t StackSize = MFI->getStackSize(); @@ -65,9 +92,8 @@ void Mips16FrameLowering::emitEpilogue(MachineFunction &MF, .addReg(Mips::S0); // Adjust stack. - if (isInt<16>(StackSize)) - // assumes stacksize multiple of 8 - BuildMI(MBB, MBBI, dl, TII.get(Mips::RestoreRaF16)).addImm(StackSize); + // assumes stacksize multiple of 8 + TII.restoreFrame(Mips::SP, StackSize, MBB, MBBI); } bool Mips16FrameLowering:: diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 10b696fe87..91b5ba0800 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -19,11 +19,19 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; +static cl::opt<bool> NeverUseSaveRestore( + "mips16-never-use-save-restore", + cl::init(false), + cl::desc("For testing ability to adjust stack pointer without save/restore instruction"), + cl::Hidden); + + Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm) : MipsInstrInfo(tm, Mips::BimmX16), RI(*tm.getSubtargetImpl(), *this) {} @@ -160,20 +168,135 @@ unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const { return 0; } +// Adjust SP by FrameSize bytes. Save RA, S0, S1 +void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + DebugLoc DL = I != MBB.end() ? 
I->getDebugLoc() : DebugLoc(); + if (!NeverUseSaveRestore) { + if (isUInt<11>(FrameSize)) + BuildMI(MBB, I, DL, get(Mips::SaveRaF16)).addImm(FrameSize); + else { + int Base = 2040; // should create template function like isUInt that returns largest + // possible n bit unsigned integer + int64_t Remainder = FrameSize - Base; + BuildMI(MBB, I, DL, get(Mips::SaveRaF16)). addImm(Base); + if (isInt<16>(-Remainder)) + BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(-Remainder); + else + adjustStackPtrBig(SP, -Remainder, MBB, I, Mips::V0, Mips::V1); + } + + } + else { + // + // sw ra, -4[sp] + // sw s1, -8[sp] + // sw s0, -12[sp] + + MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::RA); + MIB1.addReg(Mips::SP); + MIB1.addImm(-4); + MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::S1); + MIB2.addReg(Mips::SP); + MIB2.addImm(-8); + MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::SwRxSpImmX16), Mips::S0); + MIB3.addReg(Mips::SP); + MIB3.addImm(-12); + adjustStackPtrBig(SP, -FrameSize, MBB, I, Mips::V0, Mips::V1); + } +} + +// Adjust SP by FrameSize bytes. Restore RA, S0, S1 +void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + if (!NeverUseSaveRestore) { + if (isUInt<11>(FrameSize)) + BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)).addImm(FrameSize); + else { + int Base = 2040; // should create template function like isUInt that returns largest + // possible n bit unsigned integer + int64_t Remainder = FrameSize - Base; + if (isInt<16>(Remainder)) + BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(Remainder); + else + adjustStackPtrBig(SP, Remainder, MBB, I, Mips::A0, Mips::A1); + BuildMI(MBB, I, DL, get(Mips::RestoreRaF16)). addImm(Base); + } + } + else { + adjustStackPtrBig(SP, FrameSize, MBB, I, Mips::A0, Mips::A1); + // lw ra, -4[sp] + // lw s1, -8[sp] + // lw s0, -12[sp] + MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::A0); + MIB1.addReg(Mips::SP); + MIB1.addImm(-4); + MachineInstrBuilder MIB0 = BuildMI(MBB, I, DL, get(Mips::Move32R16), Mips::RA); + MIB0.addReg(Mips::A0); + MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::S1); + MIB2.addReg(Mips::SP); + MIB2.addImm(-8); + MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::LwRxSpImmX16), Mips::S0); + MIB3.addReg(Mips::SP); + MIB3.addImm(-12); + } + +} + +// Adjust SP by Amount bytes where bytes can be up to 32bit number. +// This can only be called at times that we know that there is at least one free register. +// This is clearly safe at prologue and epilogue. +// +void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned Reg1, unsigned Reg2) const { + DebugLoc DL = I != MBB.end() ? 
I->getDebugLoc() : DebugLoc(); +// MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo(); +// unsigned Reg1 = RegInfo.createVirtualRegister(&Mips::CPU16RegsRegClass); +// unsigned Reg2 = RegInfo.createVirtualRegister(&Mips::CPU16RegsRegClass); + // + // li reg1, constant + // move reg2, sp + // add reg1, reg1, reg2 + // move sp, reg1 + // + // + MachineInstrBuilder MIB1 = BuildMI(MBB, I, DL, get(Mips::LwConstant32), Reg1); + MIB1.addImm(Amount); + MachineInstrBuilder MIB2 = BuildMI(MBB, I, DL, get(Mips::MoveR3216), Reg2); + MIB2.addReg(Mips::SP, RegState::Kill); + MachineInstrBuilder MIB3 = BuildMI(MBB, I, DL, get(Mips::AdduRxRyRz16), Reg1); + MIB3.addReg(Reg1); + MIB3.addReg(Reg2, RegState::Kill); + MachineInstrBuilder MIB4 = BuildMI(MBB, I, DL, get(Mips::Move32R16), Mips::SP); + MIB4.addReg(Reg1, RegState::Kill); +} + +void Mips16InstrInfo::adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + assert(false && "adjust stack pointer amount exceeded"); +} + /// Adjust SP by Amount bytes. void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - if (isInt<16>(Amount)) { - if (Amount < 0) - BuildMI(MBB, I, DL, get(Mips::SaveDecSpF16)). addImm(-Amount); - else if (Amount > 0) - BuildMI(MBB, I, DL, get(Mips::RestoreIncSpF16)).addImm(Amount); - } + if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16> + BuildMI(MBB, I, DL, get(Mips::AddiuSpImmX16)). addImm(Amount); else - // not implemented for large values yet - assert(false && "adjust stack pointer amount exceeded"); + adjustStackPtrBigUnrestricted(SP, Amount, MBB, I); +} + +/// This function generates the sequence of instructions needed to get the +/// result of adding register REG and immediate IMM. +unsigned +Mips16InstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB, + MachineBasicBlock::iterator II, DebugLoc DL, + unsigned *NewImm) const { + + return 0; } unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index d6ef8d2bf1..3704e2573f 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -64,15 +64,43 @@ public: virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + // Adjust SP by FrameSize bytes. Save RA, S0, S1 + void makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + // Adjust SP by FrameSize bytes. Restore RA, S0, S1 + void restoreFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + /// Adjust SP by Amount bytes. void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + /// Emit a series of instructions to load an immediate. If NewImm is a + /// non-NULL parameter, the last instruction is not emitted, but instead + /// its immediate operand is returned in NewImm. + unsigned loadImmediate(int64_t Imm, MachineBasicBlock &MBB, + MachineBasicBlock::iterator II, DebugLoc DL, + unsigned *NewImm) const; + private: virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opc) const; + + // Adjust SP by Amount bytes where bytes can be up to 32bit number. 
+ void adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned Reg1, unsigned Reg2) const; + + // Adjust SP by Amount bytes where bytes can be up to 32bit number. + void adjustStackPtrBigUnrestricted(unsigned SP, int64_t Amount, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + }; } diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 1e560c6049..e8e2f3ce41 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -57,13 +57,17 @@ class FEXT_I16_ins<bits<5> eop, string asmstr, InstrItinClass itin> : class FEXT_I816_ins_base<bits<3> _func, string asmstr, string asmstr2, InstrItinClass itin>: - FEXT_I816<_func, (outs), (ins uimm16:$imm), !strconcat(asmstr, asmstr2), + FEXT_I816<_func, (outs), (ins simm16:$imm), !strconcat(asmstr, asmstr2), [], itin>; class FEXT_I816_ins<bits<3> _func, string asmstr, InstrItinClass itin>: FEXT_I816_ins_base<_func, asmstr, "\t$imm", itin>; +class FEXT_I816_SP_ins<bits<3> _func, string asmstr, + InstrItinClass itin>: + FEXT_I816_ins_base<_func, asmstr, "\t$$sp, $imm", itin>; + // // Assembler formats in alphabetical order. // Natural and pseudos are mixed together. @@ -352,6 +356,18 @@ class SelT<bits<5> f1, string op1, bits<5> f2, string op2, let Constraints = "$rd = $rd_"; } +// +// 32 bit constant +// +def imm32: Operand<i32>; + +def Constant32: + MipsPseudo16<(outs), (ins imm32:$imm), "\t.word $imm", []>; + +def LwConstant32: + MipsPseudo16<(outs), (ins CPU16Regs:$rx, imm32:$imm), + "lw\t$rx, 1f\n\tb\t2f\n\t.align\t2\n1: \t.word\t$imm\n2:", []>; + // // Some general instruction class info @@ -404,6 +420,18 @@ def AddiuRxRyOffMemX16: // To add a constant to the program counter. // def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>; + +// +// Format: ADDIU sp, immediate MIPS16e +// Purpose: Add Immediate Unsigned Word (2-Operand, SP-Relative, Extended) +// To add a constant to the stack pointer. +// +def AddiuSpImmX16 + : FEXT_I816_SP_ins<0b011, "addiu", IIAlu> { + let Defs = [SP]; + let Uses = [SP]; +} + // // Format: ADDU rz, rx, ry MIPS16e // Purpose: Add Unsigned Word (3-Operand) @@ -576,7 +604,9 @@ def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad; // Purpose: Load Word (SP-Relative, Extended) // To load an SP-relative word from memory as a signed value. 
// -def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad; +def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad{ + let Uses = [SP]; +} // // Format: MOVE r32, rz MIPS16e @@ -688,6 +718,8 @@ def RestoreRaF16: FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size), "restore\t$$ra, $$s0, $$s1, $frame_size", [], IILoad >, MayLoad { let isCodeGenOnly = 1; + let Defs = [S0, S1, RA, SP]; + let Uses = [SP]; } // Use Restore to increment SP since SP is not a Mip 16 register, this @@ -698,6 +730,8 @@ def RestoreIncSpF16: FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size), "restore\t$frame_size", [], IILoad >, MayLoad { let isCodeGenOnly = 1; + let Defs = [SP]; + let Uses = [SP]; } // @@ -712,6 +746,8 @@ def SaveRaF16: FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size), "save\t$$ra, $$s0, $$s1, $frame_size", [], IIStore >, MayStore { let isCodeGenOnly = 1; + let Uses = [RA, SP, S0, S1]; + let Defs = [SP]; } // @@ -723,6 +759,8 @@ def SaveDecSpF16: FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size), "save\t$frame_size", [], IIStore >, MayStore { let isCodeGenOnly = 1; + let Uses = [SP]; + let Defs = [SP]; } // // Format: SB ry, offset(rx) MIPS16e diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp index 6b87ecb08c..c2e09a7206 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -24,9 +24,10 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Constants.h" #include "llvm/DebugInfo.h" -#include "llvm/Function.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -35,7 +36,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" using namespace llvm; @@ -43,6 +43,32 @@ Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST, const Mips16InstrInfo &I) : MipsRegisterInfo(ST), TII(I) {} +bool Mips16RegisterInfo::requiresRegisterScavenging + (const MachineFunction &MF) const { + return true; +} +bool Mips16RegisterInfo::requiresFrameIndexScavenging + (const MachineFunction &MF) const { + return true; +} + +bool Mips16RegisterInfo::useFPForScavengingIndex + (const MachineFunction &MF) const { + return false; +} + +bool Mips16RegisterInfo::saveScavengerRegister + (MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, + const TargetRegisterClass *RC, + unsigned Reg) const { + DebugLoc DL; + TII.copyPhysReg(MBB, I, DL, Mips::T0, Reg, true); + TII.copyPhysReg(MBB, UseMI, DL, Reg, Mips::T0, true); + return true; +} + // This function eliminate ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions void Mips16RegisterInfo:: @@ -120,6 +146,10 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + if (!MI.isDebugValue() && ( ((FrameReg != Mips::SP) && !isInt<16>(Offset)) || + ((FrameReg == Mips::SP) && !isInt<15>(Offset)) )) { + llvm_unreachable("frame offset does not fit in instruction"); + } MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h index 153def20d0..6101739637 100644 --- 
a/lib/Target/Mips/Mips16RegisterInfo.h +++ b/lib/Target/Mips/Mips16RegisterInfo.h @@ -22,11 +22,25 @@ class Mips16InstrInfo; class Mips16RegisterInfo : public MipsRegisterInfo { const Mips16InstrInfo &TII; public: - Mips16RegisterInfo(const MipsSubtarget &Subtarget, const Mips16InstrInfo &TII); + Mips16RegisterInfo(const MipsSubtarget &Subtarget, + const Mips16InstrInfo &TII); void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + + bool requiresRegisterScavenging(const MachineFunction &MF) const; + + bool requiresFrameIndexScavenging(const MachineFunction &MF) const; + + bool useFPForScavengingIndex(const MachineFunction &MF) const; + + bool saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, + const TargetRegisterClass *RC, + unsigned Reg) const; + private: virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 1c852e2fc4..bbeb6498bc 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -34,48 +34,36 @@ def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>; //===----------------------------------------------------------------------===// // Instructions specific format //===----------------------------------------------------------------------===// -// Shifts -// 64-bit shift instructions. let DecoderNamespace = "Mips64" in { -class shift_rotate_imm64<bits<6> func, bits<5> isRotate, string instr_asm, - SDNode OpNode>: - shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt6, shamt, - CPU64Regs>; - -// Mul, Div -class Mult64<bits<6> func, string instr_asm, InstrItinClass itin>: - Mult<func, instr_asm, itin, CPU64Regs, [HI64, LO64]>; -class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>: - Div<op, func, instr_asm, itin, CPU64Regs, [HI64, LO64]>; - -multiclass Atomic2Ops64<PatFrag Op, string Opstr> { - def #NAME# : Atomic2Ops<Op, Opstr, CPU64Regs, CPURegs>, - Requires<[NotN64, HasStdEnc]>; - def _P8 : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>, - Requires<[IsN64, HasStdEnc]> { + +multiclass Atomic2Ops64<PatFrag Op> { + def NAME : Atomic2Ops<Op, CPU64Regs, CPURegs>, + Requires<[NotN64, HasStdEnc]>; + def _P8 : Atomic2Ops<Op, CPU64Regs, CPU64Regs>, + Requires<[IsN64, HasStdEnc]> { let isCodeGenOnly = 1; } } -multiclass AtomicCmpSwap64<PatFrag Op, string Width> { - def #NAME# : AtomicCmpSwap<Op, Width, CPU64Regs, CPURegs>, - Requires<[NotN64, HasStdEnc]>; - def _P8 : AtomicCmpSwap<Op, Width, CPU64Regs, CPU64Regs>, - Requires<[IsN64, HasStdEnc]> { +multiclass AtomicCmpSwap64<PatFrag Op> { + def NAME : AtomicCmpSwap<Op, CPU64Regs, CPURegs>, + Requires<[NotN64, HasStdEnc]>; + def _P8 : AtomicCmpSwap<Op, CPU64Regs, CPU64Regs>, + Requires<[IsN64, HasStdEnc]> { let isCodeGenOnly = 1; } } } let usesCustomInserter = 1, Predicates = [HasStdEnc], DecoderNamespace = "Mips64" in { - defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64, "load_add_64">; - defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64, "load_sub_64">; - defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64, "load_and_64">; - defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64<atomic_load_or_64, "load_or_64">; - defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64<atomic_load_xor_64, "load_xor_64">; - defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64<atomic_load_nand_64, "load_nand_64">; - defm ATOMIC_SWAP_I64 : 
Atomic2Ops64<atomic_swap_64, "swap_64">; - defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64, "64">; + defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64>; + defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64>; + defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64>; + defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64<atomic_load_or_64>; + defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64<atomic_load_xor_64>; + defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64<atomic_load_nand_64>; + defm ATOMIC_SWAP_I64 : Atomic2Ops64<atomic_swap_64>; + defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64>; } //===----------------------------------------------------------------------===// @@ -83,140 +71,147 @@ let usesCustomInserter = 1, Predicates = [HasStdEnc], //===----------------------------------------------------------------------===// let DecoderNamespace = "Mips64" in { /// Arithmetic Instructions (ALU Immediate) -def DADDi : ArithOverflowI<0x18, "daddi", add, simm16_64, immSExt16, - CPU64Regs>; -def DADDiu : ArithLogicI<0x19, "daddiu", add, simm16_64, immSExt16, - CPU64Regs>, IsAsCheapAsAMove; -def DANDi : ArithLogicI<0x0c, "andi", and, uimm16_64, immZExt16, CPU64Regs>; -def SLTi64 : SetCC_I<0x0a, "slti", setlt, simm16_64, immSExt16, CPU64Regs>; -def SLTiu64 : SetCC_I<0x0b, "sltiu", setult, simm16_64, immSExt16, CPU64Regs>; -def ORi64 : ArithLogicI<0x0d, "ori", or, uimm16_64, immZExt16, CPU64Regs>; -def XORi64 : ArithLogicI<0x0e, "xori", xor, uimm16_64, immZExt16, CPU64Regs>; -def LUi64 : LoadUpper<0x0f, "lui", CPU64Regs, uimm16_64>; +def DADDi : ArithLogicI<"daddi", simm16_64, CPU64RegsOpnd>, ADDI_FM<0x18>; +def DADDiu : ArithLogicI<"daddiu", simm16_64, CPU64RegsOpnd, immSExt16, add>, + ADDI_FM<0x19>, IsAsCheapAsAMove; +def DANDi : ArithLogicI<"andi", uimm16_64, CPU64RegsOpnd, immZExt16, and>, + ADDI_FM<0xc>; +def SLTi64 : SetCC_I<"slti", setlt, simm16_64, immSExt16, CPU64Regs>, + SLTI_FM<0xa>; +def SLTiu64 : SetCC_I<"sltiu", setult, simm16_64, immSExt16, CPU64Regs>, + SLTI_FM<0xb>; +def ORi64 : ArithLogicI<"ori", uimm16_64, CPU64RegsOpnd, immZExt16, or>, + ADDI_FM<0xd>; +def XORi64 : ArithLogicI<"xori", uimm16_64, CPU64RegsOpnd, immZExt16, xor>, + ADDI_FM<0xe>; +def LUi64 : LoadUpper<"lui", CPU64Regs, uimm16_64>, LUI_FM; /// Arithmetic Instructions (3-Operand, R-Type) -def DADD : ArithOverflowR<0x00, 0x2C, "dadd", IIAlu, CPU64Regs, 1>; -def DADDu : ArithLogicR<0x00, 0x2d, "daddu", add, IIAlu, CPU64Regs, 1>; -def DSUBu : ArithLogicR<0x00, 0x2f, "dsubu", sub, IIAlu, CPU64Regs>; -def SLT64 : SetCC_R<0x00, 0x2a, "slt", setlt, CPU64Regs>; -def SLTu64 : SetCC_R<0x00, 0x2b, "sltu", setult, CPU64Regs>; -def AND64 : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPU64Regs, 1>; -def OR64 : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPU64Regs, 1>; -def XOR64 : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPU64Regs, 1>; -def NOR64 : LogicNOR<0x00, 0x27, "nor", CPU64Regs>; +def DADD : ArithLogicR<"dadd", CPU64RegsOpnd>, ADD_FM<0, 0x2c>; +def DADDu : ArithLogicR<"daddu", CPU64RegsOpnd, 1, IIAlu, add>, + ADD_FM<0, 0x2d>; +def DSUBu : ArithLogicR<"dsubu", CPU64RegsOpnd, 0, IIAlu, sub>, + ADD_FM<0, 0x2f>; +def SLT64 : SetCC_R<"slt", setlt, CPU64Regs>, ADD_FM<0, 0x2a>; +def SLTu64 : SetCC_R<"sltu", setult, CPU64Regs>, ADD_FM<0, 0x2b>; +def AND64 : ArithLogicR<"and", CPU64RegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>; +def OR64 : ArithLogicR<"or", CPU64RegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>; +def XOR64 : ArithLogicR<"xor", CPU64RegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>; +def NOR64 : LogicNOR<"nor", 
CPU64RegsOpnd>, ADD_FM<0, 0x27>; /// Shift Instructions -def DSLL : shift_rotate_imm64<0x38, 0x00, "dsll", shl>; -def DSRL : shift_rotate_imm64<0x3a, 0x00, "dsrl", srl>; -def DSRA : shift_rotate_imm64<0x3b, 0x00, "dsra", sra>; -def DSLLV : shift_rotate_reg<0x14, 0x00, "dsllv", shl, CPU64Regs>; -def DSRLV : shift_rotate_reg<0x16, 0x00, "dsrlv", srl, CPU64Regs>; -def DSRAV : shift_rotate_reg<0x17, 0x00, "dsrav", sra, CPU64Regs>; -let Pattern = []<dag> in { - def DSLL32 : shift_rotate_imm64<0x3c, 0x00, "dsll32", shl>; - def DSRL32 : shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl>; - def DSRA32 : shift_rotate_imm64<0x3f, 0x00, "dsra32", sra>; -} +def DSLL : shift_rotate_imm<"dsll", shamt, CPU64RegsOpnd, shl, immZExt6>, + SRA_FM<0x38, 0>; +def DSRL : shift_rotate_imm<"dsrl", shamt, CPU64RegsOpnd, srl, immZExt6>, + SRA_FM<0x3a, 0>; +def DSRA : shift_rotate_imm<"dsra", shamt, CPU64RegsOpnd, sra, immZExt6>, + SRA_FM<0x3b, 0>; +def DSLLV : shift_rotate_reg<"dsllv", CPU64RegsOpnd, shl>, SRLV_FM<0x14, 0>; +def DSRLV : shift_rotate_reg<"dsrlv", CPU64RegsOpnd, srl>, SRLV_FM<0x16, 0>; +def DSRAV : shift_rotate_reg<"dsrav", CPU64RegsOpnd, sra>, SRLV_FM<0x17, 0>; +def DSLL32 : shift_rotate_imm<"dsll32", shamt, CPU64RegsOpnd>, SRA_FM<0x3c, 0>; +def DSRL32 : shift_rotate_imm<"dsrl32", shamt, CPU64RegsOpnd>, SRA_FM<0x3e, 0>; +def DSRA32 : shift_rotate_imm<"dsra32", shamt, CPU64RegsOpnd>, SRA_FM<0x3f, 0>; } // Rotate Instructions let Predicates = [HasMips64r2, HasStdEnc], DecoderNamespace = "Mips64" in { - def DROTR : shift_rotate_imm64<0x3a, 0x01, "drotr", rotr>; - def DROTRV : shift_rotate_reg<0x16, 0x01, "drotrv", rotr, CPU64Regs>; + def DROTR : shift_rotate_imm<"drotr", shamt, CPU64RegsOpnd, rotr, immZExt6>, + SRA_FM<0x3a, 1>; + def DROTRV : shift_rotate_reg<"drotrv", CPU64RegsOpnd, rotr>, SRLV_FM<0x16, 1>; } let DecoderNamespace = "Mips64" in { /// Load and Store Instructions /// aligned -defm LB64 : LoadM64<0x20, "lb", sextloadi8>; -defm LBu64 : LoadM64<0x24, "lbu", zextloadi8>; -defm LH64 : LoadM64<0x21, "lh", sextloadi16>; -defm LHu64 : LoadM64<0x25, "lhu", zextloadi16>; -defm LW64 : LoadM64<0x23, "lw", sextloadi32>; -defm LWu64 : LoadM64<0x27, "lwu", zextloadi32>; -defm SB64 : StoreM64<0x28, "sb", truncstorei8>; -defm SH64 : StoreM64<0x29, "sh", truncstorei16>; -defm SW64 : StoreM64<0x2b, "sw", truncstorei32>; -defm LD : LoadM64<0x37, "ld", load>; -defm SD : StoreM64<0x3f, "sd", store>; +defm LB64 : LoadM<"lb", CPU64Regs, sextloadi8>, LW_FM<0x20>; +defm LBu64 : LoadM<"lbu", CPU64Regs, zextloadi8>, LW_FM<0x24>; +defm LH64 : LoadM<"lh", CPU64Regs, sextloadi16>, LW_FM<0x21>; +defm LHu64 : LoadM<"lhu", CPU64Regs, zextloadi16>, LW_FM<0x25>; +defm LW64 : LoadM<"lw", CPU64Regs, sextloadi32>, LW_FM<0x23>; +defm LWu64 : LoadM<"lwu", CPU64Regs, zextloadi32>, LW_FM<0x27>; +defm SB64 : StoreM<"sb", CPU64Regs, truncstorei8>, LW_FM<0x28>; +defm SH64 : StoreM<"sh", CPU64Regs, truncstorei16>, LW_FM<0x29>; +defm SW64 : StoreM<"sw", CPU64Regs, truncstorei32>, LW_FM<0x2b>; +defm LD : LoadM<"ld", CPU64Regs, load>, LW_FM<0x37>; +defm SD : StoreM<"sd", CPU64Regs, store>, LW_FM<0x3f>; /// load/store left/right -let isCodeGenOnly = 1 in { - defm LWL64 : LoadLeftRightM64<0x22, "lwl", MipsLWL>; - defm LWR64 : LoadLeftRightM64<0x26, "lwr", MipsLWR>; - defm SWL64 : StoreLeftRightM64<0x2a, "swl", MipsSWL>; - defm SWR64 : StoreLeftRightM64<0x2e, "swr", MipsSWR>; -} -defm LDL : LoadLeftRightM64<0x1a, "ldl", MipsLDL>; -defm LDR : LoadLeftRightM64<0x1b, "ldr", MipsLDR>; -defm SDL : StoreLeftRightM64<0x2c, "sdl", MipsSDL>; -defm SDR : 
StoreLeftRightM64<0x2d, "sdr", MipsSDR>; +defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, CPU64Regs>, LW_FM<0x22>; +defm LWR64 : LoadLeftRightM<"lwr", MipsLWR, CPU64Regs>, LW_FM<0x26>; +defm SWL64 : StoreLeftRightM<"swl", MipsSWL, CPU64Regs>, LW_FM<0x2a>; +defm SWR64 : StoreLeftRightM<"swr", MipsSWR, CPU64Regs>, LW_FM<0x2e>; + +defm LDL : LoadLeftRightM<"ldl", MipsLDL, CPU64Regs>, LW_FM<0x1a>; +defm LDR : LoadLeftRightM<"ldr", MipsLDR, CPU64Regs>, LW_FM<0x1b>; +defm SDL : StoreLeftRightM<"sdl", MipsSDL, CPU64Regs>, LW_FM<0x2c>; +defm SDR : StoreLeftRightM<"sdr", MipsSDR, CPU64Regs>, LW_FM<0x2d>; /// Load-linked, Store-conditional -def LLD : LLBase<0x34, "lld", CPU64Regs, mem>, - Requires<[NotN64, HasStdEnc]>; -def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>, - Requires<[IsN64, HasStdEnc]> { - let isCodeGenOnly = 1; +let Predicates = [NotN64, HasStdEnc] in { + def LLD : LLBase<"lld", CPU64RegsOpnd, mem>, LW_FM<0x34>; + def SCD : SCBase<"scd", CPU64RegsOpnd, mem>, LW_FM<0x3c>; } -def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>, - Requires<[NotN64, HasStdEnc]>; -def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, - Requires<[IsN64, HasStdEnc]> { - let isCodeGenOnly = 1; + +let Predicates = [IsN64, HasStdEnc], isCodeGenOnly = 1 in { + def LLD_P8 : LLBase<"lld", CPU64RegsOpnd, mem64>, LW_FM<0x34>; + def SCD_P8 : SCBase<"scd", CPU64RegsOpnd, mem64>, LW_FM<0x3c>; } /// Jump and Branch Instructions -def JR64 : IndirectBranch<CPU64Regs>; -def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>; -def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>; -def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>; -def BGTZ64 : CBranchZero<0x07, 0, "bgtz", setgt, CPU64Regs>; -def BLEZ64 : CBranchZero<0x06, 0, "blez", setle, CPU64Regs>; -def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>; +def JR64 : IndirectBranch<CPU64Regs>, MTLO_FM<8>; +def BEQ64 : CBranch<"beq", seteq, CPU64Regs>, BEQ_FM<4>; +def BNE64 : CBranch<"bne", setne, CPU64Regs>, BEQ_FM<5>; +def BGEZ64 : CBranchZero<"bgez", setge, CPU64Regs>, BGEZ_FM<1, 1>; +def BGTZ64 : CBranchZero<"bgtz", setgt, CPU64Regs>, BGEZ_FM<7, 0>; +def BLEZ64 : CBranchZero<"blez", setle, CPU64Regs>, BGEZ_FM<6, 0>; +def BLTZ64 : CBranchZero<"bltz", setlt, CPU64Regs>, BGEZ_FM<1, 0>; } let DecoderNamespace = "Mips64" in -def JALR64 : JumpLinkReg<0x00, 0x09, "jalr", CPU64Regs>; -def TAILCALL64_R : JumpFR<CPU64Regs, MipsTailCall>, IsTailCall; +def JALR64 : JumpLinkReg<"jalr", CPU64Regs>, JALR_FM; +def TAILCALL64_R : JumpFR<CPU64Regs, MipsTailCall>, MTLO_FM<8>, IsTailCall; let DecoderNamespace = "Mips64" in { /// Multiply and Divide Instructions. 
-def DMULT : Mult64<0x1c, "dmult", IIImul>; -def DMULTu : Mult64<0x1d, "dmultu", IIImul>; -def DSDIV : Div64<MipsDivRem, 0x1e, "ddiv", IIIdiv>; -def DUDIV : Div64<MipsDivRemU, 0x1f, "ddivu", IIIdiv>; - -def MTHI64 : MoveToLOHI<0x11, "mthi", CPU64Regs, [HI64]>; -def MTLO64 : MoveToLOHI<0x13, "mtlo", CPU64Regs, [LO64]>; -def MFHI64 : MoveFromLOHI<0x10, "mfhi", CPU64Regs, [HI64]>; -def MFLO64 : MoveFromLOHI<0x12, "mflo", CPU64Regs, [LO64]>; +def DMULT : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1c>; +def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1d>; +def DSDIV : Div<MipsDivRem, "ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, + MULT_FM<0, 0x1e>; +def DUDIV : Div<MipsDivRemU, "ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, + MULT_FM<0, 0x1f>; + +def MTHI64 : MoveToLOHI<"mthi", CPU64Regs, [HI64]>, MTLO_FM<0x11>; +def MTLO64 : MoveToLOHI<"mtlo", CPU64Regs, [LO64]>, MTLO_FM<0x13>; +def MFHI64 : MoveFromLOHI<"mfhi", CPU64Regs, [HI64]>, MFLO_FM<0x10>; +def MFLO64 : MoveFromLOHI<"mflo", CPU64Regs, [LO64]>, MFLO_FM<0x12>; /// Sign Ext In Register Instructions. -def SEB64 : SignExtInReg<0x10, "seb", i8, CPU64Regs>; -def SEH64 : SignExtInReg<0x18, "seh", i16, CPU64Regs>; +def SEB64 : SignExtInReg<"seb", i8, CPU64Regs>, SEB_FM<0x10, 0x20>; +def SEH64 : SignExtInReg<"seh", i16, CPU64Regs>, SEB_FM<0x18, 0x20>; /// Count Leading -def DCLZ : CountLeading0<0x24, "dclz", CPU64Regs>; -def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>; +def DCLZ : CountLeading0<"dclz", CPU64RegsOpnd>, CLO_FM<0x24>; +def DCLO : CountLeading1<"dclo", CPU64RegsOpnd>, CLO_FM<0x25>; /// Double Word Swap Bytes/HalfWords -def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>; -def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>; +def DSBH : SubwordSwap<"dsbh", CPU64RegsOpnd>, SEB_FM<2, 0x24>; +def DSHD : SubwordSwap<"dshd", CPU64RegsOpnd>, SEB_FM<5, 0x24>; + +def LEA_ADDiu64 : EffectiveAddress<"daddiu", CPU64Regs, mem_ea_64>, LW_FM<0x19>; -def LEA_ADDiu64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>; } let DecoderNamespace = "Mips64" in { -def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>; +def RDHWR64 : ReadHardware<CPU64Regs, HW64RegsOpnd>, RDHWR_FM; -def DEXT : ExtBase<3, "dext", CPU64Regs>; +def DEXT : ExtBase<"dext", CPU64RegsOpnd>, EXT_FM<3>; let Pattern = []<dag> in { - def DEXTU : ExtBase<2, "dextu", CPU64Regs>; - def DEXTM : ExtBase<1, "dextm", CPU64Regs>; + def DEXTU : ExtBase<"dextu", CPU64RegsOpnd>, EXT_FM<2>; + def DEXTM : ExtBase<"dextm", CPU64RegsOpnd>, EXT_FM<1>; } -def DINS : InsBase<7, "dins", CPU64Regs>; +def DINS : InsBase<"dins", CPU64RegsOpnd>, EXT_FM<7>; let Pattern = []<dag> in { - def DINSU : InsBase<6, "dinsu", CPU64Regs>; - def DINSM : InsBase<5, "dinsm", CPU64Regs>; + def DINSU : InsBase<"dinsu", CPU64RegsOpnd>, EXT_FM<6>; + def DINSM : InsBase<"dinsm", CPU64RegsOpnd>, EXT_FM<5>; } let isCodeGenOnly = 1, rs = 0, shamt = 0 in { @@ -310,34 +305,56 @@ def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>; //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// -def : InstAlias<"move $dst,$src", (DADD CPU64Regs:$dst,CPU64Regs:$src,ZERO_64)>; +def : InstAlias<"move $dst,$src", (DADDu CPU64RegsOpnd:$dst, + CPU64RegsOpnd:$src,ZERO_64)>, + Requires<[HasMips64]>; +def : InstAlias<"and $rs, $rt, $imm", + (DANDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm)>, + Requires<[HasMips64]>; +def : 
InstAlias<"slt $rs, $rt, $imm", + (SLTi64 CPURegsOpnd:$rs, CPU64Regs:$rt, simm16_64:$imm)>, + Requires<[HasMips64]>; +def : InstAlias<"xor $rs, $rt, $imm", + (XORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm)>, + Requires<[HasMips64]>; +def : InstAlias<"not $rt, $rs", (NOR64 CPU64RegsOpnd:$rt, CPU64RegsOpnd:$rs, ZERO_64)>, + Requires<[HasMips64]>; +def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs)>, Requires<[HasMips64]>; +def : InstAlias<"daddu $rs, $rt, $imm", + (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm)>; +def : InstAlias<"dadd $rs, $rt, $imm", + (DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm)>; /// Move between CPU and coprocessor registers let DecoderNamespace = "Mips64" in { -def MFC0_3OP64 : MFC3OP<0x10, 0, (outs CPU64Regs:$rt), - (ins CPU64Regs:$rd, uimm16:$sel),"mfc0\t$rt, $rd, $sel">; -def MTC0_3OP64 : MFC3OP<0x10, 4, (outs CPU64Regs:$rd, uimm16:$sel), - (ins CPU64Regs:$rt),"mtc0\t$rt, $rd, $sel">; -def MFC2_3OP64 : MFC3OP<0x12, 0, (outs CPU64Regs:$rt), - (ins CPU64Regs:$rd, uimm16:$sel),"mfc2\t$rt, $rd, $sel">; -def MTC2_3OP64 : MFC3OP<0x12, 4, (outs CPU64Regs:$rd, uimm16:$sel), - (ins CPU64Regs:$rt),"mtc2\t$rt, $rd, $sel">; -def DMFC0_3OP64 : MFC3OP<0x10, 1, (outs CPU64Regs:$rt), - (ins CPU64Regs:$rd, uimm16:$sel),"dmfc0\t$rt, $rd, $sel">; -def DMTC0_3OP64 : MFC3OP<0x10, 5, (outs CPU64Regs:$rd, uimm16:$sel), - (ins CPU64Regs:$rt),"dmtc0\t$rt, $rd, $sel">; -def DMFC2_3OP64 : MFC3OP<0x12, 1, (outs CPU64Regs:$rt), - (ins CPU64Regs:$rd, uimm16:$sel),"dmfc2\t$rt, $rd, $sel">; -def DMTC2_3OP64 : MFC3OP<0x12, 5, (outs CPU64Regs:$rd, uimm16:$sel), - (ins CPU64Regs:$rt),"dmtc2\t$rt, $rd, $sel">; +def MFC0_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel), + "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>; +def MTC0_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt), + "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>; +def MFC2_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel), + "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>; +def MTC2_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt), + "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>; +def DMFC0_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel), + "dmfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 1>; +def DMTC0_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt), + "dmtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 5>; +def DMFC2_3OP64 : MFC3OP<(outs CPU64Regs:$rt), (ins CPU64Regs:$rd, uimm16:$sel), + "dmfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 1>; +def DMTC2_3OP64 : MFC3OP<(outs CPU64Regs:$rd, uimm16:$sel), (ins CPU64Regs:$rt), + "dmtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 5>; } // Two operand (implicit 0 selector) versions: def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; -def : InstAlias<"dmfc0 $rt, $rd", (DMFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; -def : InstAlias<"dmtc0 $rt, $rd", (DMTC0_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; -def : InstAlias<"dmfc2 $rt, $rd", (DMFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; -def : InstAlias<"dmtc2 $rt, $rd", (DMTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; +def : InstAlias<"dmfc0 $rt, $rd", + (DMFC0_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; +def : InstAlias<"dmtc0 $rt, $rd", + (DMTC0_3OP64 CPU64Regs:$rd, 0, 
CPU64Regs:$rt)>; +def : InstAlias<"dmfc2 $rt, $rd", + (DMFC2_3OP64 CPU64Regs:$rt, CPU64Regs:$rd, 0)>; +def : InstAlias<"dmtc2 $rt, $rd", + (DMTC2_3OP64 CPU64Regs:$rd, 0, CPU64Regs:$rt)>; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 8269ec6a61..1d1ab71962 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -22,15 +22,15 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" -#include "llvm/BasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/DataLayout.h" -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index 932e6b3d1e..52fa95b182 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -28,8 +28,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td index 457d238a01..559370bc2a 100644 --- a/lib/Target/Mips/MipsCondMov.td +++ b/lib/Target/Mips/MipsCondMov.td @@ -16,42 +16,37 @@ // MipsISelLowering::EmitInstrWithCustomInserter if target does not have // conditional move instructions. 
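The CMov_* classes that follow model the MIPS conditional-move semantics; the "$F = $rd" tie expresses that the destination keeps its old value whenever the move does not fire. A minimal standalone sketch of that behaviour (plain C++, not LLVM code; the helper names movz/movn are only illustrative):

    #include <cassert>
    #include <cstdint>

    // movz rd, rs, rt : rd <- rs if rt == 0, otherwise rd is unchanged.
    static uint32_t movz(uint32_t rd_old, uint32_t rs, uint32_t rt) {
      return rt == 0 ? rs : rd_old;
    }

    // movn rd, rs, rt : rd <- rs if rt != 0, otherwise rd is unchanged.
    static uint32_t movn(uint32_t rd_old, uint32_t rs, uint32_t rt) {
      return rt != 0 ? rs : rd_old;
    }

    int main() {
      assert(movz(7, 42, 0) == 42 && movz(7, 42, 1) == 7);
      assert(movn(7, 42, 1) == 42 && movn(7, 42, 0) == 7);
      return 0;
    }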
// cond:int, data:int -class CondMovIntInt<RegisterClass CRC, RegisterClass DRC, bits<6> funct, - string instr_asm> : - FR<0, funct, (outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F), - !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], NoItinerary> { - let shamt = 0; +class CMov_I_I_FT<string opstr, RegisterClass CRC, RegisterClass DRC, + InstrItinClass Itin> : + InstSE<(outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F), + !strconcat(opstr, "\t$rd, $rs, $rt"), [], Itin, FrmFR> { let Constraints = "$F = $rd"; } // cond:int, data:float -class CondMovIntFP<RegisterClass CRC, RegisterClass DRC, bits<5> fmt, - bits<6> func, string instr_asm> : - FFR<0x11, func, fmt, (outs DRC:$fd), (ins DRC:$fs, CRC:$rt, DRC:$F), - !strconcat(instr_asm, "\t$fd, $fs, $rt"), []> { - bits<5> rt; - let ft = rt; +class CMov_I_F_FT<string opstr, RegisterClass CRC, RegisterClass DRC, + InstrItinClass Itin> : + InstSE<(outs DRC:$fd), (ins DRC:$fs, CRC:$rt, DRC:$F), + !strconcat(opstr, "\t$fd, $fs, $rt"), [], Itin, FrmFR> { let Constraints = "$F = $fd"; } // cond:float, data:int -class CondMovFPInt<RegisterClass RC, SDNode cmov, bits<1> tf, - string instr_asm> : - FCMOV<tf, (outs RC:$rd), (ins RC:$rs, RC:$F), - !strconcat(instr_asm, "\t$rd, $rs, $$fcc0"), - [(set RC:$rd, (cmov RC:$rs, RC:$F))]> { - let cc = 0; +class CMov_F_I_FT<string opstr, RegisterClass RC, InstrItinClass Itin, + SDPatternOperator OpNode = null_frag> : + InstSE<(outs RC:$rd), (ins RC:$rs, RC:$F), + !strconcat(opstr, "\t$rd, $rs, $$fcc0"), + [(set RC:$rd, (OpNode RC:$rs, RC:$F))], Itin, FrmFR> { let Uses = [FCR31]; let Constraints = "$F = $rd"; } // cond:float, data:float -class CondMovFPFP<RegisterClass RC, SDNode cmov, bits<5> fmt, bits<1> tf, - string instr_asm> : - FFCMOV<fmt, tf, (outs RC:$fd), (ins RC:$fs, RC:$F), - !strconcat(instr_asm, "\t$fd, $fs, $$fcc0"), - [(set RC:$fd, (cmov RC:$fs, RC:$F))]> { - let cc = 0; +class CMov_F_F_FT<string opstr, RegisterClass RC, InstrItinClass Itin, + SDPatternOperator OpNode = null_frag> : + InstSE<(outs RC:$fd), (ins RC:$fs, RC:$F), + !strconcat(opstr, "\t$fd, $fs, $$fcc0"), + [(set RC:$fd, (OpNode RC:$fs, RC:$F))], Itin, FrmFR> { let Uses = [FCR31]; let Constraints = "$F = $fd"; } @@ -106,81 +101,103 @@ multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst, } // Instantiation of instructions. 
-def MOVZ_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0a, "movz">; +def MOVZ_I_I : CMov_I_I_FT<"movz", CPURegs, CPURegs, NoItinerary>, + ADD_FM<0, 0xa>; let Predicates = [HasStdEnc], DecoderNamespace = "Mips64" in { - def MOVZ_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0a, "movz">; - def MOVZ_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz"> { + def MOVZ_I_I64 : CMov_I_I_FT<"movz", CPURegs, CPU64Regs, NoItinerary>, + ADD_FM<0, 0xa>; + def MOVZ_I64_I : CMov_I_I_FT<"movz", CPU64Regs, CPURegs, NoItinerary>, + ADD_FM<0, 0xa> { let isCodeGenOnly = 1; } - def MOVZ_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0a, "movz"> { + def MOVZ_I64_I64 : CMov_I_I_FT<"movz", CPU64Regs, CPU64Regs, NoItinerary>, + ADD_FM<0, 0xa> { let isCodeGenOnly = 1; } } -def MOVN_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0b, "movn">; +def MOVN_I_I : CMov_I_I_FT<"movn", CPURegs, CPURegs, NoItinerary>, + ADD_FM<0, 0xb>; let Predicates = [HasStdEnc], DecoderNamespace = "Mips64" in { - def MOVN_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0b, "movn">; - def MOVN_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0b, "movn"> { + def MOVN_I_I64 : CMov_I_I_FT<"movn", CPURegs, CPU64Regs, NoItinerary>, + ADD_FM<0, 0xb>; + def MOVN_I64_I : CMov_I_I_FT<"movn", CPU64Regs, CPURegs, NoItinerary>, + ADD_FM<0, 0xb> { let isCodeGenOnly = 1; } - def MOVN_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0b, "movn"> { + def MOVN_I64_I64 : CMov_I_I_FT<"movn", CPU64Regs, CPU64Regs, NoItinerary>, + ADD_FM<0, 0xb> { let isCodeGenOnly = 1; } } -def MOVZ_I_S : CondMovIntFP<CPURegs, FGR32, 16, 18, "movz.s">; -def MOVZ_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 18, "movz.s">, - Requires<[HasMips64, HasStdEnc]> { +def MOVZ_I_S : CMov_I_F_FT<"movz.s", CPURegs, FGR32, IIFmove>, + CMov_I_F_FM<18, 16>; +def MOVZ_I64_S : CMov_I_F_FT<"movz.s", CPU64Regs, FGR32, IIFmove>, + CMov_I_F_FM<18, 16>, Requires<[HasMips64, HasStdEnc]> { let DecoderNamespace = "Mips64"; } -def MOVN_I_S : CondMovIntFP<CPURegs, FGR32, 16, 19, "movn.s">; -def MOVN_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 19, "movn.s">, - Requires<[HasMips64, HasStdEnc]> { +def MOVN_I_S : CMov_I_F_FT<"movn.s", CPURegs, FGR32, IIFmove>, + CMov_I_F_FM<19, 16>; +def MOVN_I64_S : CMov_I_F_FT<"movn.s", CPU64Regs, FGR32, IIFmove>, + CMov_I_F_FM<19, 16>, Requires<[HasMips64, HasStdEnc]> { let DecoderNamespace = "Mips64"; } let Predicates = [NotFP64bit, HasStdEnc] in { - def MOVZ_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 18, "movz.d">; - def MOVN_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 19, "movn.d">; + def MOVZ_I_D32 : CMov_I_F_FT<"movz.d", CPURegs, AFGR64, IIFmove>, + CMov_I_F_FM<18, 17>; + def MOVN_I_D32 : CMov_I_F_FT<"movn.d", CPURegs, AFGR64, IIFmove>, + CMov_I_F_FM<19, 17>; } let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { - def MOVZ_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 18, "movz.d">; - def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d"> { + def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", CPURegs, FGR64, IIFmove>, + CMov_I_F_FM<18, 17>; + def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", CPU64Regs, FGR64, IIFmove>, + CMov_I_F_FM<18, 17> { let isCodeGenOnly = 1; } - def MOVN_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 19, "movn.d">; - def MOVN_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 19, "movn.d"> { + def MOVN_I_D64 : CMov_I_F_FT<"movn.d", CPURegs, FGR64, IIFmove>, + CMov_I_F_FM<19, 17>; + def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", CPU64Regs, FGR64, IIFmove>, + CMov_I_F_FM<19, 17> { let isCodeGenOnly = 1; } } -def MOVT_I : CondMovFPInt<CPURegs, MipsCMovFP_T, 1, "movt">; -def 
MOVT_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_T, 1, "movt">, - Requires<[HasMips64, HasStdEnc]> { +def MOVT_I : CMov_F_I_FT<"movt", CPURegs, IIAlu, MipsCMovFP_T>, CMov_F_I_FM<1>; +def MOVT_I64 : CMov_F_I_FT<"movt", CPU64Regs, IIAlu, MipsCMovFP_T>, + CMov_F_I_FM<1>, Requires<[HasMips64, HasStdEnc]> { let DecoderNamespace = "Mips64"; } -def MOVF_I : CondMovFPInt<CPURegs, MipsCMovFP_F, 0, "movf">; -def MOVF_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_F, 0, "movf">, - Requires<[HasMips64, HasStdEnc]> { +def MOVF_I : CMov_F_I_FT<"movf", CPURegs, IIAlu, MipsCMovFP_F>, CMov_F_I_FM<0>; +def MOVF_I64 : CMov_F_I_FT<"movf", CPU64Regs, IIAlu, MipsCMovFP_F>, + CMov_F_I_FM<0>, Requires<[HasMips64, HasStdEnc]> { let DecoderNamespace = "Mips64"; } -def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">; -def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">; +def MOVT_S : CMov_F_F_FT<"movt.s", FGR32, IIFmove, MipsCMovFP_T>, + CMov_F_F_FM<16, 1>; +def MOVF_S : CMov_F_F_FT<"movf.s", FGR32, IIFmove, MipsCMovFP_F>, + CMov_F_F_FM<16, 0>; let Predicates = [NotFP64bit, HasStdEnc] in { - def MOVT_D32 : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">; - def MOVF_D32 : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">; + def MOVT_D32 : CMov_F_F_FT<"movt.d", AFGR64, IIFmove, MipsCMovFP_T>, + CMov_F_F_FM<17, 1>; + def MOVF_D32 : CMov_F_F_FT<"movf.d", AFGR64, IIFmove, MipsCMovFP_F>, + CMov_F_F_FM<17, 0>; } let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { - def MOVT_D64 : CondMovFPFP<FGR64, MipsCMovFP_T, 17, 1, "movt.d">; - def MOVF_D64 : CondMovFPFP<FGR64, MipsCMovFP_F, 17, 0, "movf.d">; + def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64, IIFmove, MipsCMovFP_T>, + CMov_F_F_FM<17, 1>; + def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64, IIFmove, MipsCMovFP_F>, + CMov_F_F_FM<17, 0>; } // Instantiation of conditional move patterns. diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td index 8e01d06596..a72a763fde 100644 --- a/lib/Target/Mips/MipsDSPInstrFormats.td +++ b/lib/Target/Mips/MipsDSPInstrFormats.td @@ -24,8 +24,9 @@ class DSPInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> { let Predicates = [HasDSP]; } -class PseudoDSP<dag outs, dag ins, list<dag> pattern>: - MipsPseudo<outs, ins, "", pattern> { +class PseudoDSP<dag outs, dag ins, list<dag> pattern, + InstrItinClass itin = IIPseudo>: + MipsPseudo<outs, ins, pattern, itin> { let Predicates = [HasDSP]; } diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index ef9402865b..9531b91487 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -75,10 +75,6 @@ def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>; def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>; // Flags. 
-class IsCommutable { - bit isCommutable = 1; -} - class UseAC { list<Register> Uses = [AC0]; } @@ -490,7 +486,7 @@ class MULT_DESC_BASE<string instr_asm> { } class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> : - MipsPseudo<(outs CPURegs:$dst), (ins), "", [(set CPURegs:$dst, (OpNode))]> { + MipsPseudo<(outs CPURegs:$dst), (ins), [(set CPURegs:$dst, (OpNode))]> { list<Register> Uses = [DSPCtrl]; bit usesCustomInserter = 1; } diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index e3cdab427d..eb9d49fefb 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -22,8 +22,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Function.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 0a8c0b7994..0a7031acad 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -26,15 +26,15 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/GlobalValue.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Type.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 89b9cd150e..1a78913efe 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -21,7 +21,6 @@ #include "MipsTargetMachine.h" #include "MipsTargetObjectFile.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -29,10 +28,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -50,6 +50,13 @@ static cl::opt<bool> LargeGOT("mxgot", cl::Hidden, cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false)); +static cl::opt<bool> +Mips16HardFloat("mips16-hard-float", cl::NotHidden, + cl::desc("MIPS: mips16 hard float enable."), + cl::init(false)); + + + static const uint16_t O32IntRegs[4] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; @@ -198,6 +205,41 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { } } +void MipsTargetLowering::setMips16HardFloatLibCalls() { + setLibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); + 
setLibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); + setLibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); + setLibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); + setLibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); + setLibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); + setLibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); + setLibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); + setLibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); + setLibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); + setLibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); + setLibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); + setLibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); + setLibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); + setLibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); + setLibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); + setLibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); + setLibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); + setLibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); + setLibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); + setLibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); + setLibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); + setLibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); + setLibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); + setLibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); + setLibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); + setLibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); + setLibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); + setLibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); + setLibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); + setLibcallName(RTLIB::O_F32, "__mips16_unordsf2"); + setLibcallName(RTLIB::O_F64, "__mips16_unorddf2"); +} + MipsTargetLowering:: MipsTargetLowering(MipsTargetMachine &TM) : TargetLowering(TM, new MipsTargetObjectFile()), @@ -218,6 +260,8 @@ MipsTargetLowering(MipsTargetMachine &TM) if (Subtarget->inMips16Mode()) { addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass); + if (Mips16HardFloat) + setMips16HardFloatLibCalls(); } if (Subtarget->hasDSP()) { @@ -488,7 +532,9 @@ MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { } EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { - return MVT::i32; + if (!VT.isVector()) + return MVT::i32; + return VT.changeVectorElementTypeToInteger(); } // SelectMadd - @@ -1020,7 +1066,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC) { - assert(RC->contains(PReg) && "Not the correct regclass!"); unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); MF.getRegInfo().addLiveIn(PReg, VReg); return VReg; @@ -2234,7 +2279,7 @@ SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); unsigned RA = IsN64 ? Mips::RA_64 : Mips::RA; MFI->setReturnAddressIsTaken(true); @@ -2932,12 +2977,14 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. 
bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25 - bool GlobalOrExternal = false; + bool GlobalOrExternal = false, InternalLinkage = false; SDValue CalleeLo; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { if (IsPICCall) { - if (G->getGlobal()->hasInternalLinkage()) + InternalLinkage = G->getGlobal()->hasInternalLinkage(); + + if (InternalLinkage) Callee = getAddrLocal(Callee, DAG, HasMips64); else if (LargeGOT) Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16, @@ -2984,8 +3031,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } // Insert node "GP copy globalreg" before call to function. - // Lazy-binding stubs require GP to point to the GOT. - if (IsPICCall) { + // + // R_MIPS_CALL* operators (emitted when non-internal functions are called + // in PIC mode) allow symbols to be resolved via lazy binding. + // The lazy binding stub requires GP to point to the GOT. + if (IsPICCall && !InternalLinkage) { unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP; EVT Ty = IsN64 ? MVT::i64 : MVT::i32; RegsToPass.push_back(std::make_pair(GPReg, GetGlobalReg(DAG, Ty))); @@ -3134,7 +3184,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, const TargetRegisterClass *RC; if (RegVT == MVT::i32) - RC = &Mips::CPURegsRegClass; + RC = Subtarget->inMips16Mode()? &Mips::CPU16RegsRegClass : + &Mips::CPURegsRegClass; else if (RegVT == MVT::i64) RC = &Mips::CPU64RegsRegClass; else if (RegVT == MVT::f32) @@ -3559,7 +3610,8 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { } EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, bool IsZeroVal, + unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { if (Subtarget->hasMips64()) @@ -3738,7 +3790,7 @@ copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains, return; // Copy arg registers. - EVT RegTy = MVT::getIntegerVT(CC.regSize() * 8); + MVT RegTy = MVT::getIntegerVT(CC.regSize() * 8); const TargetRegisterClass *RC = getRegClassFor(RegTy); for (unsigned I = 0; I < ByVal.NumRegs; ++I) { @@ -3860,7 +3912,7 @@ MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains, const CCState &CCInfo = CC.getCCInfo(); unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumRegs); unsigned RegSize = CC.regSize(); - EVT RegTy = MVT::getIntegerVT(RegSize * 8); + MVT RegTy = MVT::getIntegerVT(RegSize * 8); const TargetRegisterClass *RC = getRegClassFor(RegTy); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 08165a036c..12b01e919e 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -174,6 +174,8 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; private: + void setMips16HardFloatLibCalls(); + /// ByValArgInfo - Byval argument information. struct ByValArgInfo { unsigned FirstIdx; // Index of the first register used. 
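The R_MIPS_CALL*/lazy-binding comment added in LowerCall above states the rule this hunk implements: the $gp copy that feeds the lazy-binding stub is only needed when the call actually goes through the GOT, that is, a PIC call to a callee without internal linkage (internal-linkage callees are reached via getAddrLocal instead). A tiny standalone sketch of that decision (plain C++, not LLVM code; needsGotPointerCopy is a made-up name for illustration):

    #include <cassert>

    // Mirrors the new "IsPICCall && !InternalLinkage" guard in LowerCall.
    static bool needsGotPointerCopy(bool isPICCall, bool calleeHasInternalLinkage) {
      return isPICCall && !calleeHasInternalLinkage;
    }

    int main() {
      assert(needsGotPointerCopy(true, false));    // external PIC call: copy $gp
      assert(!needsGotPointerCopy(true, true));    // internal-linkage callee: skip
      assert(!needsGotPointerCopy(false, false));  // non-PIC call: no GOT, no copy
      return 0;
    }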
@@ -368,7 +370,8 @@ namespace llvm { virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, - unsigned SrcAlign, bool IsZeroVal, + unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 5daed320d2..f6d82daad6 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -86,273 +86,320 @@ def fpimm0neg : PatLeaf<(fpimm), [{ // Only S32 and D32 are supported right now. //===----------------------------------------------------------------------===// -// FP load. -let DecoderMethod = "DecodeFMem" in { -class FPLoad<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>: - FMem<op, (outs RC:$ft), (ins MemOpnd:$addr), - !strconcat(opstr, "\t$ft, $addr"), [(set RC:$ft, (load addr:$addr))], - IILoad>; - -// FP store. -class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>: - FMem<op, (outs), (ins RC:$ft, MemOpnd:$addr), - !strconcat(opstr, "\t$ft, $addr"), [(store RC:$ft, addr:$addr)], - IIStore>; -} -// FP indexed load. -class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC, - RegisterClass PRC, SDPatternOperator FOp = null_frag>: - FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index), - !strconcat(opstr, "\t$fd, ${index}(${base})"), - [(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> { - let fs = 0; -} - -// FP indexed store. -class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC, - RegisterClass PRC, SDPatternOperator FOp= null_frag>: - FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index), - !strconcat(opstr, "\t$fs, ${index}(${base})"), - [(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> { - let fd = 0; -} - -// Instructions that convert an FP value to 32-bit fixed point. -multiclass FFR1_W_M<bits<6> funct, string opstr> { - def _S : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>; - def _D32 : FFR1<funct, 17, opstr, "w.d", FGR32, AFGR64>, +class ADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin, bit IsComm, + SDPatternOperator OpNode= null_frag> : + InstSE<(outs RC:$fd), (ins RC:$fs, RC:$ft), + !strconcat(opstr, "\t$fd, $fs, $ft"), + [(set RC:$fd, (OpNode RC:$fs, RC:$ft))], Itin, FrmFR> { + let isCommutable = IsComm; +} + +multiclass ADDS_M<string opstr, InstrItinClass Itin, bit IsComm, + SDPatternOperator OpNode = null_frag> { + def _D32 : ADDS_FT<opstr, AFGR64, Itin, IsComm, OpNode>, Requires<[NotFP64bit, HasStdEnc]>; - def _D64 : FFR1<funct, 17, opstr, "w.d", FGR32, FGR64>, + def _D64 : ADDS_FT<opstr, FGR64, Itin, IsComm, OpNode>, Requires<[IsFP64bit, HasStdEnc]> { - let DecoderNamespace = "Mips64"; + string DecoderNamespace = "Mips64"; } } -// Instructions that convert an FP value to 64-bit fixed point. -let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in -multiclass FFR1_L_M<bits<6> funct, string opstr> { - def _S : FFR1<funct, 16, opstr, "l.s", FGR64, FGR32>; - def _D64 : FFR1<funct, 17, opstr, "l.d", FGR64, FGR64>; -} +class ABSS_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC, + InstrItinClass Itin, SDPatternOperator OpNode= null_frag> : + InstSE<(outs DstRC:$fd), (ins SrcRC:$fs), !strconcat(opstr, "\t$fd, $fs"), + [(set DstRC:$fd, (OpNode SrcRC:$fs))], Itin, FrmFR>; -// FP-to-FP conversion instructions. 
-multiclass FFR1P_M<bits<6> funct, string opstr, SDNode OpNode> { - def _S : FFR1P<funct, 16, opstr, "s", FGR32, FGR32, OpNode>; - def _D32 : FFR1P<funct, 17, opstr, "d", AFGR64, AFGR64, OpNode>, +multiclass ABSS_M<string opstr, InstrItinClass Itin, + SDPatternOperator OpNode= null_frag> { + def _D32 : ABSS_FT<opstr, AFGR64, AFGR64, Itin, OpNode>, Requires<[NotFP64bit, HasStdEnc]>; - def _D64 : FFR1P<funct, 17, opstr, "d", FGR64, FGR64, OpNode>, + def _D64 : ABSS_FT<opstr, FGR64, FGR64, Itin, OpNode>, Requires<[IsFP64bit, HasStdEnc]> { - let DecoderNamespace = "Mips64"; + string DecoderNamespace = "Mips64"; } } -multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> { - let isCommutable = isComm in { - def _S : FFR2P<funct, 16, opstr, "s", FGR32, OpNode>; - def _D32 : FFR2P<funct, 17, opstr, "d", AFGR64, OpNode>, +multiclass ROUND_M<string opstr, InstrItinClass Itin> { + def _D32 : ABSS_FT<opstr, FGR32, AFGR64, Itin>, Requires<[NotFP64bit, HasStdEnc]>; - def _D64 : FFR2P<funct, 17, opstr, "d", FGR64, OpNode>, + def _D64 : ABSS_FT<opstr, FGR32, FGR64, Itin>, Requires<[IsFP64bit, HasStdEnc]> { let DecoderNamespace = "Mips64"; } } -} -// FP madd/msub/nmadd/nmsub instruction classes. -class FMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr, - SDNode OpNode, RegisterClass RC> : - FFMADDSUB<funct, fmt, (outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft), - !strconcat(opstr, ".", fmtstr, "\t$fd, $fr, $fs, $ft"), - [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))]>; - -class FNMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr, - SDNode OpNode, RegisterClass RC> : - FFMADDSUB<funct, fmt, (outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft), - !strconcat(opstr, ".", fmtstr, "\t$fd, $fr, $fs, $ft"), - [(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))]>; +class MFC1_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC, + InstrItinClass Itin, SDPatternOperator OpNode= null_frag> : + InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"), + [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>; + +class MTC1_FT<string opstr, RegisterClass DstRC, RegisterClass SrcRC, + InstrItinClass Itin, SDPatternOperator OpNode= null_frag> : + InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"), + [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>; + +class MFC1_FT_CCR<string opstr, RegisterClass DstRC, RegisterOperand SrcRC, + InstrItinClass Itin, SDPatternOperator OpNode= null_frag> : + InstSE<(outs DstRC:$rt), (ins SrcRC:$fs), !strconcat(opstr, "\t$rt, $fs"), + [(set DstRC:$rt, (OpNode SrcRC:$fs))], Itin, FrmFR>; + +class MTC1_FT_CCR<string opstr, RegisterOperand DstRC, RegisterClass SrcRC, + InstrItinClass Itin, SDPatternOperator OpNode= null_frag> : + InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"), + [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>; + +class LW_FT<string opstr, RegisterClass RC, InstrItinClass Itin, + Operand MemOpnd, SDPatternOperator OpNode= null_frag> : + InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), + [(set RC:$rt, (OpNode addr:$addr))], Itin, FrmFI> { + let DecoderMethod = "DecodeFMem"; +} + +class SW_FT<string opstr, RegisterClass RC, InstrItinClass Itin, + Operand MemOpnd, SDPatternOperator OpNode= null_frag> : + InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), + [(OpNode RC:$rt, addr:$addr)], Itin, FrmFI> { + let DecoderMethod = "DecodeFMem"; +} + +class MADDS_FT<string opstr, RegisterClass RC, 
InstrItinClass Itin, + SDPatternOperator OpNode = null_frag> : + InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft), + !strconcat(opstr, "\t$fd, $fr, $fs, $ft"), + [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))], Itin, FrmFR>; + +class NMADDS_FT<string opstr, RegisterClass RC, InstrItinClass Itin, + SDPatternOperator OpNode = null_frag> : + InstSE<(outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft), + !strconcat(opstr, "\t$fd, $fr, $fs, $ft"), + [(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))], + Itin, FrmFR>; + +class LWXC1_FT<string opstr, RegisterClass DRC, RegisterClass PRC, + InstrItinClass Itin, SDPatternOperator OpNode = null_frag> : + InstSE<(outs DRC:$fd), (ins PRC:$base, PRC:$index), + !strconcat(opstr, "\t$fd, ${index}(${base})"), + [(set DRC:$fd, (OpNode (add PRC:$base, PRC:$index)))], Itin, FrmFI>; + +class SWXC1_FT<string opstr, RegisterClass DRC, RegisterClass PRC, + InstrItinClass Itin, SDPatternOperator OpNode = null_frag> : + InstSE<(outs), (ins DRC:$fs, PRC:$base, PRC:$index), + !strconcat(opstr, "\t$fs, ${index}(${base})"), + [(OpNode DRC:$fs, (add PRC:$base, PRC:$index))], Itin, FrmFI>; + +class BC1F_FT<string opstr, InstrItinClass Itin, + SDPatternOperator Op = null_frag> : + InstSE<(outs), (ins brtarget:$offset), !strconcat(opstr, "\t$offset"), + [(MipsFPBrcond Op, bb:$offset)], Itin, FrmFI> { + let isBranch = 1; + let isTerminator = 1; + let hasDelaySlot = 1; + let Defs = [AT]; + let Uses = [FCR31]; +} + +class CEQS_FT<string typestr, RegisterClass RC, InstrItinClass Itin, + SDPatternOperator OpNode = null_frag> : + InstSE<(outs), (ins RC:$fs, RC:$ft, condcode:$cond), + !strconcat("c.$cond.", typestr, "\t$fs, $ft"), + [(OpNode RC:$fs, RC:$ft, imm:$cond)], Itin, FrmFR> { + let Defs = [FCR31]; +} //===----------------------------------------------------------------------===// // Floating Point Instructions //===----------------------------------------------------------------------===// -defm ROUND_W : FFR1_W_M<0xc, "round">; -defm ROUND_L : FFR1_L_M<0x8, "round">; -defm TRUNC_W : FFR1_W_M<0xd, "trunc">; -defm TRUNC_L : FFR1_L_M<0x9, "trunc">; -defm CEIL_W : FFR1_W_M<0xe, "ceil">; -defm CEIL_L : FFR1_L_M<0xa, "ceil">; -defm FLOOR_W : FFR1_W_M<0xf, "floor">; -defm FLOOR_L : FFR1_L_M<0xb, "floor">; -defm CVT_W : FFR1_W_M<0x24, "cvt">, NeverHasSideEffects; -//defm CVT_L : FFR1_L_M<0x25, "cvt">; - -def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>, NeverHasSideEffects; -def CVT_L_S : FFR1<0x25, 16, "cvt", "l.s", FGR64, FGR32>, NeverHasSideEffects; -def CVT_L_D64: FFR1<0x25, 17, "cvt", "l.d", FGR64, FGR64>, NeverHasSideEffects; +def ROUND_W_S : ABSS_FT<"round.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xc, 16>; +def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xd, 16>; +def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xe, 16>; +def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0xf, 16>; +def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32, FGR32, IIFcvt>, ABSS_FM<0x24, 16>, + NeverHasSideEffects; + +defm ROUND_W : ROUND_M<"round.w.d", IIFcvt>, ABSS_FM<0xc, 17>; +defm TRUNC_W : ROUND_M<"trunc.w.d", IIFcvt>, ABSS_FM<0xd, 17>; +defm CEIL_W : ROUND_M<"ceil.w.d", IIFcvt>, ABSS_FM<0xe, 17>; +defm FLOOR_W : ROUND_M<"floor.w.d", IIFcvt>, ABSS_FM<0xf, 17>; +defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>, + NeverHasSideEffects; + +let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { + def ROUND_L_S : ABSS_FT<"round.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x8, 16>; + def ROUND_L_D64 : 
ABSS_FT<"round.l.d", FGR64, FGR64, IIFcvt>, + ABSS_FM<0x8, 17>; + def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x9, 16>; + def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64, FGR64, IIFcvt>, + ABSS_FM<0x9, 17>; + def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0xa, 16>; + def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0xa, 17>; + def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0xb, 16>; + def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64, FGR64, IIFcvt>, + ABSS_FM<0xb, 17>; +} + +def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32, FGR32, IIFcvt>, ABSS_FM<0x20, 20>; +def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x25, 16>, + NeverHasSideEffects; +def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64, FGR64, IIFcvt>, ABSS_FM<0x25, 17>, + NeverHasSideEffects; let Predicates = [NotFP64bit, HasStdEnc], neverHasSideEffects = 1 in { - def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>; - def CVT_D32_W : FFR1<0x21, 20, "cvt", "d.w", AFGR64, FGR32>; - def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>; + def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32, AFGR64, IIFcvt>, ABSS_FM<0x20, 17>; + def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>; + def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>; } let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64", neverHasSideEffects = 1 in { - def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>; - def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>; - def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>; - def CVT_D64_S : FFR1<0x21, 16, "cvt", "d.s", FGR64, FGR32>; - def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>; + def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 17>; + def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32, FGR64, IIFcvt>, ABSS_FM<0x20, 21>; + def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 20>; + def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64, FGR32, IIFcvt>, ABSS_FM<0x21, 16>; + def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64, FGR64, IIFcvt>, ABSS_FM<0x21, 21>; } let Predicates = [NoNaNsFPMath, HasStdEnc] in { - defm FABS : FFR1P_M<0x5, "abs", fabs>; - defm FNEG : FFR1P_M<0x7, "neg", fneg>; + def FABS_S : ABSS_FT<"abs.s", FGR32, FGR32, IIFcvt, fabs>, ABSS_FM<0x5, 16>; + def FNEG_S : ABSS_FT<"neg.s", FGR32, FGR32, IIFcvt, fneg>, ABSS_FM<0x7, 16>; + defm FABS : ABSS_M<"abs.d", IIFcvt, fabs>, ABSS_FM<0x5, 17>; + defm FNEG : ABSS_M<"neg.d", IIFcvt, fneg>, ABSS_FM<0x7, 17>; } -defm FSQRT : FFR1P_M<0x4, "sqrt", fsqrt>; + +def FSQRT_S : ABSS_FT<"sqrt.s", FGR32, FGR32, IIFsqrtSingle, fsqrt>, + ABSS_FM<0x4, 16>; +defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>; // The odd-numbered registers are only referenced when doing loads, // stores, and moves between floating-point and integer registers. // When defining instructions, we reference all 32-bit registers, // regardless of register aliasing. 
-class FFRGPR<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern>: - FFR<0x11, 0x0, _fmt, outs, ins, asmstr, pattern> { - bits<5> rt; - let ft = rt; - let fd = 0; -} - /// Move Control Registers From/To CPU Registers -def CFC1 : FFRGPR<0x2, (outs CPURegs:$rt), (ins CCR:$fs), - "cfc1\t$rt, $fs", []>; - -def CTC1 : FFRGPR<0x6, (outs CCR:$fs), (ins CPURegs:$rt), - "ctc1\t$rt, $fs", []>; - -def MFC1 : FFRGPR<0x00, (outs CPURegs:$rt), (ins FGR32:$fs), - "mfc1\t$rt, $fs", - [(set CPURegs:$rt, (bitconvert FGR32:$fs))]>; - -def MTC1 : FFRGPR<0x04, (outs FGR32:$fs), (ins CPURegs:$rt), - "mtc1\t$rt, $fs", - [(set FGR32:$fs, (bitconvert CPURegs:$rt))]>; - -def DMFC1 : FFRGPR<0x01, (outs CPU64Regs:$rt), (ins FGR64:$fs), - "dmfc1\t$rt, $fs", - [(set CPU64Regs:$rt, (bitconvert FGR64:$fs))]>; - -def DMTC1 : FFRGPR<0x05, (outs FGR64:$fs), (ins CPU64Regs:$rt), - "dmtc1\t$rt, $fs", - [(set FGR64:$fs, (bitconvert CPU64Regs:$rt))]>; - -def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>; -def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>, +def CFC1 : MFC1_FT_CCR<"cfc1", CPURegs, CCROpnd, IIFmove>, MFC1_FM<2>; +def CTC1 : MTC1_FT_CCR<"ctc1", CCROpnd, CPURegs, IIFmove>, MFC1_FM<6>; +def MFC1 : MFC1_FT<"mfc1", CPURegs, FGR32, IIFmove, bitconvert>, MFC1_FM<0>; +def MTC1 : MTC1_FT<"mtc1", FGR32, CPURegs, IIFmove, bitconvert>, MFC1_FM<4>; +def DMFC1 : MFC1_FT<"dmfc1", CPU64Regs, FGR64, IIFmove, bitconvert>, MFC1_FM<1>; +def DMTC1 : MTC1_FT<"dmtc1", FGR64, CPU64Regs, IIFmove, bitconvert>, MFC1_FM<5>; + +def FMOV_S : ABSS_FT<"mov.s", FGR32, FGR32, IIFmove>, ABSS_FM<0x6, 16>; +def FMOV_D32 : ABSS_FT<"mov.d", AFGR64, AFGR64, IIFmove>, ABSS_FM<0x6, 17>, Requires<[NotFP64bit, HasStdEnc]>; -def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>, +def FMOV_D64 : ABSS_FT<"mov.d", FGR64, FGR64, IIFmove>, ABSS_FM<0x6, 17>, Requires<[IsFP64bit, HasStdEnc]> { let DecoderNamespace = "Mips64"; } /// Floating Point Memory Instructions let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in { - def LWC1_P8 : FPLoad<0x31, "lwc1", FGR32, mem64>; - def SWC1_P8 : FPStore<0x39, "swc1", FGR32, mem64>; - def LDC164_P8 : FPLoad<0x35, "ldc1", FGR64, mem64> { + def LWC1_P8 : LW_FT<"lwc1", FGR32, IILoad, mem64, load>, LW_FM<0x31>; + def SWC1_P8 : SW_FT<"swc1", FGR32, IIStore, mem64, store>, LW_FM<0x39>; + def LDC164_P8 : LW_FT<"ldc1", FGR64, IILoad, mem64, load>, LW_FM<0x35> { let isCodeGenOnly =1; } - def SDC164_P8 : FPStore<0x3d, "sdc1", FGR64, mem64> { + def SDC164_P8 : SW_FT<"sdc1", FGR64, IIStore, mem64, store>, LW_FM<0x3d> { let isCodeGenOnly =1; } } let Predicates = [NotN64, HasStdEnc] in { - def LWC1 : FPLoad<0x31, "lwc1", FGR32, mem>; - def SWC1 : FPStore<0x39, "swc1", FGR32, mem>; + def LWC1 : LW_FT<"lwc1", FGR32, IILoad, mem, load>, LW_FM<0x31>; + def SWC1 : SW_FT<"swc1", FGR32, IIStore, mem, store>, LW_FM<0x39>; } let Predicates = [NotN64, HasMips64, HasStdEnc], DecoderNamespace = "Mips64" in { - def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>; - def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>; + def LDC164 : LW_FT<"ldc1", FGR64, IILoad, mem, load>, LW_FM<0x35>; + def SDC164 : SW_FT<"sdc1", FGR64, IIStore, mem, store>, LW_FM<0x3d>; } let Predicates = [NotN64, NotMips64, HasStdEnc] in { - def LDC1 : FPLoad<0x35, "ldc1", AFGR64, mem>; - def SDC1 : FPStore<0x3d, "sdc1", AFGR64, mem>; + def LDC1 : LW_FT<"ldc1", AFGR64, IILoad, mem, load>, LW_FM<0x35>; + def SDC1 : SW_FT<"sdc1", AFGR64, IIStore, mem, store>, LW_FM<0x3d>; } // Indexed loads and stores. 
let Predicates = [HasFPIdx, IsNotNaCl/*@LOCALMOD*/] in { - def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load>; - def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store>; + def LWXC1 : LWXC1_FT<"lwxc1", FGR32, CPURegs, IILoad, load>, LWXC1_FM<0>; + def SWXC1 : SWXC1_FT<"swxc1", FGR32, CPURegs, IIStore, store>, SWXC1_FM<8>; } let Predicates = [HasMips32r2, NotMips64, IsNotNaCl/*@LOCALMOD*/] in { - def LDXC1 : FPIdxLoad<0x1, "ldxc1", AFGR64, CPURegs, load>; - def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store>; + def LDXC1 : LWXC1_FT<"ldxc1", AFGR64, CPURegs, IILoad, load>, LWXC1_FM<1>; + def SDXC1 : SWXC1_FT<"sdxc1", AFGR64, CPURegs, IIStore, store>, SWXC1_FM<9>; } -let Predicates = [HasMips64, NotN64, IsNotNaCl/*@LOCALMOD*/], - DecoderNamespace="Mips64" in { - def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load>; - def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store>; +let Predicates = [HasMips64, NotN64, IsNotNaCl/*@LOCALMOD*/], DecoderNamespace="Mips64" in { + def LDXC164 : LWXC1_FT<"ldxc1", FGR64, CPURegs, IILoad, load>, LWXC1_FM<1>; + def SDXC164 : SWXC1_FT<"sdxc1", FGR64, CPURegs, IIStore, store>, SWXC1_FM<9>; } // n64 let Predicates = [IsN64, IsNotNaCl/*@LOCALMOD*/], isCodeGenOnly=1 in { - def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load>; - def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load>; - def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store>; - def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store>; + def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32, CPU64Regs, IILoad, load>, LWXC1_FM<0>; + def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64, CPU64Regs, IILoad, load>, + LWXC1_FM<1>; + def SWXC1_P8 : SWXC1_FT<"swxc1", FGR32, CPU64Regs, IIStore, store>, + SWXC1_FM<8>; + def SDXC164_P8 : SWXC1_FT<"sdxc1", FGR64, CPU64Regs, IIStore, store>, + SWXC1_FM<9>; } // Load/store doubleword indexed unaligned. 
let Predicates = [NotMips64, HasStdEnc] in { - def LUXC1 : FPIdxLoad<0x5, "luxc1", AFGR64, CPURegs>; - def SUXC1 : FPIdxStore<0xd, "suxc1", AFGR64, CPURegs>; + def LUXC1 : LWXC1_FT<"luxc1", AFGR64, CPURegs, IILoad>, LWXC1_FM<0x5>; + def SUXC1 : SWXC1_FT<"suxc1", AFGR64, CPURegs, IIStore>, SWXC1_FM<0xd>; } let Predicates = [HasMips64, HasStdEnc], DecoderNamespace="Mips64" in { - def LUXC164 : FPIdxLoad<0x5, "luxc1", FGR64, CPURegs>; - def SUXC164 : FPIdxStore<0xd, "suxc1", FGR64, CPURegs>; + def LUXC164 : LWXC1_FT<"luxc1", FGR64, CPURegs, IILoad>, LWXC1_FM<0x5>; + def SUXC164 : SWXC1_FT<"suxc1", FGR64, CPURegs, IIStore>, SWXC1_FM<0xd>; } /// Floating-point Aritmetic -defm FADD : FFR2P_M<0x00, "add", fadd, 1>; -defm FDIV : FFR2P_M<0x03, "div", fdiv>; -defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>; -defm FSUB : FFR2P_M<0x01, "sub", fsub>; +def FADD_S : ADDS_FT<"add.s", FGR32, IIFadd, 1, fadd>, ADDS_FM<0x00, 16>; +defm FADD : ADDS_M<"add.d", IIFadd, 1, fadd>, ADDS_FM<0x00, 17>; +def FDIV_S : ADDS_FT<"div.s", FGR32, IIFdivSingle, 0, fdiv>, ADDS_FM<0x03, 16>; +defm FDIV : ADDS_M<"div.d", IIFdivDouble, 0, fdiv>, ADDS_FM<0x03, 17>; +def FMUL_S : ADDS_FT<"mul.s", FGR32, IIFmulSingle, 1, fmul>, ADDS_FM<0x02, 16>; +defm FMUL : ADDS_M<"mul.d", IIFmulDouble, 1, fmul>, ADDS_FM<0x02, 17>; +def FSUB_S : ADDS_FT<"sub.s", FGR32, IIFadd, 0, fsub>, ADDS_FM<0x01, 16>; +defm FSUB : ADDS_M<"sub.d", IIFadd, 0, fsub>, ADDS_FM<0x01, 17>; let Predicates = [HasMips32r2, HasStdEnc] in { - def MADD_S : FMADDSUB<0x4, 0, "madd", "s", fadd, FGR32>; - def MSUB_S : FMADDSUB<0x5, 0, "msub", "s", fsub, FGR32>; + def MADD_S : MADDS_FT<"madd.s", FGR32, IIFmulSingle, fadd>, MADDS_FM<4, 0>; + def MSUB_S : MADDS_FT<"msub.s", FGR32, IIFmulSingle, fsub>, MADDS_FM<5, 0>; } let Predicates = [HasMips32r2, NoNaNsFPMath, HasStdEnc] in { - def NMADD_S : FNMADDSUB<0x6, 0, "nmadd", "s", fadd, FGR32>; - def NMSUB_S : FNMADDSUB<0x7, 0, "nmsub", "s", fsub, FGR32>; + def NMADD_S : NMADDS_FT<"nmadd.s", FGR32, IIFmulSingle, fadd>, MADDS_FM<6, 0>; + def NMSUB_S : NMADDS_FT<"nmsub.s", FGR32, IIFmulSingle, fsub>, MADDS_FM<7, 0>; } let Predicates = [HasMips32r2, NotFP64bit, HasStdEnc] in { - def MADD_D32 : FMADDSUB<0x4, 1, "madd", "d", fadd, AFGR64>; - def MSUB_D32 : FMADDSUB<0x5, 1, "msub", "d", fsub, AFGR64>; + def MADD_D32 : MADDS_FT<"madd.d", AFGR64, IIFmulDouble, fadd>, MADDS_FM<4, 1>; + def MSUB_D32 : MADDS_FT<"msub.d", AFGR64, IIFmulDouble, fsub>, MADDS_FM<5, 1>; } let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStdEnc] in { - def NMADD_D32 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, AFGR64>; - def NMSUB_D32 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, AFGR64>; + def NMADD_D32 : NMADDS_FT<"nmadd.d", AFGR64, IIFmulDouble, fadd>, + MADDS_FM<6, 1>; + def NMSUB_D32 : NMADDS_FT<"nmsub.d", AFGR64, IIFmulDouble, fsub>, + MADDS_FM<7, 1>; } let Predicates = [HasMips32r2, IsFP64bit, HasStdEnc], isCodeGenOnly=1 in { - def MADD_D64 : FMADDSUB<0x4, 1, "madd", "d", fadd, FGR64>; - def MSUB_D64 : FMADDSUB<0x5, 1, "msub", "d", fsub, FGR64>; + def MADD_D64 : MADDS_FT<"madd.d", FGR64, IIFmulDouble, fadd>, MADDS_FM<4, 1>; + def MSUB_D64 : MADDS_FT<"msub.d", FGR64, IIFmulDouble, fsub>, MADDS_FM<5, 1>; } let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStdEnc], isCodeGenOnly=1 in { - def NMADD_D64 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, FGR64>; - def NMSUB_D64 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, FGR64>; + def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64, IIFmulDouble, fadd>, + MADDS_FM<6, 1>; + def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64, IIFmulDouble, 
fsub>, + MADDS_FM<7, 1>; } //===----------------------------------------------------------------------===// @@ -363,19 +410,9 @@ let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStdEnc], def MIPS_BRANCH_F : PatLeaf<(i32 0)>; def MIPS_BRANCH_T : PatLeaf<(i32 1)>; -/// Floating Point Branch of False/True (Likely) -let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in - class FBRANCH<bits<1> nd, bits<1> tf, PatLeaf op, string asmstr> : - FFI<0x11, (outs), (ins brtarget:$dst), !strconcat(asmstr, "\t$dst"), - [(MipsFPBrcond op, bb:$dst)]> { - let Inst{20-18} = 0; - let Inst{17} = nd; - let Inst{16} = tf; -} - let DecoderMethod = "DecodeBC1" in { -def BC1F : FBRANCH<0, 0, MIPS_BRANCH_F, "bc1f">; -def BC1T : FBRANCH<0, 1, MIPS_BRANCH_T, "bc1t">; +def BC1F : BC1F_FT<"bc1f", IIBranch, MIPS_BRANCH_F>, BC1F_FM<0, 0>; +def BC1T : BC1F_FT<"bc1t", IIBranch, MIPS_BRANCH_T>, BC1F_FM<0, 1>; } //===----------------------------------------------------------------------===// // Floating Point Flag Conditions @@ -399,33 +436,24 @@ def MIPS_FCOND_NGE : PatLeaf<(i32 13)>; def MIPS_FCOND_LE : PatLeaf<(i32 14)>; def MIPS_FCOND_NGT : PatLeaf<(i32 15)>; -class FCMP<bits<5> fmt, RegisterClass RC, string typestr> : - FCC<fmt, (outs), (ins RC:$fs, RC:$ft, condcode:$cc), - !strconcat("c.$cc.", typestr, "\t$fs, $ft"), - [(MipsFPCmp RC:$fs, RC:$ft, imm:$cc)]>; - /// Floating Point Compare -let Defs=[FCR31] in { - def FCMP_S32 : FCMP<0x10, FGR32, "s">; - def FCMP_D32 : FCMP<0x11, AFGR64, "d">, - Requires<[NotFP64bit, HasStdEnc]>; - def FCMP_D64 : FCMP<0x11, FGR64, "d">, - Requires<[IsFP64bit, HasStdEnc]> { - let DecoderNamespace = "Mips64"; - } -} +def FCMP_S32 : CEQS_FT<"s", FGR32, IIFcmp, MipsFPCmp>, CEQS_FM<16>; +def FCMP_D32 : CEQS_FT<"d", AFGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>, + Requires<[NotFP64bit, HasStdEnc]>; +let DecoderNamespace = "Mips64" in +def FCMP_D64 : CEQS_FT<"d", FGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>, + Requires<[IsFP64bit, HasStdEnc]>; //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions //===----------------------------------------------------------------------===// -def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCR:$src), - "# MOVCCRToCCR", []>; +def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCROpnd:$src), []>; // This pseudo instr gets expanded into 2 mtc1 instrs after register // allocation. def BuildPairF64 : PseudoSE<(outs AFGR64:$dst), - (ins CPURegs:$lo, CPURegs:$hi), "", + (ins CPURegs:$lo, CPURegs:$hi), [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>; // This pseudo instr gets expanded into 2 mfc1 instrs after register @@ -433,7 +461,7 @@ def BuildPairF64 : // if n is 0, lower part of src is extracted. // if n is 1, higher part of src is extracted. 
def ExtractElementF64 : - PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n), "", + PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n), [(set CPURegs:$dst, (MipsExtractElementF64 AFGR64:$src, imm:$n))]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index c3c12c1fea..c026b5dae0 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -80,15 +80,17 @@ class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern, } // Mips Pseudo Instructions Format -class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>: - MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> { +class MipsPseudo<dag outs, dag ins, list<dag> pattern, + InstrItinClass itin = IIPseudo> : + MipsInst<outs, ins, "", pattern, itin, Pseudo> { let isCodeGenOnly = 1; let isPseudo = 1; } // Mips32/64 Pseudo Instruction Format -class PseudoSE<dag outs, dag ins, string asmstr, list<dag> pattern>: - MipsPseudo<outs, ins, asmstr, pattern> { +class PseudoSE<dag outs, dag ins, list<dag> pattern, + InstrItinClass itin = IIPseudo>: + MipsPseudo<outs, ins, pattern, itin> { let Predicates = [HasStdEnc]; } @@ -161,30 +163,28 @@ class BranchBase<bits<6> op, dag outs, dag ins, string asmstr, // Format J instruction class in Mips : <|opcode|address|> //===----------------------------------------------------------------------===// -class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmJ> +class FJ<bits<6> op> { - bits<26> addr; + bits<26> target; - let Opcode = op; + bits<32> Inst; - let Inst{25-0} = addr; + let Inst{31-26} = op; + let Inst{25-0} = target; } - //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// // MFC instruction class in Mips : <|op|mf|rt|rd|0000000|sel|> //===----------------------------------------------------------------------===// -class MFC3OP<bits<6> op, bits<5> _mfmt, dag outs, dag ins, string asmstr>: - InstSE<outs, ins, asmstr, [], NoItinerary, FrmFR> +class MFC3OP_FM<bits<6> op, bits<5> mfmt> { - bits<5> mfmt; bits<5> rt; bits<5> rd; bits<3> sel; - let Opcode = op; - let mfmt = _mfmt; + bits<32> Inst; + let Inst{31-26} = op; let Inst{25-21} = mfmt; let Inst{20-16} = rt; let Inst{15-11} = rd; @@ -192,6 +192,275 @@ class MFC3OP<bits<6> op, bits<5> _mfmt, dag outs, dag ins, string asmstr>: let Inst{2-0} = sel; } +class ADD_FM<bits<6> op, bits<6> funct> { + bits<5> rd; + bits<5> rs; + bits<5> rt; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = 0; + let Inst{5-0} = funct; +} + +class ADDI_FM<bits<6> op> { + bits<5> rs; + bits<5> rt; + bits<16> imm16; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-0} = imm16; +} + +class SRA_FM<bits<6> funct, bit rotate> { + bits<5> rd; + bits<5> rt; + bits<5> shamt; + + bits<32> Inst; + + let Inst{31-26} = 0; + let Inst{25-22} = 0; + let Inst{21} = rotate; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = shamt; + let Inst{5-0} = funct; +} + +class SRLV_FM<bits<6> funct, bit rotate> { + bits<5> rd; + bits<5> rt; + bits<5> rs; + + bits<32> Inst; + + let Inst{31-26} = 0; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let 
Inst{10-7} = 0; + let Inst{6} = rotate; + let Inst{5-0} = funct; +} + +class BEQ_FM<bits<6> op> { + bits<5> rs; + bits<5> rt; + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-0} = offset; +} + +class BGEZ_FM<bits<6> op, bits<5> funct> { + bits<5> rs; + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rs; + let Inst{20-16} = funct; + let Inst{15-0} = offset; +} + +class B_FM { + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = 4; + let Inst{25-21} = 0; + let Inst{20-16} = 0; + let Inst{15-0} = offset; +} + +class SLTI_FM<bits<6> op> { + bits<5> rt; + bits<5> rs; + bits<16> imm16; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-0} = imm16; +} + +class MFLO_FM<bits<6> funct> { + bits<5> rd; + + bits<32> Inst; + + let Inst{31-26} = 0; + let Inst{25-16} = 0; + let Inst{15-11} = rd; + let Inst{10-6} = 0; + let Inst{5-0} = funct; +} + +class MTLO_FM<bits<6> funct> { + bits<5> rs; + + bits<32> Inst; + + let Inst{31-26} = 0; + let Inst{25-21} = rs; + let Inst{20-6} = 0; + let Inst{5-0} = funct; +} + +class SEB_FM<bits<5> funct, bits<6> funct2> { + bits<5> rd; + bits<5> rt; + + bits<32> Inst; + + let Inst{31-26} = 0x1f; + let Inst{25-21} = 0; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = funct; + let Inst{5-0} = funct2; +} + +class CLO_FM<bits<6> funct> { + bits<5> rd; + bits<5> rs; + bits<5> rt; + + bits<32> Inst; + + let Inst{31-26} = 0x1c; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = 0; + let Inst{5-0} = funct; + let rt = rd; +} + +class LUI_FM { + bits<5> rt; + bits<16> imm16; + + bits<32> Inst; + + let Inst{31-26} = 0xf; + let Inst{25-21} = 0; + let Inst{20-16} = rt; + let Inst{15-0} = imm16; +} + +class NOP_FM { + bits<32> Inst; + + let Inst{31-0} = 0; +} + +class JALR_FM { + bits<5> rs; + + bits<32> Inst; + + let Inst{31-26} = 0; + let Inst{25-21} = rs; + let Inst{20-16} = 0; + let Inst{15-11} = 31; + let Inst{10-6} = 0; + let Inst{5-0} = 9; +} + +class BAL_FM { + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = 1; + let Inst{25-21} = 0; + let Inst{20-16} = 0x11; + let Inst{15-0} = offset; +} + +class BGEZAL_FM<bits<5> funct> { + bits<5> rs; + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = 1; + let Inst{25-21} = rs; + let Inst{20-16} = funct; + let Inst{15-0} = offset; +} + +class SYNC_FM { + bits<5> stype; + + bits<32> Inst; + + let Inst{31-26} = 0; + let Inst{10-6} = stype; + let Inst{5-0} = 0xf; +} + +class MULT_FM<bits<6> op, bits<6> funct> { + bits<5> rs; + bits<5> rt; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-6} = 0; + let Inst{5-0} = funct; +} + +class EXT_FM<bits<6> funct> { + bits<5> rt; + bits<5> rs; + bits<5> pos; + bits<5> size; + + bits<32> Inst; + + let Inst{31-26} = 0x1f; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = size; + let Inst{10-6} = pos; + let Inst{5-0} = funct; +} + +class RDHWR_FM { + bits<5> rt; + bits<5> rd; + + bits<32> Inst; + + let Inst{31-26} = 0x1f; + let Inst{25-21} = 0; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = 0; + let Inst{5-0} = 0x3b; +} + //===----------------------------------------------------------------------===// // // FLOATING POINT INSTRUCTION FORMATS @@ -206,31 +475,6 @@ class MFC3OP<bits<6> op, bits<5> _mfmt, dag outs, dag ins, string asmstr>: 
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Format FR instruction class in Mips : <|opcode|fmt|ft|fs|fd|funct|> -//===----------------------------------------------------------------------===// - -class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, - string asmstr, list<dag> pattern> : - InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFR> -{ - bits<5> fd; - bits<5> fs; - bits<5> ft; - bits<5> fmt; - bits<6> funct; - - let Opcode = op; - let funct = _funct; - let fmt = _fmt; - - let Inst{25-21} = fmt; - let Inst{20-16} = ft; - let Inst{15-11} = fs; - let Inst{10-6} = fd; - let Inst{5-0} = funct; -} - -//===----------------------------------------------------------------------===// // Format FI instruction class in Mips : <|opcode|base|ft|immediate|> //===----------------------------------------------------------------------===// @@ -248,130 +492,179 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>: let Inst{15-0} = imm16; } -//===----------------------------------------------------------------------===// -// Compare instruction class in Mips : <|010001|fmt|ft|fs|0000011|condcode|> -//===----------------------------------------------------------------------===// - -class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> : - InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> -{ - bits<5> fs; - bits<5> ft; - bits<4> cc; - bits<5> fmt; +class ADDS_FM<bits<6> funct, bits<5> fmt> { + bits<5> fd; + bits<5> fs; + bits<5> ft; - let Opcode = 0x11; - let fmt = _fmt; + bits<32> Inst; + let Inst{31-26} = 0x11; let Inst{25-21} = fmt; let Inst{20-16} = ft; let Inst{15-11} = fs; - let Inst{10-6} = 0; - let Inst{5-4} = 0b11; - let Inst{3-0} = cc; + let Inst{10-6} = fd; + let Inst{5-0} = funct; } +class ABSS_FM<bits<6> funct, bits<5> fmt> { + bits<5> fd; + bits<5> fs; -class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr, - list<dag> pattern> : - InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> -{ - bits<5> rd; - bits<5> rs; - bits<3> cc; - bits<1> tf; - - let Opcode = 0; - let tf = _tf; + bits<32> Inst; - let Inst{25-21} = rs; - let Inst{20-18} = cc; - let Inst{17} = 0; - let Inst{16} = tf; - let Inst{15-11} = rd; - let Inst{10-6} = 0; - let Inst{5-0} = 1; + let Inst{31-26} = 0x11; + let Inst{25-21} = fmt; + let Inst{20-16} = 0; + let Inst{15-11} = fs; + let Inst{10-6} = fd; + let Inst{5-0} = funct; } -class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr, - list<dag> pattern> : - InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> -{ - bits<5> fd; - bits<5> fs; - bits<3> cc; - bits<5> fmt; - bits<1> tf; +class MFC1_FM<bits<5> funct> { + bits<5> rt; + bits<5> fs; - let Opcode = 17; - let fmt = _fmt; - let tf = _tf; + bits<32> Inst; - let Inst{25-21} = fmt; - let Inst{20-18} = cc; - let Inst{17} = 0; - let Inst{16} = tf; + let Inst{31-26} = 0x11; + let Inst{25-21} = funct; + let Inst{20-16} = rt; let Inst{15-11} = fs; - let Inst{10-6} = fd; - let Inst{5-0} = 17; + let Inst{10-0} = 0; } -// FP unary instructions without patterns. -class FFR1<bits<6> funct, bits<5> fmt, string opstr, string fmtstr, - RegisterClass DstRC, RegisterClass SrcRC> : - FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs), - !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"), []> { - let ft = 0; -} +class LW_FM<bits<6> op> { + bits<5> rt; + bits<21> addr; -// FP unary instructions with patterns. 
-class FFR1P<bits<6> funct, bits<5> fmt, string opstr, string fmtstr, - RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode> : - FFR<0x11, funct, fmt, (outs DstRC:$fd), (ins SrcRC:$fs), - !strconcat(opstr, ".", fmtstr, "\t$fd, $fs"), - [(set DstRC:$fd, (OpNode SrcRC:$fs))]> { - let ft = 0; -} + bits<32> Inst; -class FFR2P<bits<6> funct, bits<5> fmt, string opstr, - string fmtstr, RegisterClass RC, SDNode OpNode> : - FFR<0x11, funct, fmt, (outs RC:$fd), (ins RC:$fs, RC:$ft), - !strconcat(opstr, ".", fmtstr, "\t$fd, $fs, $ft"), - [(set RC:$fd, (OpNode RC:$fs, RC:$ft))]>; + let Inst{31-26} = op; + let Inst{25-21} = addr{20-16}; + let Inst{20-16} = rt; + let Inst{15-0} = addr{15-0}; +} -// Floating point madd/msub/nmadd/nmsub. -class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr, - list<dag> pattern> - : InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { +class MADDS_FM<bits<3> funct, bits<3> fmt> { bits<5> fd; bits<5> fr; bits<5> fs; bits<5> ft; - let Opcode = 0x13; + bits<32> Inst; + + let Inst{31-26} = 0x13; let Inst{25-21} = fr; let Inst{20-16} = ft; let Inst{15-11} = fs; - let Inst{10-6} = fd; - let Inst{5-3} = funct; - let Inst{2-0} = fmt; + let Inst{10-6} = fd; + let Inst{5-3} = funct; + let Inst{2-0} = fmt; } -// FP indexed load/store instructions. -class FFMemIdx<bits<6> funct, dag outs, dag ins, string asmstr, - list<dag> pattern> : - InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> -{ - bits<5> base; - bits<5> index; - bits<5> fs; - bits<5> fd; +class LWXC1_FM<bits<6> funct> { + bits<5> fd; + bits<5> base; + bits<5> index; - let Opcode = 0x13; + bits<32> Inst; + let Inst{31-26} = 0x13; + let Inst{25-21} = base; + let Inst{20-16} = index; + let Inst{15-11} = 0; + let Inst{10-6} = fd; + let Inst{5-0} = funct; +} + +class SWXC1_FM<bits<6> funct> { + bits<5> fs; + bits<5> base; + bits<5> index; + + bits<32> Inst; + + let Inst{31-26} = 0x13; let Inst{25-21} = base; let Inst{20-16} = index; let Inst{15-11} = fs; + let Inst{10-6} = 0; + let Inst{5-0} = funct; +} + +class BC1F_FM<bit nd, bit tf> { + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = 0x11; + let Inst{25-21} = 0x8; + let Inst{20-18} = 0; // cc + let Inst{17} = nd; + let Inst{16} = tf; + let Inst{15-0} = offset; +} + +class CEQS_FM<bits<5> fmt> { + bits<5> fs; + bits<5> ft; + bits<4> cond; + + bits<32> Inst; + + let Inst{31-26} = 0x11; + let Inst{25-21} = fmt; + let Inst{20-16} = ft; + let Inst{15-11} = fs; + let Inst{10-8} = 0; // cc + let Inst{7-4} = 0x3; + let Inst{3-0} = cond; +} + +class CMov_I_F_FM<bits<6> funct, bits<5> fmt> { + bits<5> fd; + bits<5> fs; + bits<5> rt; + + bits<32> Inst; + + let Inst{31-26} = 0x11; + let Inst{25-21} = fmt; + let Inst{20-16} = rt; + let Inst{15-11} = fs; let Inst{10-6} = fd; let Inst{5-0} = funct; } + +class CMov_F_I_FM<bit tf> { + bits<5> rd; + bits<5> rs; + + bits<32> Inst; + + let Inst{31-26} = 0; + let Inst{25-21} = rs; + let Inst{20-18} = 0; // cc + let Inst{17} = 0; + let Inst{16} = tf; + let Inst{15-11} = rd; + let Inst{10-6} = 0; + let Inst{5-0} = 1; +} + +class CMov_F_F_FM<bits<5> fmt, bit tf> { + bits<5> fd; + bits<5> fs; + + bits<32> Inst; + + let Inst{31-26} = 0x11; + let Inst{25-21} = fmt; + let Inst{20-18} = 0; // cc + let Inst{17} = 0; + let Inst{16} = tf; + let Inst{15-11} = fs; + let Inst{10-6} = fd; + let Inst{5-0} = 0x11; +} diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 8f8ab86e70..b81c975a16 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td 
@@ -175,6 +175,10 @@ class MipsPat<dag pattern, dag result> : Pat<pattern, result> { let Predicates = [HasStdEnc]; } +class IsCommutable { + bit isCommutable = 1; +} + class IsBranch { bit isBranch = 1; } @@ -296,6 +300,10 @@ def HI16 : SDNodeXForm<imm, [{ // e.g. addi, andi def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>; +// Node immediate fits as 15-bit sign extended on target immediate. +// e.g. addi, andi +def immSExt15 : PatLeaf<(imm), [{ return isInt<15>(N->getSExtValue()); }]>; + // Node immediate fits as 16-bit zero extended on target immediate. // The LO16 param means that only the lower 16 bits of the node // immediate are caught. @@ -325,104 +333,65 @@ def addr : // Instructions specific format //===----------------------------------------------------------------------===// -/// Move Control Registers From/To CPU Registers -def MFC0_3OP : MFC3OP<0x10, 0, (outs CPURegs:$rt), - (ins CPURegs:$rd, uimm16:$sel),"mfc0\t$rt, $rd, $sel">; -def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegs:$rt, CPURegs:$rd, 0)>; - -def MTC0_3OP : MFC3OP<0x10, 4, (outs CPURegs:$rd, uimm16:$sel), - (ins CPURegs:$rt),"mtc0\t$rt, $rd, $sel">; -def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegs:$rd, 0, CPURegs:$rt)>; - -def MFC2_3OP : MFC3OP<0x12, 0, (outs CPURegs:$rt), - (ins CPURegs:$rd, uimm16:$sel),"mfc2\t$rt, $rd, $sel">; -def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegs:$rt, CPURegs:$rd, 0)>; - -def MTC2_3OP : MFC3OP<0x12, 4, (outs CPURegs:$rd, uimm16:$sel), - (ins CPURegs:$rt),"mtc2\t$rt, $rd, $sel">; -def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegs:$rd, 0, CPURegs:$rt)>; - // Arithmetic and logical instructions with 3 register operands. -class ArithLogicR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode, - InstrItinClass itin, RegisterClass RC, bit isComm = 0>: - FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt), - !strconcat(instr_asm, "\t$rd, $rs, $rt"), - [(set RC:$rd, (OpNode RC:$rs, RC:$rt))], itin> { - let shamt = 0; +class ArithLogicR<string opstr, RegisterOperand RO, bit isComm = 0, + InstrItinClass Itin = NoItinerary, + SDPatternOperator OpNode = null_frag>: + InstSE<(outs RO:$rd), (ins RO:$rs, RO:$rt), + !strconcat(opstr, "\t$rd, $rs, $rt"), + [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> { let isCommutable = isComm; let isReMaterializable = 1; -} - -class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm, - InstrItinClass itin, RegisterClass RC, bit isComm = 0>: - FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt), - !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], itin> { - let shamt = 0; - let isCommutable = isComm; + string BaseOpcode; + string Arch; } // Arithmetic and logical instructions with 2 register operands. 
-class ArithLogicI<bits<6> op, string instr_asm, SDNode OpNode, - Operand Od, PatLeaf imm_type, RegisterClass RC> : - FI<op, (outs RC:$rt), (ins RC:$rs, Od:$imm16), - !strconcat(instr_asm, "\t$rt, $rs, $imm16"), - [(set RC:$rt, (OpNode RC:$rs, imm_type:$imm16))], IIAlu> { +class ArithLogicI<string opstr, Operand Od, RegisterOperand RO, + SDPatternOperator imm_type = null_frag, + SDPatternOperator OpNode = null_frag> : + InstSE<(outs RO:$rt), (ins RO:$rs, Od:$imm16), + !strconcat(opstr, "\t$rt, $rs, $imm16"), + [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], IIAlu, FrmI> { let isReMaterializable = 1; } -class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode, - Operand Od, PatLeaf imm_type, RegisterClass RC> : - FI<op, (outs RC:$rt), (ins RC:$rs, Od:$imm16), - !strconcat(instr_asm, "\t$rt, $rs, $imm16"), [], IIAlu>; - // Arithmetic Multiply ADD/SUB -let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in -class MArithR<bits<6> func, string instr_asm, SDNode op, bit isComm = 0> : - FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt), - !strconcat(instr_asm, "\t$rs, $rt"), - [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul> { - let rd = 0; - let shamt = 0; +class MArithR<string opstr, SDPatternOperator op = null_frag, bit isComm = 0> : + InstSE<(outs), (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt), + !strconcat(opstr, "\t$rs, $rt"), + [(op CPURegsOpnd:$rs, CPURegsOpnd:$rt, LO, HI)], IIImul, FrmR> { + let Defs = [HI, LO]; + let Uses = [HI, LO]; let isCommutable = isComm; } // Logical -class LogicNOR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>: - FR<op, func, (outs RC:$rd), (ins RC:$rs, RC:$rt), - !strconcat(instr_asm, "\t$rd, $rs, $rt"), - [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu> { - let shamt = 0; +class LogicNOR<string opstr, RegisterOperand RC>: + InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt), + !strconcat(opstr, "\t$rd, $rs, $rt"), + [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR> { let isCommutable = 1; } // Shifts -class shift_rotate_imm<bits<6> func, bits<5> isRotate, string instr_asm, - SDNode OpNode, PatFrag PF, Operand ImmOpnd, - RegisterClass RC>: - FR<0x00, func, (outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt), - !strconcat(instr_asm, "\t$rd, $rt, $shamt"), - [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu> { - let rs = isRotate; -} - -// 32-bit shift instructions. 
-class shift_rotate_imm32<bits<6> func, bits<5> isRotate, string instr_asm, - SDNode OpNode>: - shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt5, shamt, CPURegs>; - -class shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm, - SDNode OpNode, RegisterClass RC>: - FR<0x00, func, (outs RC:$rd), (ins CPURegs:$rs, RC:$rt), - !strconcat(instr_asm, "\t$rd, $rt, $rs"), - [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs))], IIAlu> { - let shamt = isRotate; -} +class shift_rotate_imm<string opstr, Operand ImmOpnd, + RegisterOperand RC, SDPatternOperator OpNode = null_frag, + SDPatternOperator PF = null_frag> : + InstSE<(outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt), + !strconcat(opstr, "\t$rd, $rt, $shamt"), + [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR>; + +class shift_rotate_reg<string opstr, RegisterOperand RC, + SDPatternOperator OpNode = null_frag>: + InstSE<(outs RC:$rd), (ins CPURegsOpnd:$rs, RC:$rt), + !strconcat(opstr, "\t$rd, $rt, $rs"), + [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR>; // Load Upper Imediate -class LoadUpper<bits<6> op, string instr_asm, RegisterClass RC, Operand Imm>: - FI<op, (outs RC:$rt), (ins Imm:$imm16), - !strconcat(instr_asm, "\t$rt, $imm16"), [], IIAlu>, IsAsCheapAsAMove { - let rs = 0; +class LoadUpper<string opstr, RegisterClass RC, Operand Imm>: + InstSE<(outs RC:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"), + [], IIAlu, FrmI>, IsAsCheapAsAMove { let neverHasSideEffects = 1; let isReMaterializable = 1; } @@ -436,66 +405,34 @@ class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, } // Memory Load/Store -let canFoldAsLoad = 1 in -class LoadM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC, - Operand MemOpnd, bit Pseudo>: - FMem<op, (outs RC:$rt), (ins MemOpnd:$addr), - !strconcat(instr_asm, "\t$rt, $addr"), - [(set RC:$rt, (OpNode addr:$addr))], IILoad> { - let isPseudo = Pseudo; -} - -class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC, - Operand MemOpnd, bit Pseudo>: - FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr), - !strconcat(instr_asm, "\t$rt, $addr"), - [(OpNode RC:$rt, addr:$addr)], IIStore> { - let isPseudo = Pseudo; -} - -// 32-bit load. -multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode, - bit Pseudo = 0> { - def #NAME# : LoadM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>, - Requires<[NotN64, HasStdEnc]>; - def _P8 : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>, - Requires<[IsN64, HasStdEnc]> { - let DecoderNamespace = "Mips64"; - let isCodeGenOnly = 1; - } +class Load<string opstr, SDPatternOperator OpNode, RegisterClass RC, + Operand MemOpnd> : + InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), + [(set RC:$rt, (OpNode addr:$addr))], NoItinerary, FrmI> { + let DecoderMethod = "DecodeMem"; + let canFoldAsLoad = 1; } -// 64-bit load. -multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode, - bit Pseudo = 0> { - def #NAME# : LoadM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>, - Requires<[NotN64, HasStdEnc]>; - def _P8 : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>, - Requires<[IsN64, HasStdEnc]> { - let DecoderNamespace = "Mips64"; - let isCodeGenOnly = 1; - } +class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC, + Operand MemOpnd> : + InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), + [(OpNode RC:$rt, addr:$addr)], NoItinerary, FrmI> { + let DecoderMethod = "DecodeMem"; } -// 32-bit store. 
-multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode, - bit Pseudo = 0> { - def #NAME# : StoreM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>, - Requires<[NotN64, HasStdEnc]>; - def _P8 : StoreM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>, - Requires<[IsN64, HasStdEnc]> { +multiclass LoadM<string opstr, RegisterClass RC, + SDPatternOperator OpNode = null_frag> { + def NAME : Load<opstr, OpNode, RC, mem>, Requires<[NotN64, HasStdEnc]>; + def _P8 : Load<opstr, OpNode, RC, mem64>, Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; } } -// 64-bit store. -multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode, - bit Pseudo = 0> { - def #NAME# : StoreM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>, - Requires<[NotN64, HasStdEnc]>; - def _P8 : StoreM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>, - Requires<[IsN64, HasStdEnc]> { +multiclass StoreM<string opstr, RegisterClass RC, + SDPatternOperator OpNode = null_frag> { + def NAME : Store<opstr, OpNode, RC, mem>, Requires<[NotN64, HasStdEnc]>; + def _P8 : Store<opstr, OpNode, RC, mem64>, Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; } @@ -503,81 +440,58 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode, // Load/Store Left/Right let canFoldAsLoad = 1 in -class LoadLeftRight<bits<6> op, string instr_asm, SDNode OpNode, - RegisterClass RC, Operand MemOpnd> : - FMem<op, (outs RC:$rt), (ins MemOpnd:$addr, RC:$src), - !strconcat(instr_asm, "\t$rt, $addr"), - [(set RC:$rt, (OpNode addr:$addr, RC:$src))], IILoad> { +class LoadLeftRight<string opstr, SDNode OpNode, RegisterClass RC, + Operand MemOpnd> : + InstSE<(outs RC:$rt), (ins MemOpnd:$addr, RC:$src), + !strconcat(opstr, "\t$rt, $addr"), + [(set RC:$rt, (OpNode addr:$addr, RC:$src))], NoItinerary, FrmI> { + let DecoderMethod = "DecodeMem"; string Constraints = "$src = $rt"; } -class StoreLeftRight<bits<6> op, string instr_asm, SDNode OpNode, - RegisterClass RC, Operand MemOpnd>: - FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr), - !strconcat(instr_asm, "\t$rt, $addr"), [(OpNode RC:$rt, addr:$addr)], - IIStore>; - -// 32-bit load left/right. -multiclass LoadLeftRightM32<bits<6> op, string instr_asm, SDNode OpNode> { - def #NAME# : LoadLeftRight<op, instr_asm, OpNode, CPURegs, mem>, - Requires<[NotN64, HasStdEnc]>; - def _P8 : LoadLeftRight<op, instr_asm, OpNode, CPURegs, mem64>, - Requires<[IsN64, HasStdEnc]> { - let DecoderNamespace = "Mips64"; - let isCodeGenOnly = 1; - } -} - -// 64-bit load left/right. -multiclass LoadLeftRightM64<bits<6> op, string instr_asm, SDNode OpNode> { - def #NAME# : LoadLeftRight<op, instr_asm, OpNode, CPU64Regs, mem>, - Requires<[NotN64, HasStdEnc]>; - def _P8 : LoadLeftRight<op, instr_asm, OpNode, CPU64Regs, mem64>, - Requires<[IsN64, HasStdEnc]> { - let DecoderNamespace = "Mips64"; - let isCodeGenOnly = 1; - } +class StoreLeftRight<string opstr, SDNode OpNode, RegisterClass RC, + Operand MemOpnd>: + InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), + [(OpNode RC:$rt, addr:$addr)], NoItinerary, FrmI> { + let DecoderMethod = "DecodeMem"; } -// 32-bit store left/right. 
-multiclass StoreLeftRightM32<bits<6> op, string instr_asm, SDNode OpNode> { - def #NAME# : StoreLeftRight<op, instr_asm, OpNode, CPURegs, mem>, - Requires<[NotN64, HasStdEnc]>; - def _P8 : StoreLeftRight<op, instr_asm, OpNode, CPURegs, mem64>, - Requires<[IsN64, HasStdEnc]> { +multiclass LoadLeftRightM<string opstr, SDNode OpNode, RegisterClass RC> { + def NAME : LoadLeftRight<opstr, OpNode, RC, mem>, + Requires<[NotN64, HasStdEnc]>; + def _P8 : LoadLeftRight<opstr, OpNode, RC, mem64>, + Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; } } -// 64-bit store left/right. -multiclass StoreLeftRightM64<bits<6> op, string instr_asm, SDNode OpNode> { - def #NAME# : StoreLeftRight<op, instr_asm, OpNode, CPU64Regs, mem>, - Requires<[NotN64, HasStdEnc]>; - def _P8 : StoreLeftRight<op, instr_asm, OpNode, CPU64Regs, mem64>, - Requires<[IsN64, HasStdEnc]> { +multiclass StoreLeftRightM<string opstr, SDNode OpNode, RegisterClass RC> { + def NAME : StoreLeftRight<opstr, OpNode, RC, mem>, + Requires<[NotN64, HasStdEnc]>; + def _P8 : StoreLeftRight<opstr, OpNode, RC, mem64>, + Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; } } // Conditional Branch -class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>: - BranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16), - !strconcat(instr_asm, "\t$rs, $rt, $imm16"), - [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> { +class CBranch<string opstr, PatFrag cond_op, RegisterClass RC> : + InstSE<(outs), (ins RC:$rs, RC:$rt, brtarget:$offset), + !strconcat(opstr, "\t$rs, $rt, $offset"), + [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$offset)], IIBranch, + FrmI> { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; let Defs = [AT]; } -class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op, - RegisterClass RC>: - BranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16), - !strconcat(instr_asm, "\t$rs, $imm16"), - [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> { - let rt = _rt; +class CBranchZero<string opstr, PatFrag cond_op, RegisterClass RC> : + InstSE<(outs), (ins RC:$rs, brtarget:$offset), + !strconcat(opstr, "\t$rs, $offset"), + [(brcond (i32 (cond_op RC:$rs, 0)), bb:$offset)], IIBranch, FrmI> { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; @@ -585,27 +499,22 @@ class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op, } // SetCC -class SetCC_R<bits<6> op, bits<6> func, string instr_asm, PatFrag cond_op, - RegisterClass RC>: - FR<op, func, (outs CPURegs:$rd), (ins RC:$rs, RC:$rt), - !strconcat(instr_asm, "\t$rd, $rs, $rt"), - [(set CPURegs:$rd, (cond_op RC:$rs, RC:$rt))], - IIAlu> { - let shamt = 0; -} +class SetCC_R<string opstr, PatFrag cond_op, RegisterClass RC> : + InstSE<(outs CPURegsOpnd:$rd), (ins RC:$rs, RC:$rt), + !strconcat(opstr, "\t$rd, $rs, $rt"), + [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>; -class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od, - PatLeaf imm_type, RegisterClass RC>: - FI<op, (outs CPURegs:$rt), (ins RC:$rs, Od:$imm16), - !strconcat(instr_asm, "\t$rt, $rs, $imm16"), - [(set CPURegs:$rt, (cond_op RC:$rs, imm_type:$imm16))], - IIAlu>; +class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type, + RegisterClass RC>: + InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16), + !strconcat(opstr, "\t$rt, $rs, $imm16"), + [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))], IIAlu, FrmI>; 
// Jump -class JumpFJ<bits<6> op, DAGOperand opnd, string instr_asm, - SDPatternOperator operator, SDPatternOperator targetoperator>: - FJ<op, (outs), (ins opnd:$target), !strconcat(instr_asm, "\t$target"), - [(operator targetoperator:$target)], IIBranch> { +class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator, + SDPatternOperator targetoperator> : + InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"), + [(operator targetoperator:$target)], IIBranch, FrmJ> { let isTerminator=1; let isBarrier=1; let hasDelaySlot = 1; @@ -614,11 +523,9 @@ class JumpFJ<bits<6> op, DAGOperand opnd, string instr_asm, } // Unconditional branch -class UncondBranch<bits<6> op, string instr_asm>: - BranchBase<op, (outs), (ins brtarget:$imm16), - !strconcat(instr_asm, "\t$imm16"), [(br bb:$imm16)], IIBranch> { - let rs = 0; - let rt = 0; +class UncondBranch<string opstr> : + InstSE<(outs), (ins brtarget:$offset), !strconcat(opstr, "\t$offset"), + [(br bb:$offset)], IIBranch, FrmI> { let isBranch = 1; let isTerminator = 1; let isBarrier = 1; @@ -630,11 +537,7 @@ class UncondBranch<bits<6> op, string instr_asm>: // Base class for indirect branch and return instruction classes. let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in class JumpFR<RegisterClass RC, SDPatternOperator operator = null_frag>: - FR<0, 0x8, (outs), (ins RC:$rs), "jr\t$rs", [(operator RC:$rs)], IIBranch> { - let rt = 0; - let rd = 0; - let shamt = 0; -} + InstSE<(outs), (ins RC:$rs), "jr\t$rs", [(operator RC:$rs)], IIBranch, FrmR>; // Indirect branch class IndirectBranch<RegisterClass RC>: JumpFR<RC, brind> { @@ -652,205 +555,170 @@ class RetBase<RegisterClass RC>: JumpFR<RC> { // Jump and Link (Call) let isCall=1, hasDelaySlot=1, Defs = [RA] in { - class JumpLink<bits<6> op, string instr_asm>: - FJ<op, (outs), (ins calltarget:$target), - !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)], - IIBranch> { - let DecoderMethod = "DecodeJumpTarget"; - } - - class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm, - RegisterClass RC>: - FR<op, func, (outs), (ins RC:$rs), - !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink RC:$rs)], IIBranch> { - let rt = 0; - let rd = 31; - let shamt = 0; + class JumpLink<string opstr> : + InstSE<(outs), (ins calltarget:$target), !strconcat(opstr, "\t$target"), + [(MipsJmpLink imm:$target)], IIBranch, FrmJ> { + let DecoderMethod = "DecodeJumpTarget"; } - class BranchLink<string instr_asm, bits<5> _rt, RegisterClass RC>: - FI<0x1, (outs), (ins RC:$rs, brtarget:$imm16), - !strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch> { - let rt = _rt; - } + class JumpLinkReg<string opstr, RegisterClass RC>: + InstSE<(outs), (ins RC:$rs), !strconcat(opstr, "\t$rs"), + [(MipsJmpLink RC:$rs)], IIBranch, FrmR>; + + class BGEZAL_FT<string opstr, RegisterOperand RO> : + InstSE<(outs), (ins RO:$rs, brtarget:$offset), + !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI>; + } +class BAL_FT : + InstSE<(outs), (ins brtarget:$offset), "bal\t$offset", [], IIBranch, FrmI> { + let isBranch = 1; + let isTerminator = 1; + let isBarrier = 1; + let hasDelaySlot = 1; + let Defs = [RA]; +} + +// Sync +let hasSideEffects = 1 in +class SYNC_FT : + InstSE<(outs), (ins i32imm:$stype), "sync $stype", [(MipsSync imm:$stype)], + NoItinerary, FrmOther>; + // Mul, Div -class Mult<bits<6> func, string instr_asm, InstrItinClass itin, - RegisterClass RC, list<Register> DefRegs>: - FR<0x00, func, (outs), (ins RC:$rs, RC:$rt), - !strconcat(instr_asm, "\t$rs, $rt"), [], itin> { - let rd = 0; - let shamt = 0; +class 
Mult<string opstr, InstrItinClass itin, RegisterOperand RO, + list<Register> DefRegs> : + InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rs, $rt"), [], + itin, FrmR> { let isCommutable = 1; let Defs = DefRegs; let neverHasSideEffects = 1; } -class Mult32<bits<6> func, string instr_asm, InstrItinClass itin>: - Mult<func, instr_asm, itin, CPURegs, [HI, LO]>; - -class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin, - RegisterClass RC, list<Register> DefRegs>: - FR<0x00, func, (outs), (ins RC:$rs, RC:$rt), - !strconcat(instr_asm, "\t$$zero, $rs, $rt"), - [(op RC:$rs, RC:$rt)], itin> { - let rd = 0; - let shamt = 0; +class Div<SDNode op, string opstr, InstrItinClass itin, RegisterOperand RO, + list<Register> DefRegs> : + InstSE<(outs), (ins RO:$rs, RO:$rt), + !strconcat(opstr, "\t$$zero, $rs, $rt"), [(op RO:$rs, RO:$rt)], itin, + FrmR> { let Defs = DefRegs; } -class Div32<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>: - Div<op, func, instr_asm, itin, CPURegs, [HI, LO]>; - // Move from Hi/Lo -class MoveFromLOHI<bits<6> func, string instr_asm, RegisterClass RC, - list<Register> UseRegs>: - FR<0x00, func, (outs RC:$rd), (ins), - !strconcat(instr_asm, "\t$rd"), [], IIHiLo> { - let rs = 0; - let rt = 0; - let shamt = 0; +class MoveFromLOHI<string opstr, RegisterClass RC, list<Register> UseRegs>: + InstSE<(outs RC:$rd), (ins), !strconcat(opstr, "\t$rd"), [], IIHiLo, FrmR> { let Uses = UseRegs; let neverHasSideEffects = 1; } -class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC, - list<Register> DefRegs>: - FR<0x00, func, (outs), (ins RC:$rs), - !strconcat(instr_asm, "\t$rs"), [], IIHiLo> { - let rt = 0; - let rd = 0; - let shamt = 0; +class MoveToLOHI<string opstr, RegisterClass RC, list<Register> DefRegs>: + InstSE<(outs), (ins RC:$rs), !strconcat(opstr, "\t$rs"), [], IIHiLo, FrmR> { let Defs = DefRegs; let neverHasSideEffects = 1; } -class EffectiveAddress<bits<6> opc, string instr_asm, RegisterClass RC, Operand Mem> : - FMem<opc, (outs RC:$rt), (ins Mem:$addr), - instr_asm, [(set RC:$rt, addr:$addr)], IIAlu> { - let isCodeGenOnly = 1; +class EffectiveAddress<string opstr, RegisterClass RC, Operand Mem> : + InstSE<(outs RC:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"), + [(set RC:$rt, addr:$addr)], NoItinerary, FrmI> { + let isCodeGenOnly = 1; + let DecoderMethod = "DecodeMem"; } // Count Leading Ones/Zeros in Word -class CountLeading0<bits<6> func, string instr_asm, RegisterClass RC>: - FR<0x1c, func, (outs RC:$rd), (ins RC:$rs), - !strconcat(instr_asm, "\t$rd, $rs"), - [(set RC:$rd, (ctlz RC:$rs))], IIAlu>, - Requires<[HasBitCount, HasStdEnc]> { - let shamt = 0; - let rt = rd; -} +class CountLeading0<string opstr, RegisterOperand RO>: + InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), + [(set RO:$rd, (ctlz RO:$rs))], IIAlu, FrmR>, + Requires<[HasBitCount, HasStdEnc]>; + +class CountLeading1<string opstr, RegisterOperand RO>: + InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), + [(set RO:$rd, (ctlz (not RO:$rs)))], IIAlu, FrmR>, + Requires<[HasBitCount, HasStdEnc]>; -class CountLeading1<bits<6> func, string instr_asm, RegisterClass RC>: - FR<0x1c, func, (outs RC:$rd), (ins RC:$rs), - !strconcat(instr_asm, "\t$rd, $rs"), - [(set RC:$rd, (ctlz (not RC:$rs)))], IIAlu>, - Requires<[HasBitCount, HasStdEnc]> { - let shamt = 0; - let rt = rd; -} // Sign Extend in Register. 
-class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt, - RegisterClass RC>: - FR<0x1f, 0x20, (outs RC:$rd), (ins RC:$rt), - !strconcat(instr_asm, "\t$rd, $rt"), - [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary> { - let rs = 0; - let shamt = sa; +class SignExtInReg<string opstr, ValueType vt, RegisterClass RC> : + InstSE<(outs RC:$rd), (ins RC:$rt), !strconcat(opstr, "\t$rd, $rt"), + [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary, FrmR> { let Predicates = [HasSEInReg, HasStdEnc]; } // Subword Swap -class SubwordSwap<bits<6> func, bits<5> sa, string instr_asm, RegisterClass RC>: - FR<0x1f, func, (outs RC:$rd), (ins RC:$rt), - !strconcat(instr_asm, "\t$rd, $rt"), [], NoItinerary> { - let rs = 0; - let shamt = sa; +class SubwordSwap<string opstr, RegisterOperand RO>: + InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [], + NoItinerary, FrmR> { let Predicates = [HasSwap, HasStdEnc]; let neverHasSideEffects = 1; } // Read Hardware -class ReadHardware<RegisterClass CPURegClass, RegisterClass HWRegClass> - : FR<0x1f, 0x3b, (outs CPURegClass:$rt), (ins HWRegClass:$rd), - "rdhwr\t$rt, $rd", [], IIAlu> { - let rs = 0; - let shamt = 0; -} +class ReadHardware<RegisterClass CPURegClass, RegisterOperand RO> : + InstSE<(outs CPURegClass:$rt), (ins RO:$rd), "rdhwr\t$rt, $rd", [], + IIAlu, FrmR>; // Ext and Ins -class ExtBase<bits<6> _funct, string instr_asm, RegisterClass RC>: - FR<0x1f, _funct, (outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$sz), - !strconcat(instr_asm, " $rt, $rs, $pos, $sz"), - [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$sz))], NoItinerary> { - bits<5> pos; - bits<5> sz; - let rd = sz; - let shamt = pos; +class ExtBase<string opstr, RegisterOperand RO>: + InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ext:$size), + !strconcat(opstr, " $rt, $rs, $pos, $size"), + [(set RO:$rt, (MipsExt RO:$rs, imm:$pos, imm:$size))], NoItinerary, + FrmR> { let Predicates = [HasMips32r2, HasStdEnc]; } -class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>: - FR<0x1f, _funct, (outs RC:$rt), - (ins RC:$rs, uimm16:$pos, size_ins:$sz, RC:$src), - !strconcat(instr_asm, " $rt, $rs, $pos, $sz"), - [(set RC:$rt, (MipsIns RC:$rs, imm:$pos, imm:$sz, RC:$src))], - NoItinerary> { - bits<5> pos; - bits<5> sz; - let rd = sz; - let shamt = pos; +class InsBase<string opstr, RegisterOperand RO>: + InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ins:$size, RO:$src), + !strconcat(opstr, " $rt, $rs, $pos, $size"), + [(set RO:$rt, (MipsIns RO:$rs, imm:$pos, imm:$size, RO:$src))], + NoItinerary, FrmR> { let Predicates = [HasMips32r2, HasStdEnc]; let Constraints = "$src = $rt"; } // Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*). -class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC, - RegisterClass PRC> : +class Atomic2Ops<PatFrag Op, RegisterClass DRC, RegisterClass PRC> : PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr), - !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"), [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>; -multiclass Atomic2Ops32<PatFrag Op, string Opstr> { - def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>, - Requires<[NotN64, HasStdEnc]>; - def _P8 : Atomic2Ops<Op, Opstr, CPURegs, CPU64Regs>, - Requires<[IsN64, HasStdEnc]> { +multiclass Atomic2Ops32<PatFrag Op> { + def NAME : Atomic2Ops<Op, CPURegs, CPURegs>, Requires<[NotN64, HasStdEnc]>; + def _P8 : Atomic2Ops<Op, CPURegs, CPU64Regs>, + Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; } } // Atomic Compare & Swap. 
-class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC, - RegisterClass PRC> : +class AtomicCmpSwap<PatFrag Op, RegisterClass DRC, RegisterClass PRC> : PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap), - !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"), [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>; -multiclass AtomicCmpSwap32<PatFrag Op, string Width> { - def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>, - Requires<[NotN64, HasStdEnc]>; - def _P8 : AtomicCmpSwap<Op, Width, CPURegs, CPU64Regs>, - Requires<[IsN64, HasStdEnc]> { +multiclass AtomicCmpSwap32<PatFrag Op> { + def NAME : AtomicCmpSwap<Op, CPURegs, CPURegs>, + Requires<[NotN64, HasStdEnc]>; + def _P8 : AtomicCmpSwap<Op, CPURegs, CPU64Regs>, + Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; } } -class LLBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> : - FMem<Opc, (outs RC:$rt), (ins Mem:$addr), - !strconcat(opstring, "\t$rt, $addr"), [], IILoad> { +class LLBase<string opstr, RegisterOperand RO, Operand Mem> : + InstSE<(outs RO:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"), + [], NoItinerary, FrmI> { + let DecoderMethod = "DecodeMem"; let mayLoad = 1; } -class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> : - FMem<Opc, (outs RC:$dst), (ins RC:$rt, Mem:$addr), - !strconcat(opstring, "\t$rt, $addr"), [], IIStore> { +class SCBase<string opstr, RegisterOperand RO, Operand Mem> : + InstSE<(outs RO:$dst), (ins RO:$rt, Mem:$addr), + !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { + let DecoderMethod = "DecodeMem"; let mayStore = 1; let Constraints = "$rt = $dst"; } +class MFC3OP<dag outs, dag ins, string asmstr> : + InstSE<outs, ins, asmstr, [], NoItinerary, FrmFR>; + //===----------------------------------------------------------------------===// // Pseudo instructions //===----------------------------------------------------------------------===// @@ -859,278 +727,276 @@ class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> : // Older Macro based SFI Model def SFI_GUARD_LOADSTORE : -MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), - "sfi_load_store_preamble\t$dst, $src1, $src2", []>; +MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_load_store_preamble\t$dst, $src1, $src2">; def SFI_GUARD_INDIRECT_CALL : -MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), - "sfi_indirect_call_preamble\t$dst, $src1, $src2", []>; +MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_indirect_call_preamble\t$dst, $src1, $src2">; def SFI_GUARD_INDIRECT_JMP : -MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), - "sfi_indirect_jump_preamble\t$dst, $src1, $src2", []>; +MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_indirect_jump_preamble\t$dst, $src1, $src2">; def SFI_GUARD_CALL : -MipsPseudo<(outs), (ins), "sfi_call_preamble", []>; +MipsAsmPseudoInst<(outs), (ins), "sfi_call_preamble">; def SFI_GUARD_RETURN : -MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), - "sfi_return_preamble\t$dst, $src1, $src2", []>; +MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_return_preamble\t$dst, $src1, $src2">; def SFI_NOP_IF_AT_BUNDLE_END : -MipsPseudo<(outs), (ins), "sfi_nop_if_at_bundle_end", []>; +MipsAsmPseudoInst<(outs), (ins), "sfi_nop_if_at_bundle_end">; def SFI_DATA_MASK : -MipsPseudo<(outs CPURegs:$dst), (ins 
CPURegs:$src1, CPURegs:$src2), - "sfi_data_mask\t$dst, $src1, $src2", []>; +MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_data_mask\t$dst, $src1, $src2">; // @LOCALMOD-END // Return RA. let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in -def RetRA : PseudoSE<(outs), (ins), "", [(MipsRet)]>; +def RetRA : PseudoSE<(outs), (ins), [(MipsRet)]>; let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt), - "!ADJCALLSTACKDOWN $amt", [(callseq_start timm:$amt)]>; def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), - "!ADJCALLSTACKUP $amt1", [(callseq_end timm:$amt1, timm:$amt2)]>; } -// When handling PIC code the assembler needs .cpload and .cprestore -// directives. If the real instructions corresponding these directives -// are used, we have the same behavior, but get also a bunch of warnings -// from the assembler. -let neverHasSideEffects = 1 in -def CPRESTORE : PseudoSE<(outs), (ins i32imm:$loc, CPURegs:$gp), - ".cprestore\t$loc", []>; - let usesCustomInserter = 1 in { - defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8, "load_add_8">; - defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16, "load_add_16">; - defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32<atomic_load_add_32, "load_add_32">; - defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32<atomic_load_sub_8, "load_sub_8">; - defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32<atomic_load_sub_16, "load_sub_16">; - defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32<atomic_load_sub_32, "load_sub_32">; - defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32<atomic_load_and_8, "load_and_8">; - defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32<atomic_load_and_16, "load_and_16">; - defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32<atomic_load_and_32, "load_and_32">; - defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32<atomic_load_or_8, "load_or_8">; - defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32<atomic_load_or_16, "load_or_16">; - defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32<atomic_load_or_32, "load_or_32">; - defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32<atomic_load_xor_8, "load_xor_8">; - defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32<atomic_load_xor_16, "load_xor_16">; - defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32<atomic_load_xor_32, "load_xor_32">; - defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32<atomic_load_nand_8, "load_nand_8">; - defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32<atomic_load_nand_16, "load_nand_16">; - defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32<atomic_load_nand_32, "load_nand_32">; - - defm ATOMIC_SWAP_I8 : Atomic2Ops32<atomic_swap_8, "swap_8">; - defm ATOMIC_SWAP_I16 : Atomic2Ops32<atomic_swap_16, "swap_16">; - defm ATOMIC_SWAP_I32 : Atomic2Ops32<atomic_swap_32, "swap_32">; - - defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32<atomic_cmp_swap_8, "8">; - defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32<atomic_cmp_swap_16, "16">; - defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32, "32">; + defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8>; + defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16>; + defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32<atomic_load_add_32>; + defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32<atomic_load_sub_8>; + defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32<atomic_load_sub_16>; + defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32<atomic_load_sub_32>; + defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32<atomic_load_and_8>; + defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32<atomic_load_and_16>; + defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32<atomic_load_and_32>; + defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32<atomic_load_or_8>; + defm 
ATOMIC_LOAD_OR_I16 : Atomic2Ops32<atomic_load_or_16>; + defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32<atomic_load_or_32>; + defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32<atomic_load_xor_8>; + defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32<atomic_load_xor_16>; + defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32<atomic_load_xor_32>; + defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32<atomic_load_nand_8>; + defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32<atomic_load_nand_16>; + defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32<atomic_load_nand_32>; + + defm ATOMIC_SWAP_I8 : Atomic2Ops32<atomic_swap_8>; + defm ATOMIC_SWAP_I16 : Atomic2Ops32<atomic_swap_16>; + defm ATOMIC_SWAP_I32 : Atomic2Ops32<atomic_swap_32>; + + defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32<atomic_cmp_swap_8>; + defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32<atomic_cmp_swap_16>; + defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32>; } //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// - -class LoadImm32< string instr_asm, Operand Od, RegisterClass RC> : - MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32), - !strconcat(instr_asm, "\t$rt, $imm32")> ; -def LoadImm32Reg : LoadImm32<"li", shamt,CPURegs>; - -class LoadAddress<string instr_asm, Operand MemOpnd, RegisterClass RC> : - MipsAsmPseudoInst<(outs RC:$rt), (ins MemOpnd:$addr), - !strconcat(instr_asm, "\t$rt, $addr")> ; -def LoadAddr32Reg : LoadAddress<"la", mem, CPURegs>; - -class LoadAddressImm<string instr_asm, Operand Od, RegisterClass RC> : - MipsAsmPseudoInst<(outs RC:$rt), (ins Od:$imm32), - !strconcat(instr_asm, "\t$rt, $imm32")> ; -def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegs>; - //===----------------------------------------------------------------------===// // MipsI Instructions //===----------------------------------------------------------------------===// /// Arithmetic Instructions (ALU Immediate) -def ADDiu : ArithLogicI<0x09, "addiu", add, simm16, immSExt16, CPURegs>, - IsAsCheapAsAMove; -def ADDi : ArithOverflowI<0x08, "addi", add, simm16, immSExt16, CPURegs>; -def SLTi : SetCC_I<0x0a, "slti", setlt, simm16, immSExt16, CPURegs>; -def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16, CPURegs>; -def ANDi : ArithLogicI<0x0c, "andi", and, uimm16, immZExt16, CPURegs>; -def ORi : ArithLogicI<0x0d, "ori", or, uimm16, immZExt16, CPURegs>; -def XORi : ArithLogicI<0x0e, "xori", xor, uimm16, immZExt16, CPURegs>; -def LUi : LoadUpper<0x0f, "lui", CPURegs, uimm16>; +def ADDiu : ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>, + ADDI_FM<0x9>, IsAsCheapAsAMove; +def ADDi : ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>; +def SLTi : SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, SLTI_FM<0xa>; +def SLTiu : SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, SLTI_FM<0xb>; +def ANDi : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>, ADDI_FM<0xc>; +def ORi : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>, ADDI_FM<0xd>; +def XORi : ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>, ADDI_FM<0xe>; +def LUi : LoadUpper<"lui", CPURegs, uimm16>, LUI_FM; /// Arithmetic Instructions (3-Operand, R-Type) -def ADDu : ArithLogicR<0x00, 0x21, "addu", add, IIAlu, CPURegs, 1>; -def SUBu : ArithLogicR<0x00, 0x23, "subu", sub, IIAlu, CPURegs>; -def ADD : ArithOverflowR<0x00, 0x20, "add", IIAlu, CPURegs, 1>; -def SUB : ArithOverflowR<0x00, 0x22, "sub", IIAlu, CPURegs>; -def SLT : SetCC_R<0x00, 0x2a, "slt", setlt, CPURegs>; -def SLTu : 
SetCC_R<0x00, 0x2b, "sltu", setult, CPURegs>; -def AND : ArithLogicR<0x00, 0x24, "and", and, IIAlu, CPURegs, 1>; -def OR : ArithLogicR<0x00, 0x25, "or", or, IIAlu, CPURegs, 1>; -def XOR : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPURegs, 1>; -def NOR : LogicNOR<0x00, 0x27, "nor", CPURegs>; +def ADDu : ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, ADD_FM<0, 0x21>; +def SUBu : ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, ADD_FM<0, 0x23>; +def MUL : ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, ADD_FM<0x1c, 2>; +def ADD : ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>; +def SUB : ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>; +def SLT : SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>; +def SLTu : SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>; +def AND : ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>; +def OR : ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>; +def XOR : ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>; +def NOR : LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>; /// Shift Instructions -def SLL : shift_rotate_imm32<0x00, 0x00, "sll", shl>; -def SRL : shift_rotate_imm32<0x02, 0x00, "srl", srl>; -def SRA : shift_rotate_imm32<0x03, 0x00, "sra", sra>; -def SLLV : shift_rotate_reg<0x04, 0x00, "sllv", shl, CPURegs>; -def SRLV : shift_rotate_reg<0x06, 0x00, "srlv", srl, CPURegs>; -def SRAV : shift_rotate_reg<0x07, 0x00, "srav", sra, CPURegs>; +def SLL : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>, SRA_FM<0, 0>; +def SRL : shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>, SRA_FM<2, 0>; +def SRA : shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>, SRA_FM<3, 0>; +def SLLV : shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>; +def SRLV : shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>; +def SRAV : shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>; // Rotate Instructions let Predicates = [HasMips32r2, HasStdEnc] in { - def ROTR : shift_rotate_imm32<0x02, 0x01, "rotr", rotr>; - def ROTRV : shift_rotate_reg<0x06, 0x01, "rotrv", rotr, CPURegs>; + def ROTR : shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr, immZExt5>, + SRA_FM<2, 1>; + def ROTRV : shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>, SRLV_FM<6, 1>; } /// Load and Store Instructions /// aligned -defm LB : LoadM32<0x20, "lb", sextloadi8>; -defm LBu : LoadM32<0x24, "lbu", zextloadi8>; -defm LH : LoadM32<0x21, "lh", sextloadi16>; -defm LHu : LoadM32<0x25, "lhu", zextloadi16>; -defm LW : LoadM32<0x23, "lw", load>; -defm SB : StoreM32<0x28, "sb", truncstorei8>; -defm SH : StoreM32<0x29, "sh", truncstorei16>; -defm SW : StoreM32<0x2b, "sw", store>; +defm LB : LoadM<"lb", CPURegs, sextloadi8>, LW_FM<0x20>; +defm LBu : LoadM<"lbu", CPURegs, zextloadi8>, LW_FM<0x24>; +defm LH : LoadM<"lh", CPURegs, sextloadi16>, LW_FM<0x21>; +defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, LW_FM<0x25>; +defm LW : LoadM<"lw", CPURegs, load>, LW_FM<0x23>; +defm SB : StoreM<"sb", CPURegs, truncstorei8>, LW_FM<0x28>; +defm SH : StoreM<"sh", CPURegs, truncstorei16>, LW_FM<0x29>; +defm SW : StoreM<"sw", CPURegs, store>, LW_FM<0x2b>; /// load/store left/right -defm LWL : LoadLeftRightM32<0x22, "lwl", MipsLWL>; -defm LWR : LoadLeftRightM32<0x26, "lwr", MipsLWR>; -defm SWL : StoreLeftRightM32<0x2a, "swl", MipsSWL>; -defm SWR : StoreLeftRightM32<0x2e, "swr", MipsSWR>; +defm LWL : LoadLeftRightM<"lwl", MipsLWL, CPURegs>, LW_FM<0x22>; +defm LWR : LoadLeftRightM<"lwr", MipsLWR, CPURegs>, LW_FM<0x26>; +defm SWL : StoreLeftRightM<"swl", MipsSWL, CPURegs>, 
LW_FM<0x2a>; +defm SWR : StoreLeftRightM<"swr", MipsSWR, CPURegs>, LW_FM<0x2e>; -let hasSideEffects = 1 in -def SYNC : InstSE<(outs), (ins i32imm:$stype), "sync $stype", - [(MipsSync imm:$stype)], NoItinerary, FrmOther> -{ - bits<5> stype; - let Opcode = 0; - let Inst{25-11} = 0; - let Inst{10-6} = stype; - let Inst{5-0} = 15; -} +def SYNC : SYNC_FT, SYNC_FM; /// Load-linked, Store-conditional -def LL : LLBase<0x30, "ll", CPURegs, mem>, - Requires<[NotN64, HasStdEnc]>; -def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>, - Requires<[IsN64, HasStdEnc]> { - let DecoderNamespace = "Mips64"; +let Predicates = [NotN64, HasStdEnc] in { + def LL : LLBase<"ll", CPURegsOpnd, mem>, LW_FM<0x30>; + def SC : SCBase<"sc", CPURegsOpnd, mem>, LW_FM<0x38>; } -def SC : SCBase<0x38, "sc", CPURegs, mem>, - Requires<[NotN64, HasStdEnc]>; -def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, - Requires<[IsN64, HasStdEnc]> { - let DecoderNamespace = "Mips64"; +let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in { + def LL_P8 : LLBase<"ll", CPURegsOpnd, mem64>, LW_FM<0x30>; + def SC_P8 : SCBase<"sc", CPURegsOpnd, mem64>, LW_FM<0x38>; } /// Jump and Branch Instructions -def J : JumpFJ<0x02, jmptarget, "j", br, bb>, +def J : JumpFJ<jmptarget, "j", br, bb>, FJ<2>, Requires<[RelocStatic, HasStdEnc]>, IsBranch; -def JR : IndirectBranch<CPURegs>; -def B : UncondBranch<0x04, "b">; -def BEQ : CBranch<0x04, "beq", seteq, CPURegs>; -def BNE : CBranch<0x05, "bne", setne, CPURegs>; -def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>; -def BGTZ : CBranchZero<0x07, 0, "bgtz", setgt, CPURegs>; -def BLEZ : CBranchZero<0x06, 0, "blez", setle, CPURegs>; -def BLTZ : CBranchZero<0x01, 0, "bltz", setlt, CPURegs>; - -let rt = 0, rs = 0, isBranch = 1, isTerminator = 1, isBarrier = 1, - hasDelaySlot = 1, Defs = [RA] in -def BAL_BR: FI<0x1, (outs), (ins brtarget:$imm16), "bal\t$imm16", [], IIBranch>; - -def JAL : JumpLink<0x03, "jal">; -def JALR : JumpLinkReg<0x00, 0x09, "jalr", CPURegs>; -def BGEZAL : BranchLink<"bgezal", 0x11, CPURegs>; -def BLTZAL : BranchLink<"bltzal", 0x10, CPURegs>; -def TAILCALL : JumpFJ<0x02, calltarget, "j", MipsTailCall, imm>, IsTailCall; -def TAILCALL_R : JumpFR<CPURegs, MipsTailCall>, IsTailCall; - -def RET : RetBase<CPURegs>; +def JR : IndirectBranch<CPURegs>, MTLO_FM<8>; +def B : UncondBranch<"b">, B_FM; +def BEQ : CBranch<"beq", seteq, CPURegs>, BEQ_FM<4>; +def BNE : CBranch<"bne", setne, CPURegs>, BEQ_FM<5>; +def BGEZ : CBranchZero<"bgez", setge, CPURegs>, BGEZ_FM<1, 1>; +def BGTZ : CBranchZero<"bgtz", setgt, CPURegs>, BGEZ_FM<7, 0>; +def BLEZ : CBranchZero<"blez", setle, CPURegs>, BGEZ_FM<6, 0>; +def BLTZ : CBranchZero<"bltz", setlt, CPURegs>, BGEZ_FM<1, 0>; + +def BAL_BR: BAL_FT, BAL_FM; + +def JAL : JumpLink<"jal">, FJ<3>; +def JALR : JumpLinkReg<"jalr", CPURegs>, JALR_FM; +def BGEZAL : BGEZAL_FT<"bgezal", CPURegsOpnd>, BGEZAL_FM<0x11>; +def BLTZAL : BGEZAL_FT<"bltzal", CPURegsOpnd>, BGEZAL_FM<0x10>; +def TAILCALL : JumpFJ<calltarget, "j", MipsTailCall, imm>, FJ<2>, IsTailCall; +def TAILCALL_R : JumpFR<CPURegs, MipsTailCall>, MTLO_FM<8>, IsTailCall; + +def RET : RetBase<CPURegs>, MTLO_FM<8>; /// Multiply and Divide Instructions. 
-def MULT : Mult32<0x18, "mult", IIImul>; -def MULTu : Mult32<0x19, "multu", IIImul>; -def SDIV : Div32<MipsDivRem, 0x1a, "div", IIIdiv>; -def UDIV : Div32<MipsDivRemU, 0x1b, "divu", IIIdiv>; +def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>; +def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>; +def SDIV : Div<MipsDivRem, "div", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1a>; +def UDIV : Div<MipsDivRemU, "divu", IIIdiv, CPURegsOpnd, [HI, LO]>, + MULT_FM<0, 0x1b>; -def MTHI : MoveToLOHI<0x11, "mthi", CPURegs, [HI]>; -def MTLO : MoveToLOHI<0x13, "mtlo", CPURegs, [LO]>; -def MFHI : MoveFromLOHI<0x10, "mfhi", CPURegs, [HI]>; -def MFLO : MoveFromLOHI<0x12, "mflo", CPURegs, [LO]>; +def MTHI : MoveToLOHI<"mthi", CPURegs, [HI]>, MTLO_FM<0x11>; +def MTLO : MoveToLOHI<"mtlo", CPURegs, [LO]>, MTLO_FM<0x13>; +def MFHI : MoveFromLOHI<"mfhi", CPURegs, [HI]>, MFLO_FM<0x10>; +def MFLO : MoveFromLOHI<"mflo", CPURegs, [LO]>, MFLO_FM<0x12>; /// Sign Ext In Register Instructions. -def SEB : SignExtInReg<0x10, "seb", i8, CPURegs>; -def SEH : SignExtInReg<0x18, "seh", i16, CPURegs>; +def SEB : SignExtInReg<"seb", i8, CPURegs>, SEB_FM<0x10, 0x20>; +def SEH : SignExtInReg<"seh", i16, CPURegs>, SEB_FM<0x18, 0x20>; /// Count Leading -def CLZ : CountLeading0<0x20, "clz", CPURegs>; -def CLO : CountLeading1<0x21, "clo", CPURegs>; +def CLZ : CountLeading0<"clz", CPURegsOpnd>, CLO_FM<0x20>; +def CLO : CountLeading1<"clo", CPURegsOpnd>, CLO_FM<0x21>; /// Word Swap Bytes Within Halfwords -def WSBH : SubwordSwap<0x20, 0x2, "wsbh", CPURegs>; +def WSBH : SubwordSwap<"wsbh", CPURegsOpnd>, SEB_FM<2, 0x20>; -/// No operation -let addr=0 in - def NOP : FJ<0, (outs), (ins), "nop", [], IIAlu>; +/// No operation. +/// FIXME: NOP should be an alias of "sll $0, $0, 0". +def NOP : InstSE<(outs), (ins), "nop", [], IIAlu, FrmJ>, NOP_FM; // FrameIndexes are legalized when they are operands from load/store // instructions. The same not happens for stack address copies, so an // add op with mem ComplexPattern is used and the stack address copy // can be matched. It's similar to Sparc LEA_ADDRi -def LEA_ADDiu : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>; +def LEA_ADDiu : EffectiveAddress<"addiu", CPURegs, mem_ea>, LW_FM<9>; // MADD*/MSUB* -def MADD : MArithR<0, "madd", MipsMAdd, 1>; -def MADDU : MArithR<1, "maddu", MipsMAddu, 1>; -def MSUB : MArithR<4, "msub", MipsMSub>; -def MSUBU : MArithR<5, "msubu", MipsMSubu>; +def MADD : MArithR<"madd", MipsMAdd, 1>, MULT_FM<0x1c, 0>; +def MADDU : MArithR<"maddu", MipsMAddu, 1>, MULT_FM<0x1c, 1>; +def MSUB : MArithR<"msub", MipsMSub>, MULT_FM<0x1c, 4>; +def MSUBU : MArithR<"msubu", MipsMSubu>, MULT_FM<0x1c, 5>; + +def RDHWR : ReadHardware<CPURegs, HWRegsOpnd>, RDHWR_FM; -// MUL is a assembly macro in the current used ISAs. In recent ISA's -// it is a real instruction. 
-def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>, - Requires<[HasStdEnc]>; +def EXT : ExtBase<"ext", CPURegsOpnd>, EXT_FM<0>; +def INS : InsBase<"ins", CPURegsOpnd>, EXT_FM<4>; -def RDHWR : ReadHardware<CPURegs, HWRegs>; +/// Move Control Registers From/To CPU Registers +def MFC0_3OP : MFC3OP<(outs CPURegs:$rt), (ins CPURegs:$rd, uimm16:$sel), + "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>; + +def MTC0_3OP : MFC3OP<(outs CPURegs:$rd, uimm16:$sel), (ins CPURegs:$rt), + "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>; + +def MFC2_3OP : MFC3OP<(outs CPURegs:$rt), (ins CPURegs:$rd, uimm16:$sel), + "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>; -def EXT : ExtBase<0, "ext", CPURegs>; -def INS : InsBase<4, "ins", CPURegs>; +def MTC2_3OP : MFC3OP<(outs CPURegs:$rd, uimm16:$sel), (ins CPURegs:$rt), + "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>; //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// -def : InstAlias<"move $dst,$src", (ADD CPURegs:$dst,CPURegs:$src,ZERO)>; -def : InstAlias<"bal $offset", (BGEZAL RA,brtarget:$offset)>; -def : InstAlias<"addu $rs,$rt,$imm", - (ADDiu CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; -def : InstAlias<"add $rs,$rt,$imm", - (ADDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; -def : InstAlias<"and $rs,$rt,$imm", - (ANDi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; -def : InstAlias<"j $rs", (JR CPURegs:$rs)>; -def : InstAlias<"not $rt,$rs", (NOR CPURegs:$rt,CPURegs:$rs,ZERO)>; -def : InstAlias<"neg $rt,$rs", (SUB CPURegs:$rt,ZERO,CPURegs:$rs)>; -def : InstAlias<"negu $rt,$rs", (SUBu CPURegs:$rt,ZERO,CPURegs:$rs)>; -def : InstAlias<"slt $rs,$rt,$imm", - (SLTi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; -def : InstAlias<"xor $rs,$rt,$imm", - (XORi CPURegs:$rs,CPURegs:$rt,simm16:$imm)>; +def : InstAlias<"move $dst,$src", (ADDu CPURegsOpnd:$dst, + CPURegsOpnd:$src,ZERO)>, Requires<[NotMips64]>; +def : InstAlias<"bal $offset", (BGEZAL RA, brtarget:$offset)>; +def : InstAlias<"addu $rs, $rt, $imm", + (ADDiu CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>; +def : InstAlias<"add $rs, $rt, $imm", + (ADDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>; +def : InstAlias<"and $rs, $rt, $imm", + (ANDi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>; +def : InstAlias<"j $rs", (JR CPURegs:$rs)>, Requires<[NotMips64]>; +def : InstAlias<"not $rt, $rs", (NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO)>; +def : InstAlias<"neg $rt, $rs", (SUB CPURegsOpnd:$rt, ZERO, CPURegsOpnd:$rs)>; +def : InstAlias<"negu $rt, $rs", (SUBu CPURegsOpnd:$rt, ZERO, + CPURegsOpnd:$rs)>; +def : InstAlias<"slt $rs, $rt, $imm", + (SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm)>; +def : InstAlias<"xor $rs, $rt, $imm", + (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm)>, + Requires<[NotMips64]>; +def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegs:$rt, CPURegs:$rd, 0)>; +def : InstAlias<"mtc0 $rt, $rd", (MTC0_3OP CPURegs:$rd, 0, CPURegs:$rt)>; +def : InstAlias<"mfc2 $rt, $rd", (MFC2_3OP CPURegs:$rt, CPURegs:$rd, 0)>; +def : InstAlias<"mtc2 $rt, $rd", (MTC2_3OP CPURegs:$rd, 0, CPURegs:$rt)>; + +//===----------------------------------------------------------------------===// +// Assembler Pseudo Instructions +//===----------------------------------------------------------------------===// + +class LoadImm32< string instr_asm, Operand Od, RegisterOperand RO> : + MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32), + !strconcat(instr_asm, "\t$rt, $imm32")> ; +def LoadImm32Reg : LoadImm32<"li", 
shamt,CPURegsOpnd>; + +class LoadAddress<string instr_asm, Operand MemOpnd, RegisterOperand RO> : + MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr), + !strconcat(instr_asm, "\t$rt, $addr")> ; +def LoadAddr32Reg : LoadAddress<"la", mem, CPURegsOpnd>; + +class LoadAddressImm<string instr_asm, Operand Od, RegisterOperand RO> : + MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32), + !strconcat(instr_asm, "\t$rt, $imm32")> ; +def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegsOpnd>; + + //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions @@ -1215,7 +1081,7 @@ def : WrapperPat<tglobaltlsaddr, ADDiu, CPURegs>; // Mips does not have "not", so we expand our way def : MipsPat<(not CPURegs:$in), - (NOR CPURegs:$in, ZERO)>; + (NOR CPURegsOpnd:$in, ZERO)>; // extended loads let Predicates = [NotN64, HasStdEnc] in { diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp index 5fd600ebc9..1b2a325d3c 100644 --- a/lib/Target/Mips/MipsJITInfo.cpp +++ b/lib/Target/Mips/MipsJITInfo.cpp @@ -17,7 +17,7 @@ #include "MipsRelocations.h" #include "MipsSubtarget.h" #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Memory.h" diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index 1d53a1508e..30f68b156e 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -24,7 +24,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetInstrInfo.h" diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 11eef6591c..0c71596cae 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -13,7 +13,7 @@ #include "MipsSubtarget.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" using namespace llvm; diff --git a/lib/Target/Mips/MipsNaClRewritePass.cpp b/lib/Target/Mips/MipsNaClRewritePass.cpp index 1ef5632e09..5e4e5e3b8f 100644 --- a/lib/Target/Mips/MipsNaClRewritePass.cpp +++ b/lib/Target/Mips/MipsNaClRewritePass.cpp @@ -28,7 +28,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/CommandLine.h" diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 36d8971579..98464c7b10 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -25,8 +25,9 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Constants.h" #include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -35,7 +36,6 @@ #include 
"llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" #define GET_REGINFO_TARGET_DESC #include "MipsGenRegisterInfo.inc" diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index f07a10c3dd..c6eb0e1e87 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -331,3 +331,48 @@ def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>; // Accumulator Registers def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>; + +def CPURegsAsmOperand : AsmOperandClass { + let Name = "CPURegsAsm"; + let ParserMethod = "parseCPURegs"; +} + +def CPU64RegsAsmOperand : AsmOperandClass { + let Name = "CPU64RegsAsm"; + let ParserMethod = "parseCPU64Regs"; +} + +def CCRAsmOperand : AsmOperandClass { + let Name = "CCRAsm"; + let ParserMethod = "parseCCRRegs"; +} + +def CPURegsOpnd : RegisterOperand<CPURegs, "printCPURegs"> { + let ParserMatchClass = CPURegsAsmOperand; +} + +def CPU64RegsOpnd : RegisterOperand<CPU64Regs, "printCPURegs"> { + let ParserMatchClass = CPU64RegsAsmOperand; +} + +def CCROpnd : RegisterOperand<CCR, "printCPURegs"> { + let ParserMatchClass = CCRAsmOperand; +} + +def HWRegsAsmOperand : AsmOperandClass { + let Name = "HWRegsAsm"; + let ParserMethod = "parseHWRegs"; +} + +def HW64RegsAsmOperand : AsmOperandClass { + let Name = "HW64RegsAsm"; + let ParserMethod = "parseHW64Regs"; +} + +def HWRegsOpnd : RegisterOperand<HWRegs, "printCPURegs"> { + let ParserMatchClass = HWRegsAsmOperand; +} + +def HW64RegsOpnd : RegisterOperand<HWRegs, "printCPURegs"> { + let ParserMatchClass = HW64RegsAsmOperand; +} diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index f7603a9d74..60b12337d7 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -22,8 +22,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/DataLayout.h" -#include "llvm/Function.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index a714411f22..cd8f9f41d7 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -90,7 +90,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. if (Mips::CPURegsRegClass.contains(SrcReg)) - Opc = Mips::ADDu, ZeroReg = Mips::ZERO; + Opc = Mips::OR, ZeroReg = Mips::ZERO; else if (Mips::CCRRegClass.contains(SrcReg)) Opc = Mips::CFC1; else if (Mips::FGR32RegClass.contains(SrcReg)) @@ -120,7 +120,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = Mips::MOVCCRToCCR; else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. 
if (Mips::CPU64RegsRegClass.contains(SrcReg)) - Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64; + Opc = Mips::OR64, ZeroReg = Mips::ZERO_64; else if (SrcReg == Mips::HI64) Opc = Mips::MFHI64, SrcReg = 0; else if (SrcReg == Mips::LO64) @@ -144,11 +144,11 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (DestReg) MIB.addReg(DestReg, RegState::Define); - if (ZeroReg) - MIB.addReg(ZeroReg); - if (SrcReg) MIB.addReg(SrcReg, getKillRegState(KillSrc)); + + if (ZeroReg) + MIB.addReg(ZeroReg); } void MipsSEInstrInfo:: diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index 44b1827f8d..abeab7b61b 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -25,9 +25,10 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Constants.h" #include "llvm/DebugInfo.h" -#include "llvm/Function.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -36,7 +37,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" using namespace llvm; diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 83b58a9b53..da6ca892a1 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -45,15 +45,16 @@ MipsTargetMachine(const Target &T, StringRef TT, Subtarget(TT, CPU, FS, isLittle, RM), DL(isLittle ? (Subtarget.isABI_N64() ? - "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : - "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") : + "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-" + "n32:64-S128" : + "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64") : (Subtarget.isABI_N64() ? 
- "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : - "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), + "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-" + "n32:64-S128" : + "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")), InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), - TLInfo(*this), TSInfo(*this), JITInfo(), - STTI(&TLInfo), VTTI(&TLInfo) { + TLInfo(*this), TSInfo(*this), JITInfo() { } void MipsebTargetMachine::anchor() { } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 8b1a06f320..c4928c21eb 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -20,10 +20,10 @@ #include "MipsJITInfo.h" #include "MipsSelectionDAGInfo.h" #include "MipsSubtarget.h" -#include "llvm/DataLayout.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { class formatted_raw_ostream; @@ -32,13 +32,11 @@ class MipsRegisterInfo; class MipsTargetMachine : public LLVMTargetMachine { MipsSubtarget Subtarget; const DataLayout DL; // Calculates type size & alignment - const MipsInstrInfo *InstrInfo; - const MipsFrameLowering *FrameLowering; + OwningPtr<const MipsInstrInfo> InstrInfo; + OwningPtr<const MipsFrameLowering> FrameLowering; MipsTargetLowering TLInfo; MipsSelectionDAGInfo TSInfo; MipsJITInfo JITInfo; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: MipsTargetMachine(const Target &T, StringRef TT, @@ -47,12 +45,12 @@ public: CodeGenOpt::Level OL, bool isLittle); - virtual ~MipsTargetMachine() { delete InstrInfo; } + virtual ~MipsTargetMachine() {} virtual const MipsInstrInfo *getInstrInfo() const - { return InstrInfo; } + { return InstrInfo.get(); } virtual const TargetFrameLowering *getFrameLowering() const - { return FrameLowering; } + { return FrameLowering.get(); } virtual const MipsSubtarget *getSubtargetImpl() const { return &Subtarget; } virtual const DataLayout *getDataLayout() const @@ -72,13 +70,6 @@ public: return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } - // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 59d07dae12..4270b79933 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -9,9 +9,9 @@ #include "MipsTargetObjectFile.h" #include "MipsSubtarget.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalVariable.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp index 243632b20a..3615c146a5 100644 --- a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp +++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "Mips.h" -#include 
"llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index b29a2962aa..097b50aa4e 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -16,10 +16,10 @@ #define LLVM_TARGET_NVPTX_H #include "MCTargetDesc/NVPTXBaseInfo.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Value.h" #include <cassert> #include <iosfwd> diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp index 38c7083fea..60f52a46da 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "NVPTXAllocaHoisting.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" namespace llvm { diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h index 768d514dbb..19d73c5783 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h @@ -15,7 +15,7 @@ #define NVPTX_ALLOCA_HOISTING_H_ #include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Pass.h" namespace llvm { diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index af07576d59..22da8f33d7 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -29,12 +29,13 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" @@ -164,20 +165,14 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { case Instruction::GetElementPtr: { const DataLayout &TD = *AP.TM.getDataLayout(); // Generate a symbolic expression for the byte address - const Constant *PtrVal = CE->getOperand(0); - SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end()); - int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec); + APInt OffsetAI(TD.getPointerSizeInBits(), 0); + cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI); const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); - if (Offset == 0) + if (!OffsetAI) return Base; - // Truncate/sext the offset to the pointer size. - if (TD.getPointerSizeInBits() != 64) { - int SExtAmount = 64-TD.getPointerSizeInBits(); - Offset = (Offset << SExtAmount) >> SExtAmount; - } - + int64_t Offset = OffsetAI.getSExtValue(); return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), Ctx); } @@ -1521,8 +1516,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, continue; } - if (PAL.getParamAttributes(paramIndex+1). 
- hasAttribute(Attributes::ByVal) == false) { + if (PAL.hasAttribute(paramIndex+1, Attribute::ByVal) == false) { // Just a scalar const PointerType *PTy = dyn_cast<PointerType>(Ty); if (isKernelFunc) { diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 29db4abe6c..42498f0bf7 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -21,7 +21,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSymbol.h" diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index d68e3b6871..36ab7f5c15 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -13,8 +13,8 @@ #include "NVPTXISelDAGToDAG.h" -#include "llvm/GlobalValue.h" -#include "llvm/Instructions.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 29e4f17962..14f2091a3f 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -18,7 +18,7 @@ #include "NVPTXRegisterInfo.h" #include "NVPTXTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Compiler.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 4e9d68687a..b3ab9fc9d0 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -23,13 +23,13 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalValue.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Module.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -1022,7 +1022,7 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, // to newly created nodes. The SDNOdes for params have to // appear in the same order as their order of appearance // in the original function. "idx+1" holds that order. - if (PAL.getParamAttributes(i+1).hasAttribute(Attributes::ByVal) == false) { + if (PAL.hasAttribute(i+1, Attribute::ByVal) == false) { // A plain scalar. 
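Both NVPTX hunks above move from the old Attributes interface to indexed AttributeSet queries, where parameter attributes live at index ParamNo+1 and function-level attributes at AttributeSet::FunctionIndex; the same pattern recurs later in the PowerPC files. A hedged, self-contained sketch of the two query forms, with illustrative helper names:

// Sketch only: new-style attribute queries of this LLVM vintage.
// #include "llvm/IR/Attributes.h"
// #include "llvm/IR/Function.h"
using namespace llvm;

static bool paramIsByVal(const Function &F, unsigned ParamNo) {
  // Attribute indices are one-based for parameters (index 0 is the return value).
  return F.getAttributes().hasAttribute(ParamNo + 1, Attribute::ByVal);
}

static bool hasNoRedZone(const Function &F) {
  return F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                        Attribute::NoRedZone);
}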
if (isABI || isKernel) { // If ABI, load from the param symbol diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index cd50deb26a..6fe654cb49 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -16,7 +16,7 @@ #include "NVPTXTargetMachine.h" #define GET_INSTRINFO_CTOR #include "NVPTXGenInstrInfo.inc" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 8e40c965bf..f7fa7aa61d 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -12,15 +12,15 @@ //===----------------------------------------------------------------------===// #include "NVPTXLowerAggrCopies.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/InstIterator.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h index 9bafce274e..286e753fa9 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -16,7 +16,7 @@ #define NVPTX_LOWER_AGGR_COPIES_H #include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Pass.h" namespace llvm { diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h index 9c832e1b51..e166be5a68 100644 --- a/lib/Target/NVPTX/NVPTXSection.h +++ b/lib/Target/NVPTX/NVPTXSection.h @@ -14,7 +14,7 @@ #ifndef LLVM_NVPTXSECTION_H #define LLVM_NVPTXSECTION_H -#include "llvm/GlobalVariable.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCSection.h" #include <vector> @@ -38,6 +38,8 @@ public: virtual bool isBaseAddressKnownZero() const { return true; } virtual bool UseCodeAlign() const { return false; } virtual bool isVirtualSection() const { return false; } + virtual std::string getLabelBeginName() const { return ""; } + virtual std::string getLabelEndName() const { return ""; } }; } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp index 6171292eeb..babe29500d 100644 --- a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp +++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp @@ -13,10 +13,10 @@ #include "NVPTXSplitBBatBar.h" #include "NVPTXUtilities.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/InstIterator.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 0867a4e01e..b4e049ea3e 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ 
-25,7 +25,7 @@ #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" @@ -35,7 +35,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -72,8 +71,7 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit), - STTI(&TLInfo), VTTI(&TLInfo) + InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index b4763a5bae..1a732be1ad 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -21,11 +21,10 @@ #include "NVPTXInstrInfo.h" #include "NVPTXRegisterInfo.h" #include "NVPTXSubtarget.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSelectionDAGInfo.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -45,9 +44,6 @@ class NVPTXTargetMachine : public LLVMTargetMachine { // Hold Strings that can be free'd all together with NVPTXTargetMachine ManagedStringPool ManagedStrPool; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; - //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, // bool DisableVerify, MCContext *&OutCtx); @@ -76,12 +72,6 @@ public: virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } //virtual bool addInstSelector(PassManagerBase &PM, // CodeGenOpt::Level OptLevel); diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp index fff3f43bb0..1ccc9f7c02 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -12,11 +12,11 @@ #include "NVPTXUtilities.h" #include "NVPTX.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Module.h" -#include "llvm/Operator.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include <algorithm> #include <cstring> #include <map> diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h index 5ea7bc8640..247e09b8bc 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.h +++ b/lib/Target/NVPTX/NVPTXUtilities.h @@ -14,10 +14,10 @@ #ifndef NVPTXUTILITIES_H #define NVPTXUTILITIES_H -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Value.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" #include <cstdarg> 
#include <set> #include <string> diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp index f3624b9f23..6c801b875e 100644 --- a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp +++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "NVPTX.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/NVPTX/VectorElementize.cpp b/lib/Target/NVPTX/VectorElementize.cpp index e67e2e42b3..f1b285dd78 100644 --- a/lib/Target/NVPTX/VectorElementize.cpp +++ b/lib/Target/NVPTX/VectorElementize.cpp @@ -43,15 +43,15 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Constant.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Type.h" using namespace llvm; @@ -243,7 +243,7 @@ void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr, std::vector<MachineInstr *>& copies) { copies.push_back(F.CloneMachineInstr(Instr)); - MachineInstr *copy=copies[0]; + MachineInstrBuilder copy(F, copies[0]); copy->setDesc(InstrInfo->get(getScalarVersion(copy))); // Remove the dest, that should be a vector operand. @@ -260,12 +260,11 @@ void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr, for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) copy->RemoveOperand(0); - for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) { - copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true)); - } + for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) + copy.addReg(scalarRegs[i], RegState::Define); for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) - copy->addOperand(otherOperands[i]); + copy.addOperand(otherOperands[i]); } @@ -280,7 +279,7 @@ void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr, std::vector<MachineInstr *>& copies) { copies.push_back(F.CloneMachineInstr(Instr)); - MachineInstr *copy=copies[0]; + MachineInstrBuilder copy(F, copies[0]); copy->setDesc(InstrInfo->get(getScalarVersion(copy))); MachineOperand src = copy->getOperand(0); @@ -297,10 +296,10 @@ void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr, copy->RemoveOperand(0); for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) - copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], false)); + copy.addReg(scalarRegs[i]); for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) - copy->addOperand(otherOperands[i]); + copy.addOperand(otherOperands[i]); } ///============================================================================= @@ -327,8 +326,8 @@ void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr, DebugLoc DL = Instr->getDebugLoc(); for (unsigned i=0; i<numcopies; i++) { - MachineInstr *copy = BuildMI(F, DL, - InstrInfo->get(getScalarVersion(Instr)), dest[i]); + MachineInstrBuilder copy = + BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), dest[i]); MachineOperand which=Instr->getOperand(3+i); assert(which.isImm() && "Shuffle operand not a constant"); @@ -336,9 +335,9 @@ void 
VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr, int elem=src%numcopies; if (which.getImm() < numcopies) - copy->addOperand(MachineOperand::CreateReg(src1[elem], false)); + copy.addReg(src1[elem]); else - copy->addOperand(MachineOperand::CreateReg(src2[elem], false)); + copy.addReg(src2[elem]); copies.push_back(copy); } } @@ -358,12 +357,9 @@ void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr, assert(which.isImm() && "Extract operand not a constant"); DebugLoc DL = Instr->getDebugLoc(); - - MachineInstr *copy = BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), - Instr->getOperand(0).getReg()); - copy->addOperand(MachineOperand::CreateReg(src[which.getImm()], false)); - - copies.push_back(copy); + copies.push_back(BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), + Instr->getOperand(0).getReg()) + .addReg(src[which.getImm()])); } ///============================================================================= @@ -389,13 +385,13 @@ void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr, DebugLoc DL = Instr->getDebugLoc(); for (unsigned i=0; i<numcopies; i++) { - MachineInstr *copy = BuildMI(F, DL, - InstrInfo->get(getScalarVersion(Instr)), dest[i]); + MachineInstrBuilder copy = + BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), dest[i]); if (i != elem) - copy->addOperand(MachineOperand::CreateReg(src[i], false)); + copy.addReg(src[i]); else - copy->addOperand(Instr->getOperand(2)); + copy.addOperand(Instr->getOperand(2)); copies.push_back(copy); } @@ -418,15 +414,10 @@ void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr, DebugLoc DL = Instr->getDebugLoc(); - for (unsigned i=0; i<numcopies; i++) { - MachineInstr *copy = BuildMI(F, DL, - InstrInfo->get(getScalarVersion(Instr)), dest[i]); - - copy->addOperand(Instr->getOperand(1+i)); - - copies.push_back(copy); - } - + for (unsigned i=0; i<numcopies; i++) + copies.push_back(BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), + dest[i]) + .addOperand(Instr->getOperand(1+i))); } ///============================================================================= @@ -439,7 +430,7 @@ void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr, std::vector<MachineInstr *>& copies) { copies.push_back(F.CloneMachineInstr(Instr)); - MachineInstr *copy=copies[0]; + MachineInstrBuilder copy(F, copies[0]); copy->setDesc(InstrInfo->get(getScalarVersion(copy))); // Remove the dest, that should be a vector operand. 
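The VectorElementize hunks above replace manual copy->addOperand(MachineOperand::CreateReg(...)) calls with MachineInstrBuilder, either by wrapping an already-cloned instruction or by chaining directly off BuildMI. A hedged sketch of the wrapping form in isolation; the function and variable names are illustrative:

// Sketch only: attach a builder to an existing MachineInstr and append
// operands fluently.  Assumes "llvm/CodeGen/MachineInstrBuilder.h" and
// "llvm/ADT/ArrayRef.h".
using namespace llvm;
static void appendScalarOperands(MachineFunction &F, MachineInstr *MI,
                                 ArrayRef<unsigned> ScalarRegs,
                                 ArrayRef<MachineOperand> OtherOps) {
  MachineInstrBuilder MIB(F, MI);                // builder over an existing instr
  for (unsigned i = 0, e = ScalarRegs.size(); i != e; ++i)
    MIB.addReg(ScalarRegs[i], RegState::Define); // scalar register defs first
  for (unsigned i = 0, e = OtherOps.size(); i != e; ++i)
    MIB.addOperand(OtherOps[i]);                 // then the remaining operands
}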
@@ -457,10 +448,10 @@ void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr, copy->RemoveOperand(0); for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) - copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true)); + copy.addReg(scalarRegs[i], RegState::Define); for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) - copy->addOperand(otherOperands[i]); + copy.addOperand(otherOperands[i]); } ///============================================================================= @@ -504,7 +495,7 @@ void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr, copies.push_back(F.CloneMachineInstr(Instr)); for (unsigned i=0; i<numcopies; ++i) { - MachineInstr *copy = copies[i]; + MachineInstrBuilder copy(F, copies[i]); std::vector<MachineOperand> allOperands; std::vector<bool> isDef; @@ -530,13 +521,13 @@ void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr, if (isVectorRegister(regnum)) { SmallVector<unsigned, 4> scalarRegs = getScalarRegisters(regnum); - copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], isDef[j])); + copy.addReg(scalarRegs[i], getDefRegState(isDef[j])); } else - copy->addOperand(oper); + copy.addOperand(oper); } else - copy->addOperand(oper); + copy.addOperand(oper); } } } @@ -659,7 +650,7 @@ unsigned VectorElementize::copyProp(MachineFunction &F) { for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) Instr->RemoveOperand(0); for (unsigned i=0, e=operands.size(); i!=e; ++i) - Instr->addOperand(operands[i]); + Instr->addOperand(F, operands[i]); } } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 631f749581..f24edf62ed 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -32,6 +32,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case FK_Data_8: case PPC::fixup_ppc_toc: case PPC::fixup_ppc_tlsreg: + case PPC::fixup_ppc_nofixup: return Value; case PPC::fixup_ppc_lo14: case PPC::fixup_ppc_toc16_ds: @@ -85,7 +86,8 @@ public: { "fixup_ppc_toc", 0, 64, 0 }, { "fixup_ppc_toc16", 16, 16, 0 }, { "fixup_ppc_toc16_ds", 16, 14, 0 }, - { "fixup_ppc_tlsreg", 0, 0, 0 } + { "fixup_ppc_tlsreg", 0, 0, 0 }, + { "fixup_ppc_nofixup", 0, 0, 0 } }; if (Kind < FirstTargetFixupKind) @@ -117,7 +119,7 @@ public: bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { // FIXME. 
llvm_unreachable("relaxInstruction() unimplemented"); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 462d7072b5..d61e741ef5 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -9,6 +9,7 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCValue.h" @@ -33,9 +34,25 @@ namespace { const MCFixup &Fixup, bool IsPCRel) const; virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset); + + virtual void sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs); + }; + + class PPCELFRelocationEntry : public ELFRelocationEntry { + public: + PPCELFRelocationEntry(const ELFRelocationEntry &RE); + bool operator<(const PPCELFRelocationEntry &RE) const { + return (RE.r_offset < r_offset || + (RE.r_offset == r_offset && RE.Type > Type)); + } }; } +PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE) + : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol, + RE.r_addend, *RE.Fixup) {} + PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) : MCELFObjectTargetWriter(Is64Bit, OSABI, Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC, @@ -60,9 +77,14 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case PPC::fixup_ppc_br24: Type = ELF::R_PPC_REL24; break; + case FK_Data_4: case FK_PCRel_4: Type = ELF::R_PPC_REL32; break; + case FK_Data_8: + case FK_PCRel_8: + Type = ELF::R_PPC64_REL64; + break; } } else { switch ((unsigned)Fixup.getKind()) { @@ -79,12 +101,24 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_TPREL16_HA: Type = ELF::R_PPC_TPREL16_HA; break; + case MCSymbolRefExpr::VK_PPC_DTPREL16_HA: + Type = ELF::R_PPC64_DTPREL16_HA; + break; case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC_ADDR16_HA; break; case MCSymbolRefExpr::VK_PPC_TOC16_HA: Type = ELF::R_PPC64_TOC16_HA; break; + case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA: + Type = ELF::R_PPC64_GOT_TPREL16_HA; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA: + Type = ELF::R_PPC64_GOT_TLSGD16_HA; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA: + Type = ELF::R_PPC64_GOT_TLSLD16_HA; + break; } break; case PPC::fixup_ppc_lo16: @@ -93,12 +127,21 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_TPREL16_LO: Type = ELF::R_PPC_TPREL16_LO; break; + case MCSymbolRefExpr::VK_PPC_DTPREL16_LO: + Type = ELF::R_PPC64_DTPREL16_LO; + break; case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC_ADDR16_LO; break; case MCSymbolRefExpr::VK_PPC_TOC16_LO: Type = ELF::R_PPC64_TOC16_LO; break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO: + Type = ELF::R_PPC64_GOT_TLSGD16_LO; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO: + Type = ELF::R_PPC64_GOT_TLSLD16_LO; + break; } break; case PPC::fixup_ppc_lo14: @@ -119,14 +162,25 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_TOC16_LO: Type = ELF::R_PPC64_TOC16_LO_DS; break; - case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS: - Type = ELF::R_PPC64_GOT_TPREL16_DS; + case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO: + Type = ELF::R_PPC64_GOT_TPREL16_LO_DS; break; } break; case PPC::fixup_ppc_tlsreg: Type = ELF::R_PPC64_TLS; break; + case PPC::fixup_ppc_nofixup: + switch 
(Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TLSGD: + Type = ELF::R_PPC64_TLSGD; + break; + case MCSymbolRefExpr::VK_PPC_TLSLD: + Type = ELF::R_PPC64_TLSLD; + break; + } + break; case FK_Data_8: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); @@ -191,6 +245,34 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { } } +// The standard sorter only sorts on the r_offset field, but PowerPC can +// have multiple relocations at the same offset. Sort secondarily on the +// relocation type to avoid nondeterminism. +void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs) { + + // Copy to a temporary vector of relocation entries having a different + // sort function. + std::vector<PPCELFRelocationEntry> TmpRelocs; + + for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin(); + R != Relocs.end(); ++R) { + TmpRelocs.push_back(PPCELFRelocationEntry(*R)); + } + + // Sort in place by ascending r_offset and descending r_type. + array_pod_sort(TmpRelocs.begin(), TmpRelocs.end()); + + // Copy back to the original vector. + unsigned I = 0; + for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin(); + R != TmpRelocs.end(); ++R, ++I) { + Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type, + R->Symbol, R->r_addend, *R->Fixup); + } +} + + MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, bool Is64Bit, uint8_t OSABI) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 75bb851630..7917f7736e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -47,6 +47,10 @@ enum Fixups { /// fixup_ppc_tlsreg - Insert thread-pointer register number. fixup_ppc_tlsreg, + + /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call + /// to __tls_get_addr for the TLS general and local dynamic models. + fixup_ppc_nofixup, // Marker LastTargetFixupKind, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 5b208d41f4..d048426d43 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -62,8 +63,6 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; - unsigned getTLSOffsetEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const; unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo, @@ -82,11 +81,12 @@ public: SmallVectorImpl<MCFixup> &Fixups) const { uint64_t Bits = getBinaryCodeForInstr(MI, Fixups); - // BL8_NOPELF and BLA8_NOP_ELF is both size of 8 bacause of the + // BL8_NOP_ELF, BLA8_NOP_ELF, etc., all have a size of 8 because of the // following 'nop'. unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value! 
unsigned Opcode = MI.getOpcode(); - if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF) + if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF || + Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD) Size = 8; // Output the constant in big endian byte order. @@ -119,6 +119,17 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo, // Add a fixup for the branch target. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_br24)); + + // For special TLS calls, add another fixup for the symbol. Apparently + // BL8_NOP_ELF, BL8_NOP_ELF_TLSGD, and BL8_NOP_ELF_TLSLD are sufficiently + // similar that TblGen will not generate a separate case for the latter + // two, so this is the only way to get the extra fixup generated. + unsigned Opcode = MI.getOpcode(); + if (Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD) { + const MCOperand &MO2 = MI.getOperand(OpNo+1); + Fixups.push_back(MCFixup::Create(0, MO2.getExpr(), + (MCFixupKind)PPC::fixup_ppc_nofixup)); + } return 0; } @@ -199,17 +210,6 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, } -unsigned PPCMCCodeEmitter::getTLSOffsetEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { - const MCOperand &MO = MI.getOperand(OpNo); - - // Add a fixup for the GOT displacement to the TLS block offset. - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_toc16_ds)); - return 0; -} - - unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); @@ -223,7 +223,6 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, return getPPCRegisterNumbering(PPC::X13); } - unsigned PPCMCCodeEmitter:: get_crbitm_encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index a0e4cf3005..4a420929d0 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -14,6 +14,9 @@ #ifndef PPCMCTARGETDESC_H #define PPCMCTARGETDESC_H +// GCC #defines PPC on Linux but we use it as our namespace name +#undef PPC + #include "llvm/Support/DataTypes.h" namespace llvm { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index f872e861bf..972e13852e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -14,6 +14,9 @@ #ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H #define LLVM_TARGET_POWERPC_PPCPREDICATES_H +// GCC #defines PPC on Linux but we use it as our namespace name +#undef PPC + namespace llvm { namespace PPC { /// Predicate - These are "(BI << 5) | BO" for various predicates. 
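The PPCELFObjectWriter::sortRelocs override above exists because PowerPC can now emit two relocations at the same offset (a branch fixup plus the fixup_ppc_nofixup TLS marker), and ordering on r_offset alone is nondeterministic. A hedged, self-contained sketch of the determinism idea, using the relocation type as a tie-breaking secondary key; the struct and the chosen sort directions are illustrative, and the writer's actual ordering convention is whatever PPCELFRelocationEntry::operator< above defines:

// Sketch only: deterministic ordering for relocation records that can share
// an offset.
#include <algorithm>
#include <cstdint>
#include <vector>

struct Reloc {
  uint64_t Offset;
  unsigned Type;
};

static bool relocLess(const Reloc &A, const Reloc &B) {
  if (A.Offset != B.Offset)
    return A.Offset < B.Offset; // primary key: offset
  return A.Type > B.Type;       // secondary key: type breaks ties consistently
}

static void sortRelocs(std::vector<Reloc> &Relocs) {
  std::sort(Relocs.begin(), Relocs.end(), relocLess);
}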
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index bbd247f20f..e6d38ebf21 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -72,9 +72,7 @@ namespace llvm { MO_HA16 = 2 << 5, MO_TPREL16_HA = 3 << 5, - MO_TPREL16_LO = 4 << 5, - MO_GOT_TPREL16_DS = 5 << 5, - MO_TLS = 6 << 5 + MO_TPREL16_LO = 4 << 5 }; } // end namespace PPCII diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 4315bc1a07..839f918a06 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -32,9 +32,10 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Constants.h" #include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -44,7 +45,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" @@ -426,20 +426,25 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { bool IsExternal = false; bool IsFunction = false; bool IsCommon = false; + bool IsAvailExt = false; if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); - MOSymbol = Mang->getSymbol(GValue); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? + GAlias->resolveAliasedGlobal(false) : GValue; + MOSymbol = Mang->getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); IsExternal = GVar && !GVar->hasInitializer(); - IsCommon = GVar && GValue->hasCommonLinkage(); + IsCommon = GVar && RealGValue->hasCommonLinkage(); IsFunction = !GVar; + IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); else if (MO.isJTI()) MOSymbol = GetJTISymbol(MO.getIndex()); - if (IsExternal || IsFunction || IsCommon || MO.isJTI()) + if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI()) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = @@ -466,10 +471,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex())); else { const GlobalValue *GValue = MO.getGlobal(); - MOSymbol = Mang->getSymbol(GValue); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? 
+ GAlias->resolveAliasedGlobal(false) : GValue; + MOSymbol = Mang->getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); - if (!GVar || !GVar->hasInitializer() || GValue->hasCommonLinkage()) + if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || + RealGValue->hasAvailableExternallyLinkage()) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); } @@ -496,8 +505,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); - MOSymbol = Mang->getSymbol(GValue); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? + GAlias->resolveAliasedGlobal(false) : GValue; + MOSymbol = Mang->getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); IsExternal = GVar && !GVar->hasInitializer(); IsFunction = !GVar; } else if (MO.isCPI()) @@ -513,8 +525,24 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(TmpInst); return; } - case PPC::LDgotTPREL: { - // Transform %Xd = LDgotTPREL <ga:@sym>, %Xs + case PPC::ADDISgotTprelHA: { + // Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTprel = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X2) + .addExpr(SymGotTprel)); + return; + } + case PPC::LDgotTprelL: { + // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); // Change the opcode to LDrs, which is a form of LD with the offset @@ -524,12 +552,148 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS, + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); OutStreamer.EmitInstruction(TmpInst); return; } + case PPC::ADDIStlsgdHA: { + // Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsGD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X2) + .addExpr(SymGotTlsGD)); + return; + } + case PPC::ADDItlsgdL: { + // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym> + // Into: %Xd = ADDI8L %Xs, sym@got@tlsgd@l + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsGD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO, + 
OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymGotTlsGD)); + return; + } + case PPC::GETtlsADDR: { + // Transform: %X3 = GETtlsADDR %X3, <ga:@sym> + // Into: BL8_NOP_ELF_TLSGD __tls_get_addr(sym@tlsgd) + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + + StringRef Name = "__tls_get_addr"; + MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); + const MCSymbolRefExpr *TlsRef = + MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymVar = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSGD) + .addExpr(TlsRef) + .addExpr(SymVar)); + return; + } + case PPC::ADDIStlsldHA: { + // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsLD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X2) + .addExpr(SymGotTlsLD)); + return; + } + case PPC::ADDItlsldL: { + // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym> + // Into: %Xd = ADDI8L %Xs, sym@got@tlsld@l + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsLD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymGotTlsLD)); + return; + } + case PPC::GETtlsldADDR: { + // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym> + // Into: BL8_NOP_ELF_TLSLD __tls_get_addr(sym@tlsld) + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + + StringRef Name = "__tls_get_addr"; + MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); + const MCSymbolRefExpr *TlsRef = + MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymVar = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSLD) + .addExpr(TlsRef) + .addExpr(SymVar)); + return; + } + case PPC::ADDISdtprelHA: { + // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym> + // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymDtprel = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X3) + 
.addExpr(SymDtprel)); + return; + } + case PPC::ADDIdtprelL: { + // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym> + // Into: %Xd = ADDI8L %Xs, sym@dtprel@l + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymDtprel = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymDtprel)); + return; + } case PPC::MFCRpseud: case PPC::MFCR8pseud: // Transform: %R3 = MFCRpseud %CR7 @@ -568,14 +732,14 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function // entry point. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), - 8/*size*/, 0/*addrspace*/); + 8 /*size*/); MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.")); // Generates a R_PPC64_TOC relocation for TOC base insertion. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, MCSymbolRefExpr::VK_PPC_TOC, OutContext), - 8/*size*/, 0/*addrspace*/); + 8/*size*/); // Emit a null environment pointer. - OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */); + OutStreamer.EmitIntValue(0, 8 /* size */); OutStreamer.SwitchSection(Current); MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( @@ -604,6 +768,25 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { } } + MachineModuleInfoELF &MMIELF = + MMI->getObjFileInfo<MachineModuleInfoELF>(); + + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + // L_foo$stub: + OutStreamer.EmitLabel(Stubs[i].first); + // .long _foo + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(), + OutContext), + isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + } + + Stubs.clear(); + OutStreamer.AddBlankLine(); + } + return AsmPrinter::doFinalization(M); } @@ -867,7 +1050,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { if (MCSym.getInt()) // External to current translation unit. - OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/); else // Internal to current translation unit. // @@ -877,7 +1060,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { // fill in the value for the NLP in those cases. OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), OutContext), - isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + isPPC64 ? 8 : 4/*size*/); } Stubs.clear(); @@ -896,7 +1079,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { OutStreamer.EmitValue(MCSymbolRefExpr:: Create(Stubs[i].second.getPointer(), OutContext), - isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + isPPC64 ? 
8 : 4/*size*/); } Stubs.clear(); diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 05db2c5e73..a74932c1e1 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -43,7 +43,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Constants.h" +#include "llvm/IR/Constants.h" #include "llvm/PassSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 0dcb5deb5d..d68bfd12e4 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -19,7 +19,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/PassManager.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -68,7 +68,6 @@ namespace { unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getTLSOffsetEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const; const char *getPassName() const { return "PowerPC Machine Code Emitter"; } @@ -245,13 +244,6 @@ unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI, } -unsigned PPCCodeEmitter::getTLSOffsetEncoding(const MachineInstr &MI, - unsigned OpNo) const { - llvm_unreachable("TLS not supported on the old JIT."); - return 0; -} - - unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const { llvm_unreachable("TLS not supported on the old JIT."); diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index fdd85ff11f..5901f36a2f 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -198,8 +198,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { // to adjust the stack pointer (we fit in the Red Zone). For 64-bit // SVR4, we also require a stack frame if we need to spill the CR, // since this spill area is addressed relative to the stack pointer. - bool DisableRedZone = MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::NoRedZone); + bool DisableRedZone = MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone); // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can // still generate stackless code if all local vars are reg-allocated. // Try: (FrameSize <= 224 @@ -261,7 +261,8 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { // Naked functions have no stack frame pushed, so we don't have a frame // pointer. 
- if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked)) + if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::Naked)) return false; return MF.getTarget().Options.DisableFramePointerElim(MF) || diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index abcaa9cab0..762b3467c3 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -21,11 +21,12 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalValue.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -1296,14 +1297,18 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { const GlobalValue *GValue = G->getGlobal(); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue); - assert((GVar || isa<Function>(GValue)) && + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? + GAlias->resolveAliasedGlobal(false) : GValue; + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); + assert((GVar || isa<Function>(RealGValue)) && "Unexpected global value subclass!"); // An external variable is one without an initializer. For these, // for variables with common linkage, and for Functions, generate // the LDtocL form. 
- if (!GVar || !GVar->hasInitializer() || GValue->hasCommonLinkage()) + if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || + RealGValue->hasAvailableExternallyLinkage()) return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0)); } @@ -1311,11 +1316,6 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, SDValue(Tmp, 0), GA); } - case PPCISD::LD_GOT_TPREL: { - assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI"); - return CurDAG->getMachineNode(PPC::LDgotTPREL, dl, MVT::i64, - N->getOperand(0), N->getOperand(1)); - } } return SelectCode(N); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 1168171b23..9966b2cc90 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -17,7 +17,6 @@ #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -25,10 +24,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -376,6 +376,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -442,6 +443,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? 
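The PPCAsmPrinter and PPCDAGToDAGISel hunks above both look through a GlobalAlias before classifying a symbol for TOC access, and treat available_externally definitions like declarations. A hedged sketch that mirrors that logic with illustrative helper names; resolveAliasedGlobal is the aliasee-resolution interface this LLVM vintage provides:

// Sketch only: decide between the TOC-entry (LDtocL) and direct (ADDItocL)
// forms based on the object an alias ultimately refers to.
// #include "llvm/IR/GlobalAlias.h"
// #include "llvm/IR/GlobalVariable.h"
using namespace llvm;

static const GlobalValue *resolveForTOC(const GlobalValue *GV) {
  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
    return GA->resolveAliasedGlobal(false); // same call as in the hunks above
  return GV;
}

static bool needsTOCEntry(const GlobalValue *GV) {
  const GlobalValue *Real = resolveForTOC(GV);
  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(Real);
  // Functions, declarations, common and available_externally symbols go
  // through the TOC; only ordinary defined variables are addressed directly.
  return !GVar || !GVar->hasInitializer() || Real->hasCommonLinkage() ||
         Real->hasAvailableExternallyLinkage();
}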
@@ -578,8 +581,17 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA"; case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L"; case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L"; - case PPCISD::LD_GOT_TPREL: return "PPCISD::LD_GOT_TPREL"; + case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; + case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; + case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; + case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; + case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; + case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; + case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; + case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; + case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; + case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; } } @@ -1342,18 +1354,65 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, if (!is64bit) llvm_unreachable("only local-exec is currently supported for ppc32"); - if (Model != TLSModel::InitialExec) - llvm_unreachable("only local-exec and initial-exec TLS modes supported"); - - SDValue GOTOffset = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_GOT_TPREL16_DS); - SDValue TPReg = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TLS); - SDValue GOTReg = DAG.getRegister(is64bit ? PPC::X2 : PPC::R2, - is64bit ? MVT::i64 : MVT::i32); - SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL, dl, PtrVT, - GOTOffset, GOTReg); - return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TPReg); + if (Model == TLSModel::InitialExec) { + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, + PtrVT, GOTReg, TGA); + SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, + PtrVT, TGA, TPOffsetHi); + return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA); + } + + if (Model == TLSModel::GeneralDynamic) { + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, + GOTReg, TGA); + SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, + GOTEntryHi, TGA); + + // We need a chain node, and don't have one handy. The underlying + // call has no side effects, so using the function entry node + // suffices. + SDValue Chain = DAG.getEntryNode(); + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); + SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); + SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl, + PtrVT, ParmReg, TGA); + // The return value from GET_TLS_ADDR really is in X3 already, but + // some hacks are needed here to tie everything together. The extra + // copies dissolve during subsequent transforms. + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); + return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT); + } + + if (Model == TLSModel::LocalDynamic) { + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, + GOTReg, TGA); + SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, + GOTEntryHi, TGA); + + // We need a chain node, and don't have one handy. 
The underlying + // call has no side effects, so using the function entry node + // suffices. + SDValue Chain = DAG.getEntryNode(); + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); + SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); + SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, + PtrVT, ParmReg, TGA); + // The return value from GET_TLSLD_ADDR really is in X3 already, but + // some hacks are needed here to tie everything together. The extra + // copies dissolve during subsequent transforms. + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); + SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, + Chain, ParmReg, TGA); + return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); + } + + llvm_unreachable("Unknown TLS model!"); } SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, @@ -6763,8 +6822,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) && MFI->getStackSize() && - !MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::Naked); + !MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked); unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : (is31 ? PPC::R31 : PPC::R1); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, @@ -6787,16 +6846,15 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, -/// probably because the source does not need to be loaded. If -/// 'IsZeroVal' is true, that means it's safe to return a -/// non-scalar-integer type, e.g. empty string source, constant, or loaded -/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is -/// constant so it does not need to be loaded. +/// probably because the source does not need to be loaded. If 'IsMemset' is +/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that +/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy +/// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { if (this->PPCSubTarget.isPPC64()) { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index f54a8b77a4..12b3df7c9a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -178,11 +178,16 @@ namespace llvm { CR6SET, CR6UNSET, - /// G8RC = LD_GOT_TPREL Symbol, G8RReg - Used by the initial-exec + /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec + /// TLS model, produces an ADDIS8 instruction that adds the GOT + /// base to sym@got@tprel@ha. + ADDIS_GOT_TPREL_HA, + + /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec /// TLS model, produces a LD instruction with base register G8RReg - /// and offset sym@got@tprel. The latter identifies the GOT entry - /// containing the offset of "sym" relative to the thread pointer. - LD_GOT_TPREL, + /// and offset sym@got@tprel@l. 
This completes the addition that + /// finds the offset of "sym" relative to the thread pointer. + LD_GOT_TPREL_L, /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS /// model, produces an ADD instruction that adds the contents of @@ -192,6 +197,46 @@ namespace llvm { /// TLS sequence. ADD_TLS, + /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS + /// model, produces an ADDIS8 instruction that adds the GOT base + /// register to sym@got@tlsgd@ha. + ADDIS_TLSGD_HA, + + /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym@got@tlsgd@l. + ADDI_TLSGD_L, + + /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS + /// model, produces a call to __tls_get_addr(sym@tlsgd). + GET_TLS_ADDR, + + /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS + /// model, produces an ADDIS8 instruction that adds the GOT base + /// register to sym@got@tlsld@ha. + ADDIS_TLSLD_HA, + + /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym@got@tlsld@l. + ADDI_TLSLD_L, + + /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS + /// model, produces a call to __tls_get_addr(sym@tlsld). + GET_TLSLD_ADDR, + + /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the + /// local-dynamic TLS model, produces an ADDIS8 instruction + /// that adds X3 to sym@dtprel@ha. The Chain operand is needed + /// to tie this in place following a copy to %X3 from the result + /// of a GET_TLSLD_ADDR. + ADDIS_DTPREL_HA, + + /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym@got@dtprel@l. + ADDI_DTPREL_L, + /// STD_32 - This is the STD instruction for use with "32-bit" registers. STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, @@ -386,16 +431,15 @@ namespace llvm { /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, - /// probably because the source does not need to be loaded. If - /// 'IsZeroVal' is true, that means it's safe to return a - /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is - /// constant so it does not need to be loaded. + /// probably because the source does not need to be loaded. If 'IsMemset' is + /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that + /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy + /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. 
virtual EVT - getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, bool MemcpyStrSrc, + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index dff15664a3..1dd5415733 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -37,12 +37,10 @@ def memrs : Operand<iPTR> { // memri where the immediate is a symbolLo64 let EncoderMethod = "getMemRIXEncoding"; let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg); } -def tlsaddr : Operand<i64> { - let EncoderMethod = "getTLSOffsetEncoding"; -} def tlsreg : Operand<i64> { let EncoderMethod = "getTLSRegEncoding"; } +def tlsgd : Operand<i64> {} //===----------------------------------------------------------------------===// // 64-bit transformation functions. @@ -110,6 +108,16 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { (outs), (ins calltarget:$func), "bl $func\n\tnop", BrB, []>; + let isCodeGenOnly = 1 in + def BL8_NOP_ELF_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func, tlsgd:$sym), + "bl $func($sym)\n\tnop", BrB, []>; + + let isCodeGenOnly = 1 in + def BL8_NOP_ELF_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func, tlsgd:$sym), + "bl $func($sym)\n\tnop", BrB, []>; + def BLA8_ELF : IForm<18, 1, 1, (outs), (ins aaddr:$func), "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; @@ -373,7 +381,7 @@ def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the // initial-exec thread-local storage model. def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB), - "add $rT, $rA, $rB", IntSimple, + "add $rT, $rA, $rB@tls", IntSimple, [(set G8RC:$rT, (add G8RC:$rA, tglobaltlsaddr:$rB))]>; let Defs = [CARRY] in { @@ -709,13 +717,60 @@ def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp), (PPCaddiTocL G8RC:$reg, tglobaladdr:$disp))]>, isPPC64; // Support for thread-local storage. 
-def LDgotTPREL: Pseudo<(outs G8RC:$rD), (ins tlsaddr:$disp, G8RC:$reg), - "#LDgotTPREL", - [(set G8RC:$rD, - (PPCldGotTprel G8RC:$reg, tglobaltlsaddr:$disp))]>, - isPPC64; +def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), + "#ADDISgotTprelHA", + [(set G8RC:$rD, + (PPCaddisGotTprelHA G8RC:$reg, + tglobaltlsaddr:$disp))]>, + isPPC64; +def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC:$reg), + "#LDgotTprelL", + [(set G8RC:$rD, + (PPCldGotTprelL tglobaltlsaddr:$disp, G8RC:$reg))]>, + isPPC64; def : Pat<(PPCaddTls G8RC:$in, tglobaltlsaddr:$g), (ADD8TLS G8RC:$in, tglobaltlsaddr:$g)>; +def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), + "#ADDIStlsgdHA", + [(set G8RC:$rD, + (PPCaddisTlsgdHA G8RC:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), + "#ADDItlsgdL", + [(set G8RC:$rD, + (PPCaddiTlsgdL G8RC:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), + "#GETtlsADDR", + [(set G8RC:$rD, + (PPCgetTlsAddr G8RC:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; +def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), + "#ADDIStlsldHA", + [(set G8RC:$rD, + (PPCaddisTlsldHA G8RC:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), + "#ADDItlsldL", + [(set G8RC:$rD, + (PPCaddiTlsldL G8RC:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), + "#GETtlsldADDR", + [(set G8RC:$rD, + (PPCgetTlsldAddr G8RC:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; +def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), + "#ADDISdtprelHA", + [(set G8RC:$rD, + (PPCaddisDtprelHA G8RC:$reg, + tglobaltlsaddr:$disp))]>, + isPPC64; +def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), + "#ADDIdtprelL", + [(set G8RC:$rD, + (PPCaddiDtprelL G8RC:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; let PPC970_Unit = 2 in { // Truncating stores. 
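
These pseudos carry the TLS relocations until instruction selection and MC emission resolve them. As a rough illustration only (assuming a PPC64 ELF target built as position-independent code; register numbers are schematic and the relocation spellings follow the node comments, not anything added by this patch), a general-dynamic access to a thread-local variable corresponds to the commented sequence below.

// Hypothetical translation unit; tls_counter is a made-up name.
__thread int tls_counter;               // general-dynamic TLS candidate under -fPIC

int bump_tls_counter() {
  // addis 3, 2, tls_counter@got@tlsgd@ha       <- ADDIStlsgdHA (r2 = TOC/GOT base)
  // addi  3, 3, tls_counter@got@tlsgd@l        <- ADDItlsgdL
  // bl    __tls_get_addr(tls_counter@tlsgd)    <- GETtlsADDR, emitted via BL8_NOP_ELF_TLSGD
  // nop
  // ... r3 now holds the address of this thread's tls_counter ...
  return ++tls_counter;
}
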
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index a29f40ad53..8c077b7517 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -91,8 +91,19 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>; def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>; def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>; -def PPCldGotTprel : SDNode<"PPCISD::LD_GOT_TPREL", SDTIntBinOp, [SDNPMayLoad]>; +def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>; +def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp, + [SDNPMayLoad]>; def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; +def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; +def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; +def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; +def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; +def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; +def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; +def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp, + [SDNPHasChain]>; +def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 62d15b371f..851de1778a 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -15,7 +15,7 @@ #include "PPCJITInfo.h" #include "PPCRelocations.h" #include "PPCTargetMachine.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Memory.h" diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 1c4af901f7..73f7a2cfd5 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -114,12 +114,6 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, break; case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO; break; - case PPCII::MO_GOT_TPREL16_DS: - RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS; - break; - case PPCII::MO_TLS: - RefKind = MCSymbolRefExpr::VK_PPC_TLS; - break; } // FIXME: This isn't right, but we don't have a good way to express this in diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 8d1ba23c11..378c147331 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -21,7 +21,6 @@ #include "PPCSubtarget.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -29,8 +28,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -40,7 +41,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" 
#include <cstdlib> #define GET_REGINFO_TARGET_DESC @@ -597,7 +597,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // to Offset to get the correct offset. // Naked functions have stack size 0, although getStackSize may not reflect that // because we didn't call all the pieces that compute it for naked functions. - if (!MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked)) + if (!MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked)) Offset += MFI->getStackSize(); // If we can, encode the offset directly into the instruction. If this is a diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 71eff4c52a..d9b4e3061e 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -14,7 +14,7 @@ #include "PPCSubtarget.h" #include "PPC.h" #include "PPCRegisterInfo.h" -#include "llvm/GlobalValue.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 22a78c5f1f..b8b7882ac0 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -43,8 +43,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, DL(Subtarget.getDataLayoutString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()), - STTI(&TLInfo), VTTI(&TLInfo) { + InstrItins(Subtarget.getInstrItineraryData()) { // The binutils for the BG/P are too old for CFI. if (Subtarget.isBGP()) diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 0267ed1f23..d917d99ded 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -20,9 +20,8 @@ #include "PPCJITInfo.h" #include "PPCSelectionDAGInfo.h" #include "PPCSubtarget.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -37,8 +36,6 @@ class PPCTargetMachine : public LLVMTargetMachine { PPCTargetLowering TLInfo; PPCSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: PPCTargetMachine(const Target &T, StringRef TT, @@ -66,12 +63,6 @@ public: virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp index 5dc8568d83..fa44331b8a 100644 --- a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp +++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "PPC.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h new file mode 100644 index 0000000000..0f5125d39b --- /dev/null 
+++ b/lib/Target/R600/AMDGPU.h @@ -0,0 +1,49 @@ +//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//===----------------------------------------------------------------------===// + +#ifndef AMDGPU_H +#define AMDGPU_H + +#include "AMDGPUTargetMachine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class FunctionPass; +class AMDGPUTargetMachine; + +// R600 Passes +FunctionPass* createR600KernelParametersPass(const DataLayout *TD); +FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); + +// SI Passes +FunctionPass *createSIAnnotateControlFlowPass(); +FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); +FunctionPass *createSILowerControlFlowPass(TargetMachine &tm); +FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); +FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm); + +// Passes common to R600 and SI +Pass *createAMDGPUStructurizeCFGPass(); +FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); + +} // End namespace llvm + +namespace ShaderType { + enum Type { + PIXEL = 0, + VERTEX = 1, + GEOMETRY = 2, + COMPUTE = 3 + }; +} + +#endif // AMDGPU_H diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td new file mode 100644 index 0000000000..40f474161a --- /dev/null +++ b/lib/Target/R600/AMDGPU.td @@ -0,0 +1,40 @@ +//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// + +// Include AMDIL TD files +include "AMDILBase.td" + + +def AMDGPUInstrInfo : InstrInfo { + let guessInstructionProperties = 1; +} + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// +def AMDGPUAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int Variant = 0; + bit isMCAsmWriter = 1; +} + +def AMDGPU : Target { + // Pull in Instruction Info: + let InstructionSet = AMDGPUInstrInfo; + let AssemblyWriters = [AMDGPUAsmWriter]; +} + +// Include AMDGPU TD files +include "R600Schedule.td" +include "SISchedule.td" +include "Processors.td" +include "AMDGPUInstrInfo.td" +include "AMDGPUIntrinsics.td" +include "AMDGPURegisterInfo.td" +include "AMDGPUInstructions.td" diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp new file mode 100644 index 0000000000..754506c837 --- /dev/null +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -0,0 +1,138 @@ +//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assebly printer --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// The AMDGPUAsmPrinter is used to print both assembly string and also binary +/// code. When passed an MCAsmStreamer it prints assembly and when passed +/// an MCObjectStreamer it outputs binary code. 
+// +//===----------------------------------------------------------------------===// +// + + +#include "AMDGPUAsmPrinter.h" +#include "AMDGPU.h" +#include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +using namespace llvm; + + +static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm, + MCStreamer &Streamer) { + return new AMDGPUAsmPrinter(tm, Streamer); +} + +extern "C" void LLVMInitializeR600AsmPrinter() { + TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass); +} + +/// We need to override this function so we can avoid +/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle. +bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); + if (STM.dumpCode()) { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + MF.dump(); +#endif + } + SetupMachineFunction(MF); + OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); + if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { + EmitProgramInfo(MF); + } + EmitFunctionBody(); + return false; +} + +void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { + unsigned MaxSGPR = 0; + unsigned MaxVGPR = 0; + bool VCCUsed = false; + const SIRegisterInfo * RI = + static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); + + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + MachineInstr &MI = *I; + + unsigned numOperands = MI.getNumOperands(); + for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { + MachineOperand & MO = MI.getOperand(op_idx); + unsigned maxUsed; + unsigned width = 0; + bool isSGPR = false; + unsigned reg; + unsigned hwReg; + if (!MO.isReg()) { + continue; + } + reg = MO.getReg(); + if (reg == AMDGPU::VCC) { + VCCUsed = true; + continue; + } + switch (reg) { + default: break; + case AMDGPU::EXEC: + case AMDGPU::SI_LITERAL_CONSTANT: + case AMDGPU::SREG_LIT_0: + case AMDGPU::M0: + continue; + } + + if (AMDGPU::SReg_32RegClass.contains(reg)) { + isSGPR = true; + width = 1; + } else if (AMDGPU::VReg_32RegClass.contains(reg)) { + isSGPR = false; + width = 1; + } else if (AMDGPU::SReg_64RegClass.contains(reg)) { + isSGPR = true; + width = 2; + } else if (AMDGPU::VReg_64RegClass.contains(reg)) { + isSGPR = false; + width = 2; + } else if (AMDGPU::SReg_128RegClass.contains(reg)) { + isSGPR = true; + width = 4; + } else if (AMDGPU::VReg_128RegClass.contains(reg)) { + isSGPR = false; + width = 4; + } else if (AMDGPU::SReg_256RegClass.contains(reg)) { + isSGPR = true; + width = 8; + } else { + assert(!"Unknown register class"); + } + hwReg = RI->getEncodingValue(reg); + maxUsed = hwReg + width - 1; + if (isSGPR) { + MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR; + } else { + MaxVGPR = maxUsed > MaxVGPR ? 
maxUsed : MaxVGPR; + } + } + } + } + if (VCCUsed) { + MaxSGPR += 2; + } + SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>(); + OutStreamer.EmitIntValue(MaxSGPR + 1, 4); + OutStreamer.EmitIntValue(MaxVGPR + 1, 4); + OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4); +} diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h new file mode 100644 index 0000000000..3812282b17 --- /dev/null +++ b/lib/Target/R600/AMDGPUAsmPrinter.h @@ -0,0 +1,44 @@ +//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief AMDGPU Assembly printer class. +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPU_ASMPRINTER_H +#define AMDGPU_ASMPRINTER_H + +#include "llvm/CodeGen/AsmPrinter.h" + +namespace llvm { + +class AMDGPUAsmPrinter : public AsmPrinter { + +public: + explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "AMDGPU Assembly Printer"; + } + + /// \brief Emit register usage information so that the GPU driver + /// can correctly setup the GPU state. + void EmitProgramInfo(MachineFunction &MF); + + /// Implemented in AMDGPUMCInstLower.cpp + virtual void EmitInstruction(const MachineInstr *MI); +}; + +} // End anonymous llvm + +#endif //AMDGPU_ASMPRINTER_H diff --git a/lib/Target/R600/AMDGPUCodeEmitter.h b/lib/Target/R600/AMDGPUCodeEmitter.h new file mode 100644 index 0000000000..84f3588496 --- /dev/null +++ b/lib/Target/R600/AMDGPUCodeEmitter.h @@ -0,0 +1,49 @@ +//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief CodeEmitter interface for R600 and SI codegen. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUCODEEMITTER_H +#define AMDGPUCODEEMITTER_H + +namespace llvm { + +class AMDGPUCodeEmitter { +public: + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; + virtual uint64_t getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const { return 0; } + virtual unsigned GPR4AlignEncode(const MachineInstr &MI, + unsigned OpNo) const { + return 0; + } + virtual unsigned GPR2AlignEncode(const MachineInstr &MI, + unsigned OpNo) const { + return 0; + } + virtual uint64_t VOPPostEncode(const MachineInstr &MI, + uint64_t Value) const { + return Value; + } + virtual uint64_t i32LiteralEncode(const MachineInstr &MI, + unsigned OpNo) const { + return 0; + } + virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo) + const { + return 0; + } +}; + +} // End namespace llvm + +#endif // AMDGPUCODEEMITTER_H diff --git a/lib/Target/R600/AMDGPUConvertToISA.cpp b/lib/Target/R600/AMDGPUConvertToISA.cpp new file mode 100644 index 0000000000..50297d1f60 --- /dev/null +++ b/lib/Target/R600/AMDGPUConvertToISA.cpp @@ -0,0 +1,62 @@ +//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief This pass lowers AMDIL machine instructions to the appropriate +/// hardware instructions. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUInstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +using namespace llvm; + +namespace { + +class AMDGPUConvertToISAPass : public MachineFunctionPass { + +private: + static char ID; + TargetMachine &TM; + +public: + AMDGPUConvertToISAPass(TargetMachine &tm) : + MachineFunctionPass(ID), TM(tm) { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const {return "AMDGPU Convert to ISA";} + +}; + +} // End anonymous namespace + +char AMDGPUConvertToISAPass::ID = 0; + +FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) { + return new AMDGPUConvertToISAPass(tm); +} + +bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) { + const AMDGPUInstrInfo * TII = + static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo()); + + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + MachineInstr &MI = *I; + TII->convertToISA(MI, MF, MBB.findDebugLoc(I)); + } + } + return false; +} diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp new file mode 100644 index 0000000000..473dac4ddc --- /dev/null +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -0,0 +1,417 @@ +//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief This is the parent TargetLowering class for hardware code gen +/// targets. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUISelLowering.h" +#include "AMDILIntrinsicInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +using namespace llvm; + +AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : + TargetLowering(TM, new TargetLoweringObjectFileELF()) { + + // Initialize target lowering borrowed from AMDIL + InitAMDILLowering(); + + // We need to custom lower some of the intrinsics + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + // Library functions. These default to Expand, but we have instructions + // for them. + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FEXP2, MVT::f32, Legal); + setOperationAction(ISD::FPOW, MVT::f32, Legal); + setOperationAction(ISD::FLOG2, MVT::f32, Legal); + setOperationAction(ISD::FABS, MVT::f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + + // Lower floating point store/load to integer store/load to reduce the number + // of patterns in tablegen. + setOperationAction(ISD::STORE, MVT::f32, Promote); + AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); + + setOperationAction(ISD::STORE, MVT::v4f32, Promote); + AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); + + setOperationAction(ISD::LOAD, MVT::f32, Promote); + AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); + + setOperationAction(ISD::LOAD, MVT::v4f32, Promote); + AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); + + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + setOperationAction(ISD::UREM, MVT::i32, Expand); +} + +//===---------------------------------------------------------------------===// +// TargetLowering Callbacks +//===---------------------------------------------------------------------===// + +SDValue AMDGPUTargetLowering::LowerFormalArguments( + SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + for (unsigned i = 0, e = Ins.size(); i < e; ++i) { + InVals.push_back(SDValue()); + } + return Chain; +} + +SDValue AMDGPUTargetLowering::LowerReturn( + SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc DL, SelectionDAG &DAG) const { + return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); +} + +//===---------------------------------------------------------------------===// +// Target specific lowering +//===---------------------------------------------------------------------===// + +SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) + const { + switch (Op.getOpcode()) { + default: + Op.getNode()->dump(); + assert(0 && "Custom lowering code for this" + "instruction is not implemented yet!"); + break; + // AMDIL DAG lowering + case ISD::SDIV: return LowerSDIV(Op, DAG); + case ISD::SREM: return LowerSREM(Op, DAG); + case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); + case ISD::BRCOND: return LowerBRCOND(Op, DAG); + // AMDGPU DAG lowering + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); + } + return Op; +} + +SDValue 
AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + + switch (IntrinsicID) { + default: return Op; + case AMDGPUIntrinsic::AMDIL_abs: + return LowerIntrinsicIABS(Op, DAG); + case AMDGPUIntrinsic::AMDIL_exp: + return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDGPU_lrp: + return LowerIntrinsicLRP(Op, DAG); + case AMDGPUIntrinsic::AMDIL_fraction: + return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDIL_mad: + return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + case AMDGPUIntrinsic::AMDIL_max: + return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), + Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_imax: + return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), + Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_umax: + return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), + Op.getOperand(2)); + case AMDGPUIntrinsic::AMDIL_min: + return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), + Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_imin: + return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), + Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_umin: + return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), + Op.getOperand(2)); + case AMDGPUIntrinsic::AMDIL_round_nearest: + return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); + } +} + +///IABS(a) = SMAX(sub(0, a), a) +SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, + SelectionDAG &DAG) const { + + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), + Op.getOperand(1)); + + return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); +} + +/// Linear Interpolation +/// LRP(a, b, c) = muladd(a, b, (1 - a) * c) +SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, + DAG.getConstantFP(1.0f, MVT::f32), + Op.getOperand(1)); + SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, + Op.getOperand(3)); + return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1), + Op.getOperand(2), + OneSubAC); +} + +/// \brief Generate Min/Max node +SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue True = Op.getOperand(2); + SDValue False = Op.getOperand(3); + SDValue CC = Op.getOperand(4); + + if (VT != MVT::f32 || + !((LHS == True && RHS == False) || (LHS == False && RHS == True))) { + return SDValue(); + } + + ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); + switch (CCOpcode) { + case ISD::SETOEQ: + case ISD::SETONE: + case ISD::SETUNE: + case ISD::SETNE: + case ISD::SETUEQ: + case ISD::SETEQ: + case ISD::SETFALSE: + case ISD::SETFALSE2: + case ISD::SETTRUE: + case ISD::SETTRUE2: + case ISD::SETUO: + case ISD::SETO: + assert(0 && "Operation should already be optimised !"); + case ISD::SETULE: + case ISD::SETULT: + case ISD::SETOLE: + case ISD::SETOLT: + case ISD::SETLE: + case ISD::SETLT: { + if (LHS == True) + return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); + else + 
return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); + } + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETUGE: + case ISD::SETOGE: + case ISD::SETUGT: + case ISD::SETOGT: { + if (LHS == True) + return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); + else + return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); + } + case ISD::SETCC_INVALID: + assert(0 && "Invalid setcc condcode !"); + } + return Op; +} + + + +SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + + SDValue Num = Op.getOperand(0); + SDValue Den = Op.getOperand(1); + + SmallVector<SDValue, 8> Results; + + // RCP = URECIP(Den) = 2^32 / Den + e + // e is rounding error. + SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); + + // RCP_LO = umulo(RCP, Den) */ + SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); + + // RCP_HI = mulhu (RCP, Den) */ + SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); + + // NEG_RCP_LO = -RCP_LO + SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), + RCP_LO); + + // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO) + SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), + NEG_RCP_LO, RCP_LO, + ISD::SETEQ); + // Calculate the rounding error from the URECIP instruction + // E = mulhu(ABS_RCP_LO, RCP) + SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); + + // RCP_A_E = RCP + E + SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); + + // RCP_S_E = RCP - E + SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); + + // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) + SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), + RCP_A_E, RCP_S_E, + ISD::SETEQ); + // Quotient = mulhu(Tmp0, Num) + SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); + + // Num_S_Remainder = Quotient * Den + SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); + + // Remainder = Num - Num_S_Remainder + SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); + + // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) + SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, + DAG.getConstant(-1, VT), + DAG.getConstant(0, VT), + ISD::SETGE); + // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0) + SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder, + DAG.getConstant(0, VT), + DAG.getConstant(-1, VT), + DAG.getConstant(0, VT), + ISD::SETGE); + // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero + SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, + Remainder_GE_Zero); + + // Calculate Division result: + + // Quotient_A_One = Quotient + 1 + SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, + DAG.getConstant(1, VT)); + + // Quotient_S_One = Quotient - 1 + SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, + DAG.getConstant(1, VT)); + + // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) + SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), + Quotient, Quotient_A_One, ISD::SETEQ); + + // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) + Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), + Quotient_S_One, Div, ISD::SETEQ); + + // Calculate Rem result: + + // Remainder_S_Den = Remainder - Den + SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); + + // Remainder_A_Den = Remainder + Den + SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); + + // Rem = (Tmp1 == 0 ? 
Remainder : Remainder_S_Den) + SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), + Remainder, Remainder_S_Den, ISD::SETEQ); + + // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) + Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), + Remainder_A_Den, Rem, ISD::SETEQ); + SDValue Ops[2]; + Ops[0] = Div; + Ops[1] = Rem; + return DAG.getMergeValues(Ops, 2, DL); +} + +//===----------------------------------------------------------------------===// +// Helper functions +//===----------------------------------------------------------------------===// + +bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const { + if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { + return CFP->isExactlyValue(1.0); + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + return C->isAllOnesValue(); + } + return false; +} + +bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const { + if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { + return CFP->getValueAPF().isZero(); + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + return C->isNullValue(); + } + return false; +} + +SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, + const TargetRegisterClass *RC, + unsigned Reg, EVT VT) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned VirtualRegister; + if (!MRI.isLiveIn(Reg)) { + VirtualRegister = MRI.createVirtualRegister(RC); + MRI.addLiveIn(Reg, VirtualRegister); + } else { + VirtualRegister = MRI.getLiveInVirtReg(Reg); + } + return DAG.getRegister(VirtualRegister, VT); +} + +#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; + +const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + // AMDIL DAG nodes + NODE_NAME_CASE(MAD); + NODE_NAME_CASE(CALL); + NODE_NAME_CASE(UMUL); + NODE_NAME_CASE(DIV_INF); + NODE_NAME_CASE(RET_FLAG); + NODE_NAME_CASE(BRANCH_COND); + + // AMDGPU DAG nodes + NODE_NAME_CASE(DWORDADDR) + NODE_NAME_CASE(FRACT) + NODE_NAME_CASE(FMAX) + NODE_NAME_CASE(SMAX) + NODE_NAME_CASE(UMAX) + NODE_NAME_CASE(FMIN) + NODE_NAME_CASE(SMIN) + NODE_NAME_CASE(UMIN) + NODE_NAME_CASE(URECIP) + NODE_NAME_CASE(INTERP) + NODE_NAME_CASE(INTERP_P0) + NODE_NAME_CASE(EXPORT) + } +} diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h new file mode 100644 index 0000000000..c7abaf69b4 --- /dev/null +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -0,0 +1,144 @@ +//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Interface definition of the TargetLowering class that is common +/// to all AMD GPUs. +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUISELLOWERING_H +#define AMDGPUISELLOWERING_H + +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +class MachineRegisterInfo; + +class AMDGPUTargetLowering : public TargetLowering { +private: + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; + +protected: + + /// \brief Helper function that adds Reg to the LiveIn list of the DAG's + /// MachineFunction. 
+ /// + /// \returns a RegisterSDNode representing Reg. + SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, + unsigned Reg, EVT VT) const; + + bool isHWTrueValue(SDValue Op) const; + bool isHWFalseValue(SDValue Op) const; + +public: + AMDGPUTargetLowering(TargetMachine &TM); + + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc DL, SelectionDAG &DAG) const; + + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const; + virtual const char* getTargetNodeName(unsigned Opcode) const; + +// Functions defined in AMDILISelLowering.cpp +public: + + /// \brief Determine which of the bits specified in \p Mask are known to be + /// either zero or one and return them in the \p KnownZero and \p KnownOne + /// bitsets. + virtual void computeMaskedBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth = 0) const; + + virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, unsigned Intrinsic) const; + + /// We want to mark f32/f64 floating point values as legal. + bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + + /// We don't want to shrink f64/f32 constants. + bool ShouldShrinkFPConstant(EVT VT) const; + +private: + void InitAMDILLowering(); + SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; + EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; +}; + +namespace AMDGPUISD { + +enum { + // AMDIL ISD Opcodes + FIRST_NUMBER = ISD::BUILTIN_OP_END, + MAD, // 32bit Fused Multiply Add instruction + CALL, // Function call based on a single integer + UMUL, // 32bit unsigned multiplication + DIV_INF, // Divide with infinity returned on zero divisor + RET_FLAG, + BRANCH_COND, + // End AMDIL ISD Opcodes + BITALIGN, + DWORDADDR, + FRACT, + FMAX, + SMAX, + UMAX, + FMIN, + SMIN, + UMIN, + URECIP, + INTERP, + INTERP_P0, + EXPORT, + LAST_AMDGPU_ISD_NUMBER +}; + + +} // End namespace AMDGPUISD + +namespace SIISD { + +enum { + SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER, + VCC_AND, + VCC_BITCAST +}; + +} // End namespace SIISD + +} // End namespace llvm + +#endif // AMDGPUISELLOWERING_H diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp new file mode 100644 index 0000000000..e42a46d839 --- /dev/null +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ 
-0,0 +1,257 @@ +//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Implementation of the TargetInstrInfo class that is common to all +/// AMD GPUs. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUInstrInfo.h" +#include "AMDGPURegisterInfo.h" +#include "AMDGPUTargetMachine.h" +#include "AMDIL.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +#define GET_INSTRINFO_CTOR +#include "AMDGPUGenInstrInfo.inc" + +using namespace llvm; + +AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm) + : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { } + +const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const { + return RI; +} + +bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const { +// TODO: Implement this function + return false; +} + +unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { +// TODO: Implement this function + return 0; +} + +unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { +// TODO: Implement this function + return 0; +} + +bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const { +// TODO: Implement this function + return false; +} +unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { +// TODO: Implement this function + return 0; +} +unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { +// TODO: Implement this function + return 0; +} +bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const { +// TODO: Implement this function + return false; +} + +MachineInstr * +AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const { +// TODO: Implement this function + return NULL; +} +bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter, + MachineBasicBlock &MBB) const { + while (iter != MBB.end()) { + switch (iter->getOpcode()) { + default: + break; + case AMDGPU::BRANCH_COND_i32: + case AMDGPU::BRANCH_COND_f32: + case AMDGPU::BRANCH: + return true; + }; + ++iter; + } + return false; +} + +MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) { + MachineBasicBlock::iterator tmp = MBB->end(); + if (!MBB->size()) { + return MBB->end(); + } + while (--tmp) { + if (tmp->getOpcode() == AMDGPU::ENDLOOP + || tmp->getOpcode() == AMDGPU::ENDIF + || tmp->getOpcode() == AMDGPU::ELSE) { + if (tmp == MBB->begin()) { + return tmp; + } else { + continue; + } + } else { + return ++tmp; + } + } + return MBB->end(); +} + +void +AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, + int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + assert(!"Not Implemented"); +} + +void +AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + 
MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + assert(!"Not Implemented"); +} + +MachineInstr * +AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const { +// TODO: Implement this function + return 0; +} +MachineInstr* +AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr *LoadMI) const { + // TODO: Implement this function + return 0; +} +bool +AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const { + // TODO: Implement this function + return false; +} +bool +AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, + unsigned Reg, bool UnfoldLoad, + bool UnfoldStore, + SmallVectorImpl<MachineInstr*> &NewMIs) const { + // TODO: Implement this function + return false; +} + +bool +AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, + SmallVectorImpl<SDNode*> &NewNodes) const { + // TODO: Implement this function + return false; +} + +unsigned +AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, + bool UnfoldLoad, bool UnfoldStore, + unsigned *LoadRegIndex) const { + // TODO: Implement this function + return 0; +} + +bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const { + assert(Offset2 > Offset1 + && "Second offset should be larger than first offset!"); + // If we have less than 16 loads in a row, and the offsets are within 16, + // then schedule together. + // TODO: Make the loads schedule near if it fits in a cacheline + return (NumLoads < 16 && (Offset2 - Offset1) < 16); +} + +bool +AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) + const { + // TODO: Implement this function + return true; +} +void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + // TODO: Implement this function +} + +bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const { + // TODO: Implement this function + return false; +} +bool +AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) + const { + // TODO: Implement this function + return false; +} + +bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + // TODO: Implement this function + return false; +} + +bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const { + // TODO: Implement this function + return MI->getDesc().isPredicable(); +} + +bool +AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { + // TODO: Implement this function + return true; +} + +void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF, + DebugLoc DL) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); + const AMDGPURegisterInfo & RI = getRegisterInfo(); + + for (unsigned i = 0; i < MI.getNumOperands(); i++) { + MachineOperand &MO = MI.getOperand(i); + // Convert dst regclass to one that is supported by the ISA + if (MO.isReg() && MO.isDef()) { + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg()); + const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass); + + assert(newRegClass); + + 
MRI.setRegClass(MO.getReg(), newRegClass); + } + } + } +} diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h new file mode 100644 index 0000000000..cb97af9831 --- /dev/null +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -0,0 +1,148 @@ +//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Contains the definition of a TargetInstrInfo class that is common +/// to all AMD GPUs. +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUINSTRUCTIONINFO_H +#define AMDGPUINSTRUCTIONINFO_H + +#include "AMDGPUInstrInfo.h" +#include "AMDGPURegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <map> + +#define GET_INSTRINFO_HEADER +#define GET_INSTRINFO_ENUM +#include "AMDGPUGenInstrInfo.inc" + +#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT +#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT +#define OPCODE_IS_ZERO AMDGPU::PRED_SETE +#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE + +namespace llvm { + +class AMDGPUTargetMachine; +class MachineFunction; +class MachineInstr; +class MachineInstrBuilder; + +class AMDGPUInstrInfo : public AMDGPUGenInstrInfo { +private: + const AMDGPURegisterInfo RI; + TargetMachine &TM; + bool getNextBranchInstr(MachineBasicBlock::iterator &iter, + MachineBasicBlock &MBB) const; +public: + explicit AMDGPUInstrInfo(TargetMachine &tm); + + virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0; + + bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, + unsigned &DstReg, unsigned &SubIdx) const; + + unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; + bool hasLoadFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const; + unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; + bool hasStoreFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const; + + MachineInstr * + convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const; + + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const = 0; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + +protected: + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr *LoadMI) const; +public: + bool canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const; + bool 
unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, + unsigned Reg, bool UnfoldLoad, bool UnfoldStore, + SmallVectorImpl<MachineInstr *> &NewMIs) const; + bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, + SmallVectorImpl<SDNode *> &NewNodes) const; + unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, + bool UnfoldLoad, bool UnfoldStore, + unsigned *LoadRegIndex = 0) const; + bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const; + + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + bool isPredicated(const MachineInstr *MI) const; + bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const; + bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const; + bool isPredicable(MachineInstr *MI) const; + bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; + + // Helper functions that check the opcode for status information + bool isLoadInst(llvm::MachineInstr *MI) const; + bool isExtLoadInst(llvm::MachineInstr *MI) const; + bool isSWSExtLoadInst(llvm::MachineInstr *MI) const; + bool isSExtLoadInst(llvm::MachineInstr *MI) const; + bool isZExtLoadInst(llvm::MachineInstr *MI) const; + bool isAExtLoadInst(llvm::MachineInstr *MI) const; + bool isStoreInst(llvm::MachineInstr *MI) const; + bool isTruncStoreInst(llvm::MachineInstr *MI) const; + + virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg, + int64_t Imm) const = 0; + virtual unsigned getIEQOpcode() const = 0; + virtual bool isMov(unsigned opcode) const = 0; + + /// \brief Convert the AMDIL MachineInstr to a supported ISA + /// MachineInstr + virtual void convertToISA(MachineInstr & MI, MachineFunction &MF, + DebugLoc DL) const; + +}; + +} // End llvm namespace + +#endif // AMDGPUINSTRINFO_H diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td new file mode 100644 index 0000000000..96368e8541 --- /dev/null +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -0,0 +1,74 @@ +//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains DAG node defintions for the AMDGPU target. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// AMDGPU DAG Profiles +//===----------------------------------------------------------------------===// + +def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> +]>; + +//===----------------------------------------------------------------------===// +// AMDGPU DAG Nodes +// + +// out = ((a << 32) | b) >> c) +// +// Can be used to optimize rtol: +// rotl(a, b) = bitalign(a, a, 32 - b) +def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>; + +// This argument to this node is a dword address. 
+def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>; + +// out = a - floor(a) +def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>; + +// out = max(a, b) a and b are floats +def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = max(a, b) a and b are signed ints +def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = max(a, b) a and b are unsigned ints +def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = min(a, b) a and b are floats +def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = min(a, b) a snd b are signed ints +def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = min(a, b) a and b are unsigned ints +def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// urecip - This operation is a helper for integer division, it returns the +// result of 1 / a as a fractional unsigned integer. +// out = (2^32 / a) + e +// e is rounding error +def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>; + +def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td new file mode 100644 index 0000000000..e634d20b61 --- /dev/null +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -0,0 +1,190 @@ +//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains instruction defs that are common to all hw codegen +// targets. 
+// +//===----------------------------------------------------------------------===// + +class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction { + field bits<16> AMDILOp = 0; + field bits<3> Gen = 0; + + let Namespace = "AMDGPU"; + let OutOperandList = outs; + let InOperandList = ins; + let AsmString = asm; + let Pattern = pattern; + let Itinerary = NullALU; + let TSFlags{42-40} = Gen; + let TSFlags{63-48} = AMDILOp; +} + +class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern> + : AMDGPUInst<outs, ins, asm, pattern> { + + field bits<32> Inst = 0xffffffff; + +} + +def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>; + +def COND_EQ : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETOEQ: case ISD::SETUEQ: + case ISD::SETEQ: return true;}}}] +>; + +def COND_NE : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETONE: case ISD::SETUNE: + case ISD::SETNE: return true;}}}] +>; +def COND_GT : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETOGT: case ISD::SETUGT: + case ISD::SETGT: return true;}}}] +>; + +def COND_GE : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETOGE: case ISD::SETUGE: + case ISD::SETGE: return true;}}}] +>; + +def COND_LT : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETOLT: case ISD::SETULT: + case ISD::SETLT: return true;}}}] +>; + +def COND_LE : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETOLE: case ISD::SETULE: + case ISD::SETLE: return true;}}}] +>; + +//===----------------------------------------------------------------------===// +// Load/Store Pattern Fragments +//===----------------------------------------------------------------------===// + +def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{ + return isGlobalLoad(dyn_cast<LoadSDNode>(N)); +}]>; + +class Constants { +int TWO_PI = 0x40c90fdb; +int PI = 0x40490fdb; +int TWO_PI_INV = 0x3e22f983; +} +def CONST : Constants; + +def FP_ZERO : PatLeaf < + (fpimm), + [{return N->getValueAPF().isZero();}] +>; + +def FP_ONE : PatLeaf < + (fpimm), + [{return N->isExactlyValue(1.0);}] +>; + +let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in { + +class CLAMP <RegisterClass rc> : AMDGPUShaderInst < + (outs rc:$dst), + (ins rc:$src0), + "CLAMP $dst, $src0", + [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] +>; + +class FABS <RegisterClass rc> : AMDGPUShaderInst < + (outs rc:$dst), + (ins rc:$src0), + "FABS $dst, $src0", + [(set rc:$dst, (fabs rc:$src0))] +>; + +class FNEG <RegisterClass rc> : AMDGPUShaderInst < + (outs rc:$dst), + (ins rc:$src0), + "FNEG $dst, $src0", + [(set rc:$dst, (fneg rc:$src0))] +>; + +def SHADER_TYPE : AMDGPUShaderInst < + (outs), + (ins i32imm:$type), + "SHADER_TYPE $type", + [(int_AMDGPU_shader_type imm:$type)] +>; + +} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1 + +/* Generic helper patterns for intrinsics */ +/* -------------------------------------- */ + +class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul, + RegisterClass rc> : Pat < + (fpow rc:$src0, rc:$src1), + (exp_ieee (mul rc:$src1, (log_ieee rc:$src0))) +>; + +/* Other helper patterns */ +/* --------------------- */ + +/* Extract element pattern */ +class Extract_Element <ValueType sub_type, ValueType vec_type, + RegisterClass vec_class, int sub_idx, + SubRegIndex sub_reg>: Pat< + (sub_type 
(vector_extract (vec_type vec_class:$src), sub_idx)), + (EXTRACT_SUBREG vec_class:$src, sub_reg) +>; + +/* Insert element pattern */ +class Insert_Element <ValueType elem_type, ValueType vec_type, + RegisterClass elem_class, RegisterClass vec_class, + int sub_idx, SubRegIndex sub_reg> : Pat < + + (vec_type (vector_insert (vec_type vec_class:$vec), + (elem_type elem_class:$elem), sub_idx)), + (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg) +>; + +// Vector Build pattern +class Vector_Build <ValueType vecType, RegisterClass vectorClass, + ValueType elemType, RegisterClass elemClass> : Pat < + (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y), + (elemType elemClass:$z), (elemType elemClass:$w))), + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG + (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y), + elemClass:$z, sel_z), elemClass:$w, sel_w) +>; + +// bitconvert pattern +class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat < + (dt (bitconvert (st rc:$src0))), + (dt rc:$src0) +>; + +class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat < + (vt (AMDGPUdwordaddr (vt rc:$addr))), + (vt rc:$addr) +>; + +include "R600Instructions.td" + +include "SIInstrInfo.td" + diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td new file mode 100644 index 0000000000..2ba2d4b90d --- /dev/null +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -0,0 +1,62 @@ +//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines intrinsics that are used by all hw codegen targets. 
+// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "AMDGPU", isTarget = 1 in { + + def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; + def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; + def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>; + def int_AMDGPU_kilp : Intrinsic<[], [], []>; + def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], 
[llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + + def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>; +} + +let TargetPrefix = "TGSI", isTarget = 1 in { + + def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>; +} + +include "SIIntrinsics.td" diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp new file mode 100644 index 0000000000..1dc1c657df --- /dev/null +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -0,0 +1,83 @@ +//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst. +// +//===----------------------------------------------------------------------===// +// + +#include "AMDGPUMCInstLower.h" +#include "AMDGPUAsmPrinter.h" +#include "R600InstrInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/Constants.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx): + Ctx(ctx) +{ } + +void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + switch (MO.getType()) { + default: + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_FPImmediate: { + const APFloat &FloatValue = MO.getFPImm()->getValueAPF(); + assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle && + "Only floating point immediates are supported at the moment."); + MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat()); + break; + } + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_Register: + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + MO.getMBB()->getSymbol(), Ctx)); + } + OutMI.addOperand(MCOp); + } +} + +void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { + AMDGPUMCInstLower MCInstLowering(OutContext); + + if (MI->isBundle()) { + const MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock::const_instr_iterator I = MI; + ++I; + while (I != MBB->end() && I->isInsideBundle()) { + MCInst MCBundleInst; + const MachineInstr *BundledInst = I; + MCInstLowering.lower(BundledInst, MCBundleInst); + OutStreamer.EmitInstruction(MCBundleInst); + ++I; + } + } else { + MCInst TmpInst; + MCInstLowering.lower(MI, TmpInst); + OutStreamer.EmitInstruction(TmpInst); + } +} diff --git a/lib/Target/R600/AMDGPUMCInstLower.h b/lib/Target/R600/AMDGPUMCInstLower.h new file mode 100644 index 0000000000..d7d538e925 --- /dev/null +++ b/lib/Target/R600/AMDGPUMCInstLower.h @@ -0,0 +1,34 @@ +//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// 
License. See LICENSE.TXT for details. +// +/// \file +//===----------------------------------------------------------------------===// + +#ifndef AMDGPU_MCINSTLOWER_H +#define AMDGPU_MCINSTLOWER_H + +namespace llvm { + +class MCInst; +class MCContext; +class MachineInstr; + +class AMDGPUMCInstLower { + + MCContext &Ctx; + +public: + AMDGPUMCInstLower(MCContext &ctx); + + /// \brief Lower a MachineInstr to an MCInst + void lower(const MachineInstr *MI, MCInst &OutMI) const; + +}; + +} // End namespace llvm + +#endif //AMDGPU_MCINSTLOWER_H diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp new file mode 100644 index 0000000000..eeafec898d --- /dev/null +++ b/lib/Target/R600/AMDGPURegisterInfo.cpp @@ -0,0 +1,51 @@ +//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Parent TargetRegisterInfo class common to all hw codegen targets. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPURegisterInfo.h" +#include "AMDGPUTargetMachine.h" + +using namespace llvm; + +AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm, + const TargetInstrInfo &tii) +: AMDGPUGenRegisterInfo(0), + TM(tm), + TII(tii) + { } + +//===----------------------------------------------------------------------===// +// Function handling callbacks - Functions are a seldom used feature of GPUS, so +// they are not supported at this time. +//===----------------------------------------------------------------------===// + +const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister; + +const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) + const { + return &CalleeSavedReg; +} + +void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, + RegScavenger *RS) const { + assert(!"Subroutines not supported yet"); +} + +unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { + assert(!"Subroutines not supported yet"); + return 0; +} + +#define GET_REGINFO_TARGET_DESC +#include "AMDGPUGenRegisterInfo.inc" diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h new file mode 100644 index 0000000000..76ee7ae06a --- /dev/null +++ b/lib/Target/R600/AMDGPURegisterInfo.h @@ -0,0 +1,63 @@ +//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief TargetRegisterInfo interface that is implemented by all hw codegen +/// targets. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUREGISTERINFO_H +#define AMDGPUREGISTERINFO_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#define GET_REGINFO_ENUM +#include "AMDGPUGenRegisterInfo.inc" + +namespace llvm { + +class AMDGPUTargetMachine; +class TargetInstrInfo; + +struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { + TargetMachine &TM; + const TargetInstrInfo &TII; + static const uint16_t CalleeSavedReg; + + AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii); + + virtual BitVector getReservedRegs(const MachineFunction &MF) const { + assert(!"Unimplemented"); return BitVector(); + } + + /// \param RC is an AMDIL reg class. + /// + /// \returns The ISA reg class that is equivalent to \p RC. + virtual const TargetRegisterClass * getISARegClass( + const TargetRegisterClass * RC) const { + assert(!"Unimplemented"); return NULL; + } + + virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const { + assert(!"Unimplemented"); return NULL; + } + + const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const; + void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + RegScavenger *RS) const; + unsigned getFrameRegister(const MachineFunction &MF) const; + +}; + +} // End namespace llvm + +#endif // AMDIDSAREGISTERINFO_H diff --git a/lib/Target/R600/AMDGPURegisterInfo.td b/lib/Target/R600/AMDGPURegisterInfo.td new file mode 100644 index 0000000000..8181e023aa --- /dev/null +++ b/lib/Target/R600/AMDGPURegisterInfo.td @@ -0,0 +1,22 @@ +//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Tablegen register definitions common to all hw codegen targets. +// +//===----------------------------------------------------------------------===// + +let Namespace = "AMDGPU" in { + def sel_x : SubRegIndex; + def sel_y : SubRegIndex; + def sel_z : SubRegIndex; + def sel_w : SubRegIndex; +} + +include "R600RegisterInfo.td" +include "SIRegisterInfo.td" diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp new file mode 100644 index 0000000000..8295efdd4b --- /dev/null +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -0,0 +1,714 @@ +//===-- AMDGPUStructurizeCFG.cpp - ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// The pass implemented in this file transforms the programs control flow +/// graph into a form that's suitable for code generation on hardware that +/// implements control flow by execution masking. This currently includes all +/// AMD GPUs but may as well be useful for other types of hardware. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" +#include "llvm/Analysis/RegionPass.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" + +using namespace llvm; + +namespace { + +// Definition of the complex types used in this pass. + +typedef std::pair<BasicBlock *, Value *> BBValuePair; +typedef ArrayRef<BasicBlock*> BBVecRef; + +typedef SmallVector<RegionNode*, 8> RNVector; +typedef SmallVector<BasicBlock*, 8> BBVector; +typedef SmallVector<BBValuePair, 2> BBValueVector; + +typedef DenseMap<PHINode *, BBValueVector> PhiMap; +typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap; +typedef DenseMap<BasicBlock *, Value *> BBPredicates; +typedef DenseMap<BasicBlock *, BBPredicates> PredMap; +typedef DenseMap<BasicBlock *, unsigned> VisitedMap; + +// The name for newly created blocks. + +static const char *FlowBlockName = "Flow"; + +/// @brief Transforms the control flow graph on one single entry/exit region +/// at a time. +/// +/// After the transform all "If"/"Then"/"Else" style control flow looks like +/// this: +/// +/// \verbatim +/// 1 +/// || +/// | | +/// 2 | +/// | / +/// |/ +/// 3 +/// || Where: +/// | | 1 = "If" block, calculates the condition +/// 4 | 2 = "Then" subregion, runs if the condition is true +/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow +/// |/ 4 = "Else" optional subregion, runs if the condition is false +/// 5 5 = "End" block, also rejoins the control flow +/// \endverbatim +/// +/// Control flow is expressed as a branch where the true exit goes into the +/// "Then"/"Else" region, while the false exit skips the region +/// The condition for the optional "Else" region is expressed as a PHI node. +/// The incomming values of the PHI node are true for the "If" edge and false +/// for the "Then" edge. +/// +/// Additionally to that even complicated loops look like this: +/// +/// \verbatim +/// 1 +/// || +/// | | +/// 2 ^ Where: +/// | / 1 = "Entry" block +/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block +/// 3 3 = "Flow" block, with back edge to entry block +/// | +/// \endverbatim +/// +/// The back edge of the "Flow" block is always on the false side of the branch +/// while the true side continues the general flow. So the loop condition +/// consist of a network of PHI nodes where the true incoming values expresses +/// breaks and the false values expresses continue states. 
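For illustration only (this sketch is not part of the change): the condition for the optional "Else" region described above can be thought of as an i1 PHI placed in the "Flow" block, with a true value incoming along the edge from the "If" block and a false value incoming along the edge from the "Then" subregion. The function and block names below are assumptions made for the example, not code from this patch.

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"

    using namespace llvm;

    // Illustrative only: build the i1 condition PHI sketched in the diagram.
    // The true exit of the resulting branch enters the optional "Else"
    // subregion; the false exit skips it and rejoins at the "End" block.
    static void buildElseCondition(BasicBlock *IfBB, BasicBlock *ThenBB,
                                   BasicBlock *FlowBB, BasicBlock *ElseBB,
                                   BasicBlock *EndBB) {
      LLVMContext &Ctx = FlowBB->getContext();
      PHINode *Cond = PHINode::Create(Type::getInt1Ty(Ctx), 2, "else.cond", FlowBB);
      Cond->addIncoming(ConstantInt::getTrue(Ctx), IfBB);    // came straight from "If"
      Cond->addIncoming(ConstantInt::getFalse(Ctx), ThenBB); // "Then" already executed
      BranchInst::Create(ElseBB, EndBB, Cond, FlowBB);
    }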
+class AMDGPUStructurizeCFG : public RegionPass { + + static char ID; + + Type *Boolean; + ConstantInt *BoolTrue; + ConstantInt *BoolFalse; + UndefValue *BoolUndef; + + Function *Func; + Region *ParentRegion; + + DominatorTree *DT; + + RNVector Order; + VisitedMap Visited; + PredMap Predicates; + BBPhiMap DeletedPhis; + BBVector FlowsInserted; + + BasicBlock *LoopStart; + BasicBlock *LoopEnd; + BBPredicates LoopPred; + + void orderNodes(); + + void buildPredicate(BranchInst *Term, unsigned Idx, + BBPredicates &Pred, bool Invert); + + void analyzeBlock(BasicBlock *BB); + + void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx); + + void collectInfos(); + + bool dominatesPredicates(BasicBlock *A, BasicBlock *B); + + void killTerminator(BasicBlock *BB); + + RegionNode *skipChained(RegionNode *Node); + + void delPhiValues(BasicBlock *From, BasicBlock *To); + + void addPhiValues(BasicBlock *From, BasicBlock *To); + + BasicBlock *getNextFlow(BasicBlock *Prev); + + bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node); + + BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node); + + void createFlow(); + + void insertConditions(); + + void rebuildSSA(); + +public: + AMDGPUStructurizeCFG(): + RegionPass(ID) { + + initializeRegionInfoPass(*PassRegistry::getPassRegistry()); + } + + virtual bool doInitialization(Region *R, RGPassManager &RGM); + + virtual bool runOnRegion(Region *R, RGPassManager &RGM); + + virtual const char *getPassName() const { + return "AMDGPU simplify control flow"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + + AU.addRequired<DominatorTree>(); + AU.addPreserved<DominatorTree>(); + RegionPass::getAnalysisUsage(AU); + } + +}; + +} // end anonymous namespace + +char AMDGPUStructurizeCFG::ID = 0; + +/// \brief Initialize the types and constants used in the pass +bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) { + LLVMContext &Context = R->getEntry()->getContext(); + + Boolean = Type::getInt1Ty(Context); + BoolTrue = ConstantInt::getTrue(Context); + BoolFalse = ConstantInt::getFalse(Context); + BoolUndef = UndefValue::get(Boolean); + + return false; +} + +/// \brief Build up the general order of nodes +void AMDGPUStructurizeCFG::orderNodes() { + scc_iterator<Region *> I = scc_begin(ParentRegion), + E = scc_end(ParentRegion); + for (Order.clear(); I != E; ++I) { + std::vector<RegionNode *> &Nodes = *I; + Order.append(Nodes.begin(), Nodes.end()); + } +} + +/// \brief Build blocks and loop predicates +void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx, + BBPredicates &Pred, bool Invert) { + Value *True = Invert ? BoolFalse : BoolTrue; + Value *False = Invert ? BoolTrue : BoolFalse; + + RegionInfo *RI = ParentRegion->getRegionInfo(); + BasicBlock *BB = Term->getParent(); + + // Handle the case where multiple regions start at the same block + Region *R = BB != ParentRegion->getEntry() ? 
+ RI->getRegionFor(BB) : ParentRegion; + + if (R == ParentRegion) { + // It's a top level block in our region + Value *Cond = True; + if (Term->isConditional()) { + BasicBlock *Other = Term->getSuccessor(!Idx); + + if (Visited.count(Other)) { + if (!Pred.count(Other)) + Pred[Other] = False; + + if (!Pred.count(BB)) + Pred[BB] = True; + return; + } + Cond = Term->getCondition(); + + if (Idx != Invert) + Cond = BinaryOperator::CreateNot(Cond, "", Term); + } + + Pred[BB] = Cond; + + } else if (ParentRegion->contains(R)) { + // It's a block in a sub region + while(R->getParent() != ParentRegion) + R = R->getParent(); + + Pred[R->getEntry()] = True; + + } else { + // It's a branch from outside into our parent region + Pred[BB] = True; + } +} + +/// \brief Analyze the successors of each block and build up predicates +void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) { + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + BBPredicates &Pred = Predicates[BB]; + + for (; PI != PE; ++PI) { + BranchInst *Term = cast<BranchInst>((*PI)->getTerminator()); + + for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { + BasicBlock *Succ = Term->getSuccessor(i); + if (Succ != BB) + continue; + buildPredicate(Term, i, Pred, false); + } + } +} + +/// \brief Analyze the conditions leading to loop to a previous block +void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) { + BranchInst *Term = cast<BranchInst>(BB->getTerminator()); + + for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { + BasicBlock *Succ = Term->getSuccessor(i); + + // Ignore it if it's not a back edge + if (!Visited.count(Succ)) + continue; + + buildPredicate(Term, i, LoopPred, true); + + LoopEnd = BB; + if (Visited[Succ] < LoopIdx) { + LoopIdx = Visited[Succ]; + LoopStart = Succ; + } + } +} + +/// \brief Collect various loop and predicate infos +void AMDGPUStructurizeCFG::collectInfos() { + unsigned Number = 0, LoopIdx = ~0; + + // Reset predicate + Predicates.clear(); + + // and loop infos + LoopStart = LoopEnd = 0; + LoopPred.clear(); + + RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend(); + for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) { + + // Analyze all the conditions leading to a node + analyzeBlock((*OI)->getEntry()); + + if ((*OI)->isSubRegion()) + continue; + + // Find the first/last loop nodes and loop predicates + analyzeLoop((*OI)->getNodeAs<BasicBlock>(), LoopIdx); + } +} + +/// \brief Does A dominate all the predicates of B ? +bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) { + BBPredicates &Preds = Predicates[B]; + for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); + PI != PE; ++PI) { + + if (!DT->dominates(A, PI->first)) + return false; + } + return true; +} + +/// \brief Remove phi values from all successors and the remove the terminator. +void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) { + TerminatorInst *Term = BB->getTerminator(); + if (!Term) + return; + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); + SI != SE; ++SI) { + + delPhiValues(BB, *SI); + } + + Term->eraseFromParent(); +} + +/// First: Skip forward to the first region node that either isn't a subregion or not +/// dominating it's exit, remove all the skipped nodes from the node order. 
+/// +/// Second: Handle the first successor directly if the resulting nodes successor +/// predicates are still dominated by the original entry +RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) { + BasicBlock *Entry = Node->getEntry(); + + // Skip forward as long as it is just a linear flow + while (true) { + BasicBlock *Entry = Node->getEntry(); + BasicBlock *Exit; + + if (Node->isSubRegion()) { + Exit = Node->getNodeAs<Region>()->getExit(); + } else { + TerminatorInst *Term = Entry->getTerminator(); + if (Term->getNumSuccessors() != 1) + break; + Exit = Term->getSuccessor(0); + } + + // It's a back edge, break here so we can insert a loop node + if (!Visited.count(Exit)) + return Node; + + // More than node edges are pointing to exit + if (!DT->dominates(Entry, Exit)) + return Node; + + RegionNode *Next = ParentRegion->getNode(Exit); + RNVector::iterator I = std::find(Order.begin(), Order.end(), Next); + assert(I != Order.end()); + + Visited.erase(Next->getEntry()); + Order.erase(I); + Node = Next; + } + + BasicBlock *BB = Node->getEntry(); + TerminatorInst *Term = BB->getTerminator(); + if (Term->getNumSuccessors() != 2) + return Node; + + // Our node has exactly two succesors, check if we can handle + // any of them directly + BasicBlock *Succ = Term->getSuccessor(0); + if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) { + Succ = Term->getSuccessor(1); + if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) + return Node; + } else { + BasicBlock *Succ2 = Term->getSuccessor(1); + if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] && + dominatesPredicates(Entry, Succ2)) + Succ = Succ2; + } + + RegionNode *Next = ParentRegion->getNode(Succ); + RNVector::iterator E = Order.end(); + RNVector::iterator I = std::find(Order.begin(), E, Next); + assert(I != E); + + killTerminator(BB); + FlowsInserted.push_back(BB); + Visited.erase(Succ); + Order.erase(I); + return ParentRegion->getNode(wireFlowBlock(BB, Next)); +} + +/// \brief Remove all PHI values coming from "From" into "To" and remember +/// them in DeletedPhis +void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { + PhiMap &Map = DeletedPhis[To]; + for (BasicBlock::iterator I = To->begin(), E = To->end(); + I != E && isa<PHINode>(*I);) { + + PHINode &Phi = cast<PHINode>(*I++); + while (Phi.getBasicBlockIndex(From) != -1) { + Value *Deleted = Phi.removeIncomingValue(From, false); + Map[&Phi].push_back(std::make_pair(From, Deleted)); + } + } +} + +/// \brief Add the PHI values back once we knew the new predecessor +void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { + if (!DeletedPhis.count(To)) + return; + + PhiMap &Map = DeletedPhis[To]; + SSAUpdater Updater; + + for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) { + + PHINode *Phi = I->first; + Updater.Initialize(Phi->getType(), ""); + BasicBlock *Fallback = To; + bool HaveFallback = false; + + for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end(); + VI != VE; ++VI) { + + Updater.AddAvailableValue(VI->first, VI->second); + BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first); + if (Dom == VI->first) + HaveFallback = true; + else if (Dom != Fallback) + HaveFallback = false; + Fallback = Dom; + } + if (!HaveFallback) { + Value *Undef = UndefValue::get(Phi->getType()); + Updater.AddAvailableValue(Fallback, Undef); + } + + Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From); + } + DeletedPhis.erase(To); +} + +/// \brief Create a new flow node 
and update dominator tree and region info +BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) { + LLVMContext &Context = Func->getContext(); + BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : + Order.back()->getEntry(); + BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, + Func, Insert); + DT->addNewBlock(Flow, Prev); + ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); + FlowsInserted.push_back(Flow); + return Flow; +} + +/// \brief Can we predict that this node will always be called? +bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev, + BasicBlock *Node) { + BBPredicates &Preds = Predicates[Node]; + bool Dominated = false; + + for (BBPredicates::iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) { + + if (I->second != BoolTrue) + return false; + + if (!Dominated && DT->dominates(I->first, Prev)) + Dominated = true; + } + return Dominated; +} + +/// \brief Wire up the new control flow by inserting or updating the branch +/// instructions at node exits +BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev, + RegionNode *Node) { + BasicBlock *Entry = Node->getEntry(); + + if (LoopStart == Entry) { + LoopStart = Prev; + LoopPred[Prev] = BoolTrue; + } + + // Wire it up temporary, skipChained may recurse into us + BranchInst::Create(Entry, Prev); + DT->changeImmediateDominator(Entry, Prev); + addPhiValues(Prev, Entry); + + Node = skipChained(Node); + + BasicBlock *Next = getNextFlow(Prev); + if (!isPredictableTrue(Prev, Entry)) { + // Let Prev point to entry and next block + Prev->getTerminator()->eraseFromParent(); + BranchInst::Create(Entry, Next, BoolUndef, Prev); + } else { + DT->changeImmediateDominator(Next, Entry); + } + + // Let node exit(s) point to next block + if (Node->isSubRegion()) { + Region *SubRegion = Node->getNodeAs<Region>(); + BasicBlock *Exit = SubRegion->getExit(); + + // Find all the edges from the sub region to the exit + BBVector ToDo; + for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { + if (SubRegion->contains(*I)) + ToDo.push_back(*I); + } + + // Modify the edges to point to the new flow block + for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) { + delPhiValues(*I, Exit); + TerminatorInst *Term = (*I)->getTerminator(); + Term->replaceUsesOfWith(Exit, Next); + } + + // Update the region info + SubRegion->replaceExit(Next); + + } else { + BasicBlock *BB = Node->getNodeAs<BasicBlock>(); + killTerminator(BB); + BranchInst::Create(Next, BB); + + if (BB == LoopEnd) + LoopEnd = 0; + } + + return Next; +} + +/// Destroy node order and visited map, build up flow order instead. +/// After this function control flow looks like it should be, but +/// branches only have undefined conditions. 
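For illustration only (not part of the change): the "undefined conditions" mentioned above are the heart of the two-phase scheme. Flow-block branches are first created with an undef i1 condition, and insertConditions() later patches in the real predicate once every incoming block is known, using SSAUpdater roughly as sketched below. The helper names and the predicate-map type are assumptions made for the example.

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"

    using namespace llvm;

    // Phase 1: wire a flow block with a placeholder (undef) branch condition.
    static BranchInst *wireWithUndef(BasicBlock *Flow, BasicBlock *TrueDest,
                                     BasicBlock *FalseDest) {
      Value *Undef = UndefValue::get(Type::getInt1Ty(Flow->getContext()));
      return BranchInst::Create(TrueDest, FalseDest, Undef, Flow);
    }

    // Phase 2: once the per-block predicates are collected, materialize the
    // real condition with SSAUpdater and replace the placeholder.
    static void patchCondition(BranchInst *Br, BasicBlock *FuncEntry,
                               const DenseMap<BasicBlock*, Value*> &Preds) {
      LLVMContext &Ctx = Br->getContext();
      SSAUpdater Updater;
      Updater.Initialize(Type::getInt1Ty(Ctx), "pred");
      Updater.AddAvailableValue(FuncEntry, ConstantInt::getFalse(Ctx));
      for (DenseMap<BasicBlock*, Value*>::const_iterator I = Preds.begin(),
             E = Preds.end(); I != E; ++I)
        Updater.AddAvailableValue(I->first, I->second);
      Br->setCondition(Updater.GetValueAtEndOfBlock(Br->getParent()));
    }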
+void AMDGPUStructurizeCFG::createFlow() { + DeletedPhis.clear(); + + BasicBlock *Prev = Order.pop_back_val()->getEntry(); + assert(Prev == ParentRegion->getEntry() && "Incorrect node order!"); + Visited.erase(Prev); + + if (LoopStart == Prev) { + // Loop starts at entry, split entry so that we can predicate it + BasicBlock::iterator Insert = Prev->getFirstInsertionPt(); + BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName); + DT->addNewBlock(Split, Prev); + ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion); + Predicates[Split] = Predicates[Prev]; + Order.push_back(ParentRegion->getBBNode(Split)); + LoopPred[Prev] = BoolTrue; + + } else if (LoopStart == Order.back()->getEntry()) { + // Loop starts behind entry, split entry so that we can jump to it + Instruction *Term = Prev->getTerminator(); + BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName); + DT->addNewBlock(Split, Prev); + ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion); + Prev = Split; + } + + killTerminator(Prev); + FlowsInserted.clear(); + FlowsInserted.push_back(Prev); + + while (!Order.empty()) { + RegionNode *Node = Order.pop_back_val(); + Visited.erase(Node->getEntry()); + Prev = wireFlowBlock(Prev, Node); + if (LoopStart && !LoopEnd) { + // Create an extra loop end node + LoopEnd = Prev; + Prev = getNextFlow(LoopEnd); + BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd); + addPhiValues(LoopEnd, LoopStart); + } + } + + BasicBlock *Exit = ParentRegion->getExit(); + BranchInst::Create(Exit, Prev); + addPhiValues(Prev, Exit); + if (DT->dominates(ParentRegion->getEntry(), Exit)) + DT->changeImmediateDominator(Exit, Prev); + + if (LoopStart && LoopEnd) { + BBVector::iterator FI = std::find(FlowsInserted.begin(), + FlowsInserted.end(), + LoopStart); + for (; *FI != LoopEnd; ++FI) { + addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0)); + } + } + + assert(Order.empty()); + assert(Visited.empty()); + assert(DeletedPhis.empty()); +} + +/// \brief Insert the missing branch conditions +void AMDGPUStructurizeCFG::insertConditions() { + SSAUpdater PhiInserter; + + for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end(); + FI != FE; ++FI) { + + BranchInst *Term = cast<BranchInst>((*FI)->getTerminator()); + if (Term->isUnconditional()) + continue; + + PhiInserter.Initialize(Boolean, ""); + PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse); + + BasicBlock *Succ = Term->getSuccessor(0); + BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ]; + for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); + PI != PE; ++PI) { + + PhiInserter.AddAvailableValue(PI->first, PI->second); + } + + Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI)); + } +} + +/// Handle a rare case where the disintegrated nodes instructions +/// no longer dominate all their uses. 
Not sure if this is really nessasary +void AMDGPUStructurizeCFG::rebuildSSA() { + SSAUpdater Updater; + for (Region::block_iterator I = ParentRegion->block_begin(), + E = ParentRegion->block_end(); + I != E; ++I) { + + BasicBlock *BB = *I; + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); + II != IE; ++II) { + + bool Initialized = false; + for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) { + + Next = I->getNext(); + + Instruction *User = cast<Instruction>(I->getUser()); + if (User->getParent() == BB) { + continue; + + } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { + if (UserPN->getIncomingBlock(*I) == BB) + continue; + } + + if (DT->dominates(II, User)) + continue; + + if (!Initialized) { + Value *Undef = UndefValue::get(II->getType()); + Updater.Initialize(II->getType(), ""); + Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); + Updater.AddAvailableValue(BB, II); + Initialized = true; + } + Updater.RewriteUseAfterInsertions(*I); + } + } + } +} + +/// \brief Run the transformation for each region found +bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) { + if (R->isTopLevelRegion()) + return false; + + Func = R->getEntry()->getParent(); + ParentRegion = R; + + DT = &getAnalysis<DominatorTree>(); + + orderNodes(); + collectInfos(); + createFlow(); + insertConditions(); + rebuildSSA(); + + Order.clear(); + Visited.clear(); + Predicates.clear(); + DeletedPhis.clear(); + FlowsInserted.clear(); + + return true; +} + +/// \brief Create the pass +Pass *llvm::createAMDGPUStructurizeCFGPass() { + return new AMDGPUStructurizeCFG(); +} diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp new file mode 100644 index 0000000000..0f356a1c3f --- /dev/null +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -0,0 +1,87 @@ +//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Implements the AMDGPU specific subclass of TargetSubtarget. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUSubtarget.h" + +using namespace llvm; + +#define GET_SUBTARGETINFO_ENUM +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "AMDGPUGenSubtargetInfo.inc" + +AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) : + AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) { + InstrItins = getInstrItineraryForCPU(CPU); + + memset(CapsOverride, 0, sizeof(*CapsOverride) + * AMDGPUDeviceInfo::MaxNumberCapabilities); + // Default card + StringRef GPU = CPU; + Is64bit = false; + DefaultSize[0] = 64; + DefaultSize[1] = 1; + DefaultSize[2] = 1; + ParseSubtargetFeatures(GPU, FS); + DevName = GPU; + Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit); +} + +AMDGPUSubtarget::~AMDGPUSubtarget() { + delete Device; +} + +bool +AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const { + assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities && + "Caps index is out of bounds!"); + return CapsOverride[caps]; +} +bool +AMDGPUSubtarget::is64bit() const { + return Is64bit; +} +bool +AMDGPUSubtarget::isTargetELF() const { + return false; +} +size_t +AMDGPUSubtarget::getDefaultSize(uint32_t dim) const { + if (dim > 3) { + return 1; + } else { + return DefaultSize[dim]; + } +} + +std::string +AMDGPUSubtarget::getDataLayout() const { + if (!Device) { + return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" + "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" + "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" + "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64"); + } + return Device->getDataLayout(); +} + +std::string +AMDGPUSubtarget::getDeviceName() const { + return DevName; +} +const AMDGPUDevice * +AMDGPUSubtarget::device() const { + return Device; +} diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h new file mode 100644 index 0000000000..1973fc6d54 --- /dev/null +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -0,0 +1,65 @@ +//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +/// \file +/// \brief AMDGPU specific subclass of TargetSubtarget. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUSUBTARGET_H +#define AMDGPUSUBTARGET_H +#include "AMDILDevice.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#define GET_SUBTARGETINFO_HEADER +#include "AMDGPUGenSubtargetInfo.inc" + +#define MAX_CB_SIZE (1 << 16) + +namespace llvm { + +class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { +private: + bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities]; + const AMDGPUDevice *Device; + size_t DefaultSize[3]; + std::string DevName; + bool Is64bit; + bool Is32on64bit; + bool DumpCode; + bool R600ALUInst; + + InstrItineraryData InstrItins; + +public: + AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS); + virtual ~AMDGPUSubtarget(); + + const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } + virtual void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + bool isOverride(AMDGPUDeviceInfo::Caps) const; + bool is64bit() const; + + // Helper functions to simplify if statements + bool isTargetELF() const; + const AMDGPUDevice* device() const; + std::string getDataLayout() const; + std::string getDeviceName() const; + virtual size_t getDefaultSize(uint32_t dim) const; + bool dumpCode() const { return DumpCode; } + bool r600ALUEncoding() const { return R600ALUInst; } + +}; + +} // End namespace llvm + +#endif // AMDGPUSUBTARGET_H diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp new file mode 100644 index 0000000000..d09dc2efff --- /dev/null +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -0,0 +1,142 @@ +//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief The AMDGPU target machine contains all of the hardware specific +/// information needed to emit code for R600 and SI GPUs. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUTargetMachine.h" +#include "AMDGPU.h" +#include "R600ISelLowering.h" +#include "R600InstrInfo.h" +#include "SIISelLowering.h" +#include "SIInstrInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/PassManager.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_os_ostream.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Scalar.h" +#include <llvm/CodeGen/Passes.h> + +using namespace llvm; + +extern "C" void LLVMInitializeR600Target() { + // Register the target + RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget); +} + +AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OptLevel +) +: + LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel), + Subtarget(TT, CPU, FS), + Layout(Subtarget.getDataLayout()), + FrameLowering(TargetFrameLowering::StackGrowsUp, + Subtarget.device()->getStackAlignment(), 0), + IntrinsicInfo(this), + InstrItins(&Subtarget.getInstrItineraryData()) { + // TLInfo uses InstrInfo so it must be initialized after. + if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { + InstrInfo = new R600InstrInfo(*this); + TLInfo = new R600TargetLowering(*this); + } else { + InstrInfo = new SIInstrInfo(*this); + TLInfo = new SITargetLowering(*this); + } +} + +AMDGPUTargetMachine::~AMDGPUTargetMachine() { +} + +namespace { +class AMDGPUPassConfig : public TargetPassConfig { +public: + AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + AMDGPUTargetMachine &getAMDGPUTargetMachine() const { + return getTM<AMDGPUTargetMachine>(); + } + + virtual bool addPreISel(); + virtual bool addInstSelector(); + virtual bool addPreRegAlloc(); + virtual bool addPostRegAlloc(); + virtual bool addPreSched2(); + virtual bool addPreEmitPass(); +}; +} // End of anonymous namespace + +TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) { + return new AMDGPUPassConfig(this, PM); +} + +bool +AMDGPUPassConfig::addPreISel() { + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { + addPass(createAMDGPUStructurizeCFGPass()); + addPass(createSIAnnotateControlFlowPass()); + } + return false; +} + +bool AMDGPUPassConfig::addInstSelector() { + addPass(createAMDGPUPeepholeOpt(*TM)); + addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); + return false; +} + +bool AMDGPUPassConfig::addPreRegAlloc() { + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + + if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { + addPass(createSIAssignInterpRegsPass(*TM)); + } + addPass(createAMDGPUConvertToISAPass(*TM)); + return false; +} + +bool AMDGPUPassConfig::addPostRegAlloc() { + return false; +} + +bool AMDGPUPassConfig::addPreSched2() { + + addPass(&IfConverterID); + return false; +} + +bool AMDGPUPassConfig::addPreEmitPass() { + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { + addPass(createAMDGPUCFGPreparationPass(*TM)); + addPass(createAMDGPUCFGStructurizerPass(*TM)); 
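// (Editorial note.) This branch is the R600-family pre-emit pipeline: the passes
// added next expand R600 pseudo instructions and finalize machine bundles. The
// else-branch below is the SI path, which instead lowers literal constants and
// control flow.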
+ addPass(createR600ExpandSpecialInstrsPass(*TM)); + addPass(&FinalizeMachineBundlesID); + } else { + addPass(createSILowerLiteralConstantsPass(*TM)); + addPass(createSILowerControlFlowPass(*TM)); + } + + return false; +} + diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h new file mode 100644 index 0000000000..91f9a83a29 --- /dev/null +++ b/lib/Target/R600/AMDGPUTargetMachine.h @@ -0,0 +1,70 @@ +//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief The AMDGPU TargetMachine interface definition for hw codgen targets. +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPU_TARGET_MACHINE_H +#define AMDGPU_TARGET_MACHINE_H + +#include "AMDGPUInstrInfo.h" +#include "AMDGPUSubtarget.h" +#include "AMDILFrameLowering.h" +#include "AMDILIntrinsicInfo.h" +#include "R600ISelLowering.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/IR/DataLayout.h" + +namespace llvm { + +MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT); + +class AMDGPUTargetMachine : public LLVMTargetMachine { + + AMDGPUSubtarget Subtarget; + const DataLayout Layout; + AMDGPUFrameLowering FrameLowering; + AMDGPUIntrinsicInfo IntrinsicInfo; + const AMDGPUInstrInfo * InstrInfo; + AMDGPUTargetLowering * TLInfo; + const InstrItineraryData* InstrItins; + +public: + AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS, + StringRef CPU, + TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + ~AMDGPUTargetMachine(); + virtual const AMDGPUFrameLowering* getFrameLowering() const { + return &FrameLowering; + } + virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const { + return &IntrinsicInfo; + } + virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;} + virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; } + virtual const AMDGPURegisterInfo *getRegisterInfo() const { + return &InstrInfo->getRegisterInfo(); + } + virtual AMDGPUTargetLowering * getTargetLowering() const { + return TLInfo; + } + virtual const InstrItineraryData* getInstrItineraryData() const { + return InstrItins; + } + virtual const DataLayout* getDataLayout() const { return &Layout; } + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); +}; + +} // End namespace llvm + +#endif // AMDGPU_TARGET_MACHINE_H diff --git a/lib/Target/R600/AMDIL.h b/lib/Target/R600/AMDIL.h new file mode 100644 index 0000000000..4e577dc234 --- /dev/null +++ b/lib/Target/R600/AMDIL.h @@ -0,0 +1,106 @@ +//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +/// This file contains the entry points for global functions defined in the LLVM +/// AMDGPU back-end. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_H
+#define AMDIL_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define ARENA_SEGMENT_RESERVED_UAVS 12
+#define DEFAULT_ARENA_UAV_ID 8
+#define DEFAULT_RAW_UAV_ID 7
+#define GLOBAL_RETURN_RAW_UAV_ID 11
+#define HW_MAX_NUM_CB 8
+#define MAX_NUM_UNIQUE_UAVS 8
+#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
+#define OPENCL_MAX_READ_IMAGES 128
+#define OPENCL_MAX_WRITE_IMAGES 8
+#define OPENCL_MAX_SAMPLERS 16
+
+// The next two values can never be zero, as zero is the ID that is
+// used to assert against.
+#define DEFAULT_LDS_ID 1
+#define DEFAULT_GDS_ID 1
+#define DEFAULT_SCRATCH_ID 1
+#define DEFAULT_VEC_SLOTS 8
+
+#define OCL_DEVICE_RV710 0x0001
+#define OCL_DEVICE_RV730 0x0002
+#define OCL_DEVICE_RV770 0x0004
+#define OCL_DEVICE_CEDAR 0x0008
+#define OCL_DEVICE_REDWOOD 0x0010
+#define OCL_DEVICE_JUNIPER 0x0020
+#define OCL_DEVICE_CYPRESS 0x0040
+#define OCL_DEVICE_CAICOS 0x0080
+#define OCL_DEVICE_TURKS 0x0100
+#define OCL_DEVICE_BARTS 0x0200
+#define OCL_DEVICE_CAYMAN 0x0400
+#define OCL_DEVICE_ALL 0x3FFF
+
+/// The number of function IDs that are reserved for
+/// internal compiler usage.
+const unsigned int RESERVED_FUNCS = 1024;
+
+namespace llvm {
+class AMDGPUInstrPrinter;
+class FunctionPass;
+class MCAsmInfo;
+class raw_ostream;
+class Target;
+class TargetMachine;
+
+// Instruction selection passes.
+FunctionPass*
+  createAMDGPUISelDag(TargetMachine &TM);
+FunctionPass*
+  createAMDGPUPeepholeOpt(TargetMachine &TM);
+
+// Pre-emit passes.
+FunctionPass*
+  createAMDGPUCFGPreparationPass(TargetMachine &TM);
+FunctionPass*
+  createAMDGPUCFGStructurizerPass(TargetMachine &TM);
+
+extern Target TheAMDGPUTarget;
+} // end namespace llvm;
+
+// Include device information enumerations
+#include "AMDILDeviceInfo.h"
+
+namespace llvm {
+/// OpenCL uses address spaces to differentiate between
+/// various memory regions on the hardware. On the CPU
+/// all of the address spaces point to the same memory,
+/// however on the GPU, each address space points to
+/// a separate piece of memory that is unique from other
+/// memory locations.
+namespace AMDGPUAS {
+enum AddressSpaces {
+  PRIVATE_ADDRESS = 0, ///< Address space for private memory.
+  GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
+  CONSTANT_ADDRESS = 2, ///< Address space for constant memory.
+  LOCAL_ADDRESS = 3, ///< Address space for local memory.
+  REGION_ADDRESS = 4, ///< Address space for region memory.
+  ADDRESS_NONE = 5, ///< Address space for unknown memory.
+  PARAM_D_ADDRESS = 6, ///< Address space for directly addressable parameter memory (CONST0)
+  PARAM_I_ADDRESS = 7, ///< Address space for indirectly addressable parameter memory (VTX1)
+  USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
+  LAST_ADDRESS = 9
+};
+
+} // namespace AMDGPUAS
+
+} // end namespace llvm
+#endif // AMDIL_H
diff --git a/lib/Target/R600/AMDIL7XXDevice.cpp b/lib/Target/R600/AMDIL7XXDevice.cpp
new file mode 100644
index 0000000000..ea6ac34f57
--- /dev/null
+++ b/lib/Target/R600/AMDIL7XXDevice.cpp
@@ -0,0 +1,115 @@
+//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +// \file +//==-----------------------------------------------------------------------===// +#include "AMDIL7XXDevice.h" +#include "AMDGPUSubtarget.h" +#include "AMDILDevice.h" + +using namespace llvm; + +AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) { + setCaps(); + std::string name = mSTM->getDeviceName(); + if (name == "rv710") { + DeviceFlag = OCL_DEVICE_RV710; + } else if (name == "rv730") { + DeviceFlag = OCL_DEVICE_RV730; + } else { + DeviceFlag = OCL_DEVICE_RV770; + } +} + +AMDGPU7XXDevice::~AMDGPU7XXDevice() { +} + +void AMDGPU7XXDevice::setCaps() { + mSWBits.set(AMDGPUDeviceInfo::LocalMem); +} + +size_t AMDGPU7XXDevice::getMaxLDSSize() const { + if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { + return MAX_LDS_SIZE_700; + } + return 0; +} + +size_t AMDGPU7XXDevice::getWavefrontSize() const { + return AMDGPUDevice::HalfWavefrontSize; +} + +uint32_t AMDGPU7XXDevice::getGeneration() const { + return AMDGPUDeviceInfo::HD4XXX; +} + +uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const { + switch (DeviceID) { + default: + assert(0 && "ID type passed in is unknown!"); + break; + case GLOBAL_ID: + case CONSTANT_ID: + case RAW_UAV_ID: + case ARENA_UAV_ID: + break; + case LDS_ID: + if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { + return DEFAULT_LDS_ID; + } + break; + case SCRATCH_ID: + if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) { + return DEFAULT_SCRATCH_ID; + } + break; + case GDS_ID: + assert(0 && "GDS UAV ID is not supported on this chip"); + if (usesHardware(AMDGPUDeviceInfo::RegionMem)) { + return DEFAULT_GDS_ID; + } + break; + }; + + return 0; +} + +uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const { + return 1; +} + +AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) { + setCaps(); +} + +AMDGPU770Device::~AMDGPU770Device() { +} + +void AMDGPU770Device::setCaps() { + if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) { + mSWBits.set(AMDGPUDeviceInfo::FMA); + mHWBits.set(AMDGPUDeviceInfo::DoubleOps); + } + mSWBits.set(AMDGPUDeviceInfo::BarrierDetect); + mHWBits.reset(AMDGPUDeviceInfo::LongOps); + mSWBits.set(AMDGPUDeviceInfo::LongOps); + mSWBits.set(AMDGPUDeviceInfo::LocalMem); +} + +size_t AMDGPU770Device::getWavefrontSize() const { + return AMDGPUDevice::WavefrontSize; +} + +AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) { +} + +AMDGPU710Device::~AMDGPU710Device() { +} + +size_t AMDGPU710Device::getWavefrontSize() const { + return AMDGPUDevice::QuarterWavefrontSize; +} diff --git a/lib/Target/R600/AMDIL7XXDevice.h b/lib/Target/R600/AMDIL7XXDevice.h new file mode 100644 index 0000000000..1cf4ca415a --- /dev/null +++ b/lib/Target/R600/AMDIL7XXDevice.h @@ -0,0 +1,72 @@ +//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +/// \file +/// \brief Interface for the subtarget data classes. +/// +/// This file will define the interface that each generation needs to +/// implement in order to correctly answer queries on the capabilities of the +/// specific hardware. 
+//===----------------------------------------------------------------------===//
+#ifndef AMDIL7XXDEVICEIMPL_H
+#define AMDIL7XXDEVICEIMPL_H
+#include "AMDILDevice.h"
+
+namespace llvm {
+class AMDGPUSubtarget;
+
+//===----------------------------------------------------------------------===//
+// 7XX generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+/// \brief The AMDGPU7XXDevice class represents the generic 7XX device.
+///
+/// All 7XX devices are derived from this class. The AMDGPU7XX device will only
+/// support the minimal features that are required to be considered OpenCL 1.0
+/// compliant and nothing more.
+class AMDGPU7XXDevice : public AMDGPUDevice {
+public:
+  AMDGPU7XXDevice(AMDGPUSubtarget *ST);
+  virtual ~AMDGPU7XXDevice();
+  virtual size_t getMaxLDSSize() const;
+  virtual size_t getWavefrontSize() const;
+  virtual uint32_t getGeneration() const;
+  virtual uint32_t getResourceID(uint32_t DeviceID) const;
+  virtual uint32_t getMaxNumUAVs() const;
+
+protected:
+  virtual void setCaps();
+};
+
+/// \brief The AMDGPU770Device class represents the RV770 chip and its
+/// derivative cards.
+///
+/// The difference between this device and the base class is that this device
+/// adds support for double precision and has a larger wavefront size.
+class AMDGPU770Device : public AMDGPU7XXDevice {
+public:
+  AMDGPU770Device(AMDGPUSubtarget *ST);
+  virtual ~AMDGPU770Device();
+  virtual size_t getWavefrontSize() const;
+private:
+  virtual void setCaps();
+};
+
+/// \brief The AMDGPU710Device class derives from the 7XX base class.
+///
+/// This class is a smaller derivative, so we need to overload some of the
+/// functions in order to correctly specify this information.
+class AMDGPU710Device : public AMDGPU7XXDevice {
+public:
+  AMDGPU710Device(AMDGPUSubtarget *ST);
+  virtual ~AMDGPU710Device();
+  virtual size_t getWavefrontSize() const;
+};
+
+} // namespace llvm
+#endif // AMDIL7XXDEVICEIMPL_H
diff --git a/lib/Target/R600/AMDILBase.td b/lib/Target/R600/AMDILBase.td
new file mode 100644
index 0000000000..c12cedcf7f
--- /dev/null
+++ b/lib/Target/R600/AMDILBase.td
@@ -0,0 +1,85 @@
+//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+// Dummy Instruction itineraries for pseudo instructions
+def ALU_NULL : FuncUnit;
+def NullALU : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// AMDIL Subtarget features.
+//===----------------------------------------------------------------------===// +def FeatureFP64 : SubtargetFeature<"fp64", + "CapsOverride[AMDGPUDeviceInfo::DoubleOps]", + "true", + "Enable 64bit double precision operations">; +def FeatureByteAddress : SubtargetFeature<"byte_addressable_store", + "CapsOverride[AMDGPUDeviceInfo::ByteStores]", + "true", + "Enable byte addressable stores">; +def FeatureBarrierDetect : SubtargetFeature<"barrier_detect", + "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]", + "true", + "Enable duplicate barrier detection(HD5XXX or later).">; +def FeatureImages : SubtargetFeature<"images", + "CapsOverride[AMDGPUDeviceInfo::Images]", + "true", + "Enable image functions">; +def FeatureMultiUAV : SubtargetFeature<"multi_uav", + "CapsOverride[AMDGPUDeviceInfo::MultiUAV]", + "true", + "Generate multiple UAV code(HD5XXX family or later)">; +def FeatureMacroDB : SubtargetFeature<"macrodb", + "CapsOverride[AMDGPUDeviceInfo::MacroDB]", + "true", + "Use internal macrodb, instead of macrodb in driver">; +def FeatureNoAlias : SubtargetFeature<"noalias", + "CapsOverride[AMDGPUDeviceInfo::NoAlias]", + "true", + "assert that all kernel argument pointers are not aliased">; +def FeatureNoInline : SubtargetFeature<"no-inline", + "CapsOverride[AMDGPUDeviceInfo::NoInline]", + "true", + "specify whether to not inline functions">; + +def Feature64BitPtr : SubtargetFeature<"64BitPtr", + "Is64bit", + "false", + "Specify if 64bit addressing should be used.">; + +def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr", + "Is32on64bit", + "false", + "Specify if 64bit sized pointers with 32bit addressing should be used.">; +def FeatureDebug : SubtargetFeature<"debug", + "CapsOverride[AMDGPUDeviceInfo::Debug]", + "true", + "Debug mode is enabled, so disable hardware accelerated address spaces.">; +def FeatureDumpCode : SubtargetFeature <"DumpCode", + "DumpCode", + "true", + "Dump MachineInstrs in the CodeEmitter">; + +def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", + "R600ALUInst", + "false", + "Older version of ALU instructions encoding.">; + + +//===----------------------------------------------------------------------===// +// Register File, Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// + + +include "AMDILRegisterInfo.td" +include "AMDILInstrInfo.td" + diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp new file mode 100644 index 0000000000..aa8ab6bd93 --- /dev/null +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp @@ -0,0 +1,3049 @@ +//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +/// \file +//==-----------------------------------------------------------------------===// + +#define DEBUGME 0 +#define DEBUG_TYPE "structcfg" + +#include "AMDGPUInstrInfo.h" +#include "AMDIL.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DominatorInternals.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +// TODO: move-begin. + +//===----------------------------------------------------------------------===// +// +// Statistics for CFGStructurizer. +// +//===----------------------------------------------------------------------===// + +STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " + "matched"); +STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " + "matched"); +STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break " + "pattern matched"); +STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue " + "pattern matched"); +STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern " + "matched"); +STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); +STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); + +//===----------------------------------------------------------------------===// +// +// Miscellaneous utility for CFGStructurizer. +// +//===----------------------------------------------------------------------===// +namespace llvmCFGStruct { +#define SHOWNEWINSTR(i) \ + if (DEBUGME) errs() << "New instr: " << *i << "\n" + +#define SHOWNEWBLK(b, msg) \ +if (DEBUGME) { \ + errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ + errs() << "\n"; \ +} + +#define SHOWBLK_DETAIL(b, msg) \ +if (DEBUGME) { \ + if (b) { \ + errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ + b->print(errs()); \ + errs() << "\n"; \ + } \ +} + +#define INVALIDSCCNUM -1 +#define INVALIDREGNUM 0 + +template<class LoopinfoT> +void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) { + for (typename LoopinfoT::iterator iter = LoopInfo.begin(), + iterEnd = LoopInfo.end(); + iter != iterEnd; ++iter) { + (*iter)->print(OS, 0); + } +} + +template<class NodeT> +void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) { + size_t sz = Src.size(); + for (size_t i = 0; i < sz/2; ++i) { + NodeT *t = Src[i]; + Src[i] = Src[sz - i - 1]; + Src[sz - i - 1] = t; + } +} + +} //end namespace llvmCFGStruct + +//===----------------------------------------------------------------------===// +// +// supporting data structure for CFGStructurizer +// +//===----------------------------------------------------------------------===// + +namespace llvmCFGStruct { +template<class PassT> +struct CFGStructTraits { +}; + +template <class InstrT> +class BlockInformation { +public: + bool isRetired; + int sccNum; + //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr; + //Instructions defining the corresponding successor. 
+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {} +}; + +template <class BlockT, class InstrT, class RegiT> +class LandInformation { +public: + BlockT *landBlk; + std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before + //WHILELOOP(thisloop) init before entering + //thisloop. + std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after + //WHILELOOP(thisloop) init after entering + //thisloop. + std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop + //land block, branch cond on this reg. + std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break + //endif" after ENDLOOP(thisloop) break + //outerLoopOf(thisLoop). + std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue + //endif" after ENDLOOP(thisloop) continue on + //outerLoopOf(thisLoop). + LandInformation() : landBlk(NULL) {} +}; + +} //end of namespace llvmCFGStruct + +//===----------------------------------------------------------------------===// +// +// CFGStructurizer +// +//===----------------------------------------------------------------------===// + +namespace llvmCFGStruct { +// bixia TODO: port it to BasicBlock, not just MachineBasicBlock. +template<class PassT> +class CFGStructurizer { +public: + typedef enum { + Not_SinglePath = 0, + SinglePath_InPath = 1, + SinglePath_NotInPath = 2 + } PathToKind; + +public: + typedef typename PassT::InstructionType InstrT; + typedef typename PassT::FunctionType FuncT; + typedef typename PassT::DominatortreeType DomTreeT; + typedef typename PassT::PostDominatortreeType PostDomTreeT; + typedef typename PassT::DomTreeNodeType DomTreeNodeT; + typedef typename PassT::LoopinfoType LoopInfoT; + + typedef GraphTraits<FuncT *> FuncGTraits; + //typedef FuncGTraits::nodes_iterator BlockIterator; + typedef typename FuncT::iterator BlockIterator; + + typedef typename FuncGTraits::NodeType BlockT; + typedef GraphTraits<BlockT *> BlockGTraits; + typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits; + //typedef BlockGTraits::succ_iterator InstructionIterator; + typedef typename BlockT::iterator InstrIterator; + + typedef CFGStructTraits<PassT> CFGTraits; + typedef BlockInformation<InstrT> BlockInfo; + typedef std::map<BlockT *, BlockInfo *> BlockInfoMap; + + typedef int RegiT; + typedef typename PassT::LoopType LoopT; + typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo; + typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap; + //landing info for loop break + typedef SmallVector<BlockT *, 32> BlockTSmallerVector; + +public: + CFGStructurizer(); + ~CFGStructurizer(); + + /// Perform the CFG structurization + bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); + + /// Perform the CFG preparation + bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); + +private: + void reversePredicateSetter(typename BlockT::iterator); + void orderBlocks(); + void printOrderedBlocks(llvm::raw_ostream &OS); + int patternMatch(BlockT *CurBlock); + int patternMatchGroup(BlockT *CurBlock); + + int serialPatternMatch(BlockT *CurBlock); + int ifPatternMatch(BlockT *CurBlock); + int switchPatternMatch(BlockT *CurBlock); + int loopendPatternMatch(BlockT *CurBlock); + int loopPatternMatch(BlockT *CurBlock); + + int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); + int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); + //int loopWithoutBreak(BlockT *); + + void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop, + BlockT *ExitBlock, LoopT *exitLoop, BlockT 
*landBlock); + void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop, + BlockT *ContBlock, LoopT *contLoop); + bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block); + int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, + BlockT *FalseBlock); + int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock, + BlockT *FalseBlock); + int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, + BlockT *FalseBlock, BlockT **LandBlockPtr); + void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, + BlockT *FalseBlock, BlockT *LandBlock, + bool Detail = false); + PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock, + bool AllowSideEntry = true); + BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock, + bool AllowSideEntry = true); + int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock); + void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock); + + void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock, + BlockT *TrueBlock, BlockT *FalseBlock, + BlockT *LandBlock); + void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand); + void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock, + BlockT *ExitLandBlock, RegiT SetReg); + void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock, + RegiT SetReg); + BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep, + std::set<BlockT*> &ExitBlockSet, + BlockT *ExitLandBlk); + BlockT *addLoopEndbranchBlock(LoopT *LoopRep, + BlockTSmallerVector &ExitingBlocks, + BlockTSmallerVector &ExitBlocks); + BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep); + void removeUnconditionalBranch(BlockT *SrcBlock); + void removeRedundantConditionalBranch(BlockT *SrcBlock); + void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks); + + void removeSuccessor(BlockT *SrcBlock); + BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock); + BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock); + + void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock, + InstrIterator InsertPos); + + void recordSccnum(BlockT *SrcBlock, int SCCNum); + int getSCCNum(BlockT *srcBlk); + + void retireBlock(BlockT *DstBlock, BlockT *SrcBlock); + bool isRetiredBlock(BlockT *SrcBlock); + bool isActiveLoophead(BlockT *CurBlock); + bool needMigrateBlock(BlockT *Block); + + BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock, + BlockTSmallerVector &exitBlocks, + std::set<BlockT*> &ExitBlockSet); + void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL); + BlockT *getLoopLandBlock(LoopT *LoopRep); + LoopLandInfo *getLoopLandInfo(LoopT *LoopRep); + + void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum); + void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum); + void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum); + void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum); + void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum); + + bool hasBackEdge(BlockT *curBlock); + unsigned getLoopDepth (LoopT *LoopRep); + int countActiveBlock( + typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart, + typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd); + BlockT *findNearestCommonPostDom(std::set<BlockT *>&); + BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2); + +private: + DomTreeT *domTree; + PostDomTreeT *postDomTree; + LoopInfoT *loopInfo; + PassT *passRep; + FuncT *funcRep; + + BlockInfoMap blockInfoMap; + LoopLandInfoMap loopLandInfoMap; + 
SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks; + const AMDGPURegisterInfo *TRI; + +}; //template class CFGStructurizer + +template<class PassT> CFGStructurizer<PassT>::CFGStructurizer() + : domTree(NULL), postDomTree(NULL), loopInfo(NULL) { +} + +template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() { + for (typename BlockInfoMap::iterator I = blockInfoMap.begin(), + E = blockInfoMap.end(); I != E; ++I) { + delete I->second; + } +} + +template<class PassT> +bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass, + const AMDGPURegisterInfo * tri) { + passRep = &pass; + funcRep = &func; + TRI = tri; + + bool changed = false; + + //FIXME: if not reducible flow graph, make it so ??? + + if (DEBUGME) { + errs() << "AMDGPUCFGStructurizer::prepare\n"; + } + + loopInfo = CFGTraits::getLoopInfo(pass); + if (DEBUGME) { + errs() << "LoopInfo:\n"; + PrintLoopinfo(*loopInfo, errs()); + } + + orderBlocks(); + if (DEBUGME) { + errs() << "Ordered blocks:\n"; + printOrderedBlocks(errs()); + } + + SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks; + + for (typename LoopInfoT::iterator iter = loopInfo->begin(), + iterEnd = loopInfo->end(); + iter != iterEnd; ++iter) { + LoopT* loopRep = (*iter); + BlockTSmallerVector exitingBlks; + loopRep->getExitingBlocks(exitingBlks); + + if (exitingBlks.size() == 0) { + BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep); + if (dummyExitBlk != NULL) + retBlks.push_back(dummyExitBlk); + } + } + + // Remove unconditional branch instr. + // Add dummy exit block iff there are multiple returns. + + for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator + iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end(); + iterBlk != iterEndBlk; + ++iterBlk) { + BlockT *curBlk = *iterBlk; + removeUnconditionalBranch(curBlk); + removeRedundantConditionalBranch(curBlk); + if (CFGTraits::isReturnBlock(curBlk)) { + retBlks.push_back(curBlk); + } + assert(curBlk->succ_size() <= 2); + } //for + + if (retBlks.size() >= 2) { + addDummyExitBlock(retBlks); + changed = true; + } + + return changed; +} //CFGStructurizer::prepare + +template<class PassT> +bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass, + const AMDGPURegisterInfo * tri) { + passRep = &pass; + funcRep = &func; + TRI = tri; + + //Assume reducible CFG... + if (DEBUGME) { + errs() << "AMDGPUCFGStructurizer::run\n"; + func.viewCFG(); + } + + domTree = CFGTraits::getDominatorTree(pass); + if (DEBUGME) { + domTree->print(errs(), (const llvm::Module*)0); + } + + postDomTree = CFGTraits::getPostDominatorTree(pass); + if (DEBUGME) { + postDomTree->print(errs()); + } + + loopInfo = CFGTraits::getLoopInfo(pass); + if (DEBUGME) { + errs() << "LoopInfo:\n"; + PrintLoopinfo(*loopInfo, errs()); + } + + orderBlocks(); +#ifdef STRESSTEST + //Use the worse block ordering to test the algorithm. 
+ ReverseVector(orderedBlks); +#endif + + if (DEBUGME) { + errs() << "Ordered blocks:\n"; + printOrderedBlocks(errs()); + } + int numIter = 0; + bool finish = false; + BlockT *curBlk; + bool makeProgress = false; + int numRemainedBlk = countActiveBlock(orderedBlks.begin(), + orderedBlks.end()); + + do { + ++numIter; + if (DEBUGME) { + errs() << "numIter = " << numIter + << ", numRemaintedBlk = " << numRemainedBlk << "\n"; + } + + typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator + iterBlk = orderedBlks.begin(); + typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator + iterBlkEnd = orderedBlks.end(); + + typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator + sccBeginIter = iterBlk; + BlockT *sccBeginBlk = NULL; + int sccNumBlk = 0; // The number of active blocks, init to a + // maximum possible number. + int sccNumIter; // Number of iteration in this SCC. + + while (iterBlk != iterBlkEnd) { + curBlk = *iterBlk; + + if (sccBeginBlk == NULL) { + sccBeginIter = iterBlk; + sccBeginBlk = curBlk; + sccNumIter = 0; + sccNumBlk = numRemainedBlk; // Init to maximum possible number. + if (DEBUGME) { + errs() << "start processing SCC" << getSCCNum(sccBeginBlk); + errs() << "\n"; + } + } + + if (!isRetiredBlock(curBlk)) { + patternMatch(curBlk); + } + + ++iterBlk; + + bool contNextScc = true; + if (iterBlk == iterBlkEnd + || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) { + // Just finish one scc. + ++sccNumIter; + int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk); + if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) { + if (DEBUGME) { + errs() << "Can't reduce SCC " << getSCCNum(curBlk) + << ", sccNumIter = " << sccNumIter; + errs() << "doesn't make any progress\n"; + } + contNextScc = true; + } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) { + sccNumBlk = sccRemainedNumBlk; + iterBlk = sccBeginIter; + contNextScc = false; + if (DEBUGME) { + errs() << "repeat processing SCC" << getSCCNum(curBlk) + << "sccNumIter = " << sccNumIter << "\n"; + func.viewCFG(); + } + } else { + // Finish the current scc. + contNextScc = true; + } + } else { + // Continue on next component in the current scc. + contNextScc = false; + } + + if (contNextScc) { + sccBeginBlk = NULL; + } + } //while, "one iteration" over the function. + + BlockT *entryBlk = FuncGTraits::nodes_begin(&func); + if (entryBlk->succ_size() == 0) { + finish = true; + if (DEBUGME) { + errs() << "Reduce to one block\n"; + } + } else { + int newnumRemainedBlk + = countActiveBlock(orderedBlks.begin(), orderedBlks.end()); + // consider cloned blocks ?? + if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) { + makeProgress = true; + numRemainedBlk = newnumRemainedBlk; + } else { + makeProgress = false; + if (DEBUGME) { + errs() << "No progress\n"; + } + } + } + } while (!finish && makeProgress); + + // Misc wrap up to maintain the consistency of the Function representation. + CFGTraits::wrapup(FuncGTraits::nodes_begin(&func)); + + // Detach retired Block, release memory. + for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(), + iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) { + if ((*iterMap).second && (*iterMap).second->isRetired) { + assert(((*iterMap).first)->getNumber() != -1); + if (DEBUGME) { + errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n"; + } + (*iterMap).first->eraseFromParent(); //Remove from the parent Function. 
+ } + delete (*iterMap).second; + } + blockInfoMap.clear(); + + // clear loopLandInfoMap + for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(), + iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) { + delete (*iterMap).second; + } + loopLandInfoMap.clear(); + + if (DEBUGME) { + func.viewCFG(); + } + + if (!finish) { + assert(!"IRREDUCIBL_CF"); + } + + return true; +} //CFGStructurizer::run + +/// Print the ordered Blocks. +/// +template<class PassT> +void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) { + size_t i = 0; + for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator + iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end(); + iterBlk != iterBlkEnd; + ++iterBlk, ++i) { + os << "BB" << (*iterBlk)->getNumber(); + os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; + if (i != 0 && i % 10 == 0) { + os << "\n"; + } else { + os << " "; + } + } +} //printOrderedBlocks + +/// Compute the reversed DFS post order of Blocks +/// +template<class PassT> void CFGStructurizer<PassT>::orderBlocks() { + int sccNum = 0; + BlockT *bb; + for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep), + sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) { + std::vector<BlockT *> &sccNext = *sccIter; + for (typename std::vector<BlockT *>::const_iterator + blockIter = sccNext.begin(), blockEnd = sccNext.end(); + blockIter != blockEnd; ++blockIter) { + bb = *blockIter; + orderedBlks.push_back(bb); + recordSccnum(bb, sccNum); + } + } + + //walk through all the block in func to check for unreachable + for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep), + blockEnd1 = FuncGTraits::nodes_end(funcRep); + blockIter1 != blockEnd1; ++blockIter1) { + BlockT *bb = &(*blockIter1); + sccNum = getSCCNum(bb); + if (sccNum == INVALIDSCCNUM) { + errs() << "unreachable block BB" << bb->getNumber() << "\n"; + } + } +} //orderBlocks + +template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) { + int numMatch = 0; + int curMatch; + + if (DEBUGME) { + errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n"; + } + + while ((curMatch = patternMatchGroup(curBlk)) > 0) { + numMatch += curMatch; + } + + if (DEBUGME) { + errs() << "End patternMatch BB" << curBlk->getNumber() + << ", numMatch = " << numMatch << "\n"; + } + + return numMatch; +} //patternMatch + +template<class PassT> +int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) { + int numMatch = 0; + numMatch += serialPatternMatch(curBlk); + numMatch += ifPatternMatch(curBlk); + numMatch += loopendPatternMatch(curBlk); + numMatch += loopPatternMatch(curBlk); + return numMatch; +}//patternMatchGroup + +template<class PassT> +int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) { + if (curBlk->succ_size() != 1) { + return 0; + } + + BlockT *childBlk = *curBlk->succ_begin(); + if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) { + return 0; + } + + mergeSerialBlock(curBlk, childBlk); + ++numSerialPatternMatch; + return 1; +} //serialPatternMatch + +template<class PassT> +int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) { + //two edges + if (curBlk->succ_size() != 2) { + return 0; + } + + if (hasBackEdge(curBlk)) { + return 0; + } + + InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk); + if (branchInstr == NULL) { + return 0; + } + + assert(CFGTraits::isCondBranch(branchInstr)); + + BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr); + BlockT *falseBlk = 
CFGTraits::getFalseBranch(curBlk, branchInstr); + BlockT *landBlk; + int cloned = 0; + + // TODO: Simplify + if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1 + && *trueBlk->succ_begin() == *falseBlk->succ_begin()) { + landBlk = *trueBlk->succ_begin(); + } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) { + landBlk = NULL; + } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) { + landBlk = falseBlk; + falseBlk = NULL; + } else if (falseBlk->succ_size() == 1 + && *falseBlk->succ_begin() == trueBlk) { + landBlk = trueBlk; + trueBlk = NULL; + } else if (falseBlk->succ_size() == 1 + && isSameloopDetachedContbreak(trueBlk, falseBlk)) { + landBlk = *falseBlk->succ_begin(); + } else if (trueBlk->succ_size() == 1 + && isSameloopDetachedContbreak(falseBlk, trueBlk)) { + landBlk = *trueBlk->succ_begin(); + } else { + return handleJumpintoIf(curBlk, trueBlk, falseBlk); + } + + // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the + // new BB created for landBlk==NULL may introduce new challenge to the + // reduction process. + if (landBlk != NULL && + ((trueBlk && trueBlk->pred_size() > 1) + || (falseBlk && falseBlk->pred_size() > 1))) { + cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk); + } + + if (trueBlk && trueBlk->pred_size() > 1) { + trueBlk = cloneBlockForPredecessor(trueBlk, curBlk); + ++cloned; + } + + if (falseBlk && falseBlk->pred_size() > 1) { + falseBlk = cloneBlockForPredecessor(falseBlk, curBlk); + ++cloned; + } + + mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk); + + ++numIfPatternMatch; + + numClonedBlock += cloned; + + return 1 + cloned; +} //ifPatternMatch + +template<class PassT> +int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) { + return 0; +} //switchPatternMatch + +template<class PassT> +int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) { + LoopT *loopRep = loopInfo->getLoopFor(curBlk); + typename std::vector<LoopT *> nestedLoops; + while (loopRep) { + nestedLoops.push_back(loopRep); + loopRep = loopRep->getParentLoop(); + } + + if (nestedLoops.size() == 0) { + return 0; + } + + // Process nested loop outside->inside, so "continue" to a outside loop won't + // be mistaken as "break" of the current loop. 
+ int num = 0; + for (typename std::vector<LoopT *>::reverse_iterator + iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend(); + iter != iterEnd; ++iter) { + loopRep = *iter; + + if (getLoopLandBlock(loopRep) != NULL) { + continue; + } + + BlockT *loopHeader = loopRep->getHeader(); + + int numBreak = loopbreakPatternMatch(loopRep, loopHeader); + + if (numBreak == -1) { + break; + } + + int numCont = loopcontPatternMatch(loopRep, loopHeader); + num += numBreak + numCont; + } + + return num; +} //loopendPatternMatch + +template<class PassT> +int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) { + if (curBlk->succ_size() != 0) { + return 0; + } + + int numLoop = 0; + LoopT *loopRep = loopInfo->getLoopFor(curBlk); + while (loopRep && loopRep->getHeader() == curBlk) { + LoopLandInfo *loopLand = getLoopLandInfo(loopRep); + if (loopLand) { + BlockT *landBlk = loopLand->landBlk; + assert(landBlk); + if (!isRetiredBlock(landBlk)) { + mergeLooplandBlock(curBlk, loopLand); + ++numLoop; + } + } + loopRep = loopRep->getParentLoop(); + } + + numLoopPatternMatch += numLoop; + + return numLoop; +} //loopPatternMatch + +template<class PassT> +int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep, + BlockT *loopHeader) { + BlockTSmallerVector exitingBlks; + loopRep->getExitingBlocks(exitingBlks); + + if (DEBUGME) { + errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n"; + } + + if (exitingBlks.size() == 0) { + setLoopLandBlock(loopRep); + return 0; + } + + // Compute the corresponding exitBlks and exit block set. + BlockTSmallerVector exitBlks; + std::set<BlockT *> exitBlkSet; + for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(), + iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) { + BlockT *exitingBlk = *iter; + BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); + exitBlks.push_back(exitBlk); + exitBlkSet.insert(exitBlk); //non-duplicate insert + } + + assert(exitBlkSet.size() > 0); + assert(exitBlks.size() == exitingBlks.size()); + + if (DEBUGME) { + errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n"; + } + + // Find exitLandBlk. + BlockT *exitLandBlk = NULL; + int numCloned = 0; + int numSerial = 0; + + if (exitBlkSet.size() == 1) { + exitLandBlk = *exitBlkSet.begin(); + } else { + exitLandBlk = findNearestCommonPostDom(exitBlkSet); + + if (exitLandBlk == NULL) { + return -1; + } + + bool allInPath = true; + bool allNotInPath = true; + for (typename std::set<BlockT*>::const_iterator + iter = exitBlkSet.begin(), + iterEnd = exitBlkSet.end(); + iter != iterEnd; ++iter) { + BlockT *exitBlk = *iter; + + PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true); + if (DEBUGME) { + errs() << "BB" << exitBlk->getNumber() + << " to BB" << exitLandBlk->getNumber() << " PathToKind=" + << pathKind << "\n"; + } + + allInPath = allInPath && (pathKind == SinglePath_InPath); + allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath); + + if (!allInPath && !allNotInPath) { + if (DEBUGME) { + errs() << "singlePath check fail\n"; + } + return -1; + } + } // check all exit blocks + + if (allNotInPath) { + + // TODO: Simplify, maybe separate function? 
+ LoopT *parentLoopRep = loopRep->getParentLoop(); + BlockT *parentLoopHeader = NULL; + if (parentLoopRep) + parentLoopHeader = parentLoopRep->getHeader(); + + if (exitLandBlk == parentLoopHeader && + (exitLandBlk = relocateLoopcontBlock(parentLoopRep, + loopRep, + exitBlkSet, + exitLandBlk)) != NULL) { + if (DEBUGME) { + errs() << "relocateLoopcontBlock success\n"; + } + } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep, + exitingBlks, + exitBlks)) != NULL) { + if (DEBUGME) { + errs() << "insertEndbranchBlock success\n"; + } + } else { + if (DEBUGME) { + errs() << "loop exit fail\n"; + } + return -1; + } + } + + // Handle side entry to exit path. + exitBlks.clear(); + exitBlkSet.clear(); + for (typename BlockTSmallerVector::iterator iterExiting = + exitingBlks.begin(), + iterExitingEnd = exitingBlks.end(); + iterExiting != iterExitingEnd; ++iterExiting) { + BlockT *exitingBlk = *iterExiting; + BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); + BlockT *newExitBlk = exitBlk; + + if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) { + newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk); + ++numCloned; + } + + numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk); + + exitBlks.push_back(newExitBlk); + exitBlkSet.insert(newExitBlk); + } + + for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), + iterExitEnd = exitBlks.end(); + iterExit != iterExitEnd; ++iterExit) { + BlockT *exitBlk = *iterExit; + numSerial += serialPatternMatch(exitBlk); + } + + for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), + iterExitEnd = exitBlks.end(); + iterExit != iterExitEnd; ++iterExit) { + BlockT *exitBlk = *iterExit; + if (exitBlk->pred_size() > 1) { + if (exitBlk != exitLandBlk) { + return -1; + } + } else { + if (exitBlk != exitLandBlk && + (exitBlk->succ_size() != 1 || + *exitBlk->succ_begin() != exitLandBlk)) { + return -1; + } + } + } + } // else + + exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet); + + // Fold break into the breaking block. Leverage across level breaks. 
+ assert(exitingBlks.size() == exitBlks.size()); + for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(), + iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end(); + iterExit != iterExitEnd; ++iterExit, ++iterExiting) { + BlockT *exitBlk = *iterExit; + BlockT *exitingBlk = *iterExiting; + assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk); + LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk); + handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk); + } + + int numBreak = static_cast<int>(exitingBlks.size()); + numLoopbreakPatternMatch += numBreak; + numClonedBlock += numCloned; + return numBreak + numSerial + numCloned; +} //loopbreakPatternMatch + +template<class PassT> +int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep, + BlockT *loopHeader) { + int numCont = 0; + SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk; + for (typename InvBlockGTraits::ChildIteratorType iter = + InvBlockGTraits::child_begin(loopHeader), + iterEnd = InvBlockGTraits::child_end(loopHeader); + iter != iterEnd; ++iter) { + BlockT *curBlk = *iter; + if (loopRep->contains(curBlk)) { + handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk), + loopHeader, loopRep); + contBlk.push_back(curBlk); + ++numCont; + } + } + + for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator + iter = contBlk.begin(), iterEnd = contBlk.end(); + iter != iterEnd; ++iter) { + (*iter)->removeSuccessor(loopHeader); + } + + numLoopcontPatternMatch += numCont; + + return numCont; +} //loopcontPatternMatch + + +template<class PassT> +bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk, + BlockT *src2Blk) { + // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the + // same loop with LoopLandInfo without explicitly keeping track of + // loopContBlks and loopBreakBlks, this is a method to get the information. 
+ // + if (src1Blk->succ_size() == 0) { + LoopT *loopRep = loopInfo->getLoopFor(src1Blk); + if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + if (theEntry != NULL) { + if (DEBUGME) { + errs() << "isLoopContBreakBlock yes src1 = BB" + << src1Blk->getNumber() + << " src2 = BB" << src2Blk->getNumber() << "\n"; + } + return true; + } + } + } + return false; +} //isSameloopDetachedContbreak + +template<class PassT> +int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk, + BlockT *trueBlk, + BlockT *falseBlk) { + int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk); + if (num == 0) { + if (DEBUGME) { + errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; + } + num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk); + } + return num; +} + +template<class PassT> +int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk, + BlockT *trueBlk, + BlockT *falseBlk) { + int num = 0; + BlockT *downBlk; + + //trueBlk could be the common post dominator + downBlk = trueBlk; + + if (DEBUGME) { + errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber() + << " true = BB" << trueBlk->getNumber() + << ", numSucc=" << trueBlk->succ_size() + << " false = BB" << falseBlk->getNumber() << "\n"; + } + + while (downBlk) { + if (DEBUGME) { + errs() << "check down = BB" << downBlk->getNumber(); + } + + if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) { + if (DEBUGME) { + errs() << " working\n"; + } + + num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk); + num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk); + + numClonedBlock += num; + num += serialPatternMatch(*headBlk->succ_begin()); + num += serialPatternMatch(*(++headBlk->succ_begin())); + num += ifPatternMatch(headBlk); + assert(num > 0); + + break; + } + if (DEBUGME) { + errs() << " not working\n"; + } + downBlk = (downBlk->succ_size() == 1) ? 
(*downBlk->succ_begin()) : NULL; + } // walk down the postDomTree + + return num; +} //handleJumpintoIf + +template<class PassT> +void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk, + BlockT *trueBlk, + BlockT *falseBlk, + BlockT *landBlk, + bool detail) { + errs() << "head = BB" << headBlk->getNumber() + << " size = " << headBlk->size(); + if (detail) { + errs() << "\n"; + headBlk->print(errs()); + errs() << "\n"; + } + + if (trueBlk) { + errs() << ", true = BB" << trueBlk->getNumber() << " size = " + << trueBlk->size() << " numPred = " << trueBlk->pred_size(); + if (detail) { + errs() << "\n"; + trueBlk->print(errs()); + errs() << "\n"; + } + } + if (falseBlk) { + errs() << ", false = BB" << falseBlk->getNumber() << " size = " + << falseBlk->size() << " numPred = " << falseBlk->pred_size(); + if (detail) { + errs() << "\n"; + falseBlk->print(errs()); + errs() << "\n"; + } + } + if (landBlk) { + errs() << ", land = BB" << landBlk->getNumber() << " size = " + << landBlk->size() << " numPred = " << landBlk->pred_size(); + if (detail) { + errs() << "\n"; + landBlk->print(errs()); + errs() << "\n"; + } + } + + errs() << "\n"; +} //showImproveSimpleJumpintoIf + +template<class PassT> +int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk, + BlockT *trueBlk, + BlockT *falseBlk, + BlockT **plandBlk) { + bool migrateTrue = false; + bool migrateFalse = false; + + BlockT *landBlk = *plandBlk; + + assert((trueBlk == NULL || trueBlk->succ_size() <= 1) + && (falseBlk == NULL || falseBlk->succ_size() <= 1)); + + if (trueBlk == falseBlk) { + return 0; + } + + migrateTrue = needMigrateBlock(trueBlk); + migrateFalse = needMigrateBlock(falseBlk); + + if (!migrateTrue && !migrateFalse) { + return 0; + } + + // If we need to migrate either trueBlk and falseBlk, migrate the rest that + // have more than one predecessors. without doing this, its predecessor + // rather than headBlk will have undefined value in initReg. + if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) { + migrateTrue = true; + } + if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) { + migrateFalse = true; + } + + if (DEBUGME) { + errs() << "before improveSimpleJumpintoIf: "; + showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); + } + + // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk + // + // new: headBlk => if () {initReg = 1; org trueBlk branch} else + // {initReg = 0; org falseBlk branch } + // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} + // => org landBlk + // if landBlk->pred_size() > 2, put the about if-else inside + // if (initReg !=2) {...} + // + // add initReg = initVal to headBlk + + const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); + unsigned initReg = + funcRep->getRegInfo().createVirtualRegister(I32RC); + if (!migrateTrue || !migrateFalse) { + int initVal = migrateTrue ? 
0 : 1; + CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal); + } + + int numNewBlk = 0; + + if (landBlk == NULL) { + landBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(landBlk); //insert to function + + if (trueBlk) { + trueBlk->addSuccessor(landBlk); + } else { + headBlk->addSuccessor(landBlk); + } + + if (falseBlk) { + falseBlk->addSuccessor(landBlk); + } else { + headBlk->addSuccessor(landBlk); + } + + numNewBlk ++; + } + + bool landBlkHasOtherPred = (landBlk->pred_size() > 2); + + //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" + typename BlockT::iterator insertPos = + CFGTraits::getInstrPos + (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep)); + + if (landBlkHasOtherPred) { + unsigned immReg = + funcRep->getRegInfo().createVirtualRegister(I32RC); + CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2); + unsigned cmpResReg = + funcRep->getRegInfo().createVirtualRegister(I32RC); + + CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg, + initReg, immReg); + CFGTraits::insertCondBranchBefore(landBlk, insertPos, + AMDGPU::IF_PREDICATE_SET, passRep, + cmpResReg, DebugLoc()); + } + + CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET, + passRep, initReg, DebugLoc()); + + if (migrateTrue) { + migrateInstruction(trueBlk, landBlk, insertPos); + // need to uncondionally insert the assignment to ensure a path from its + // predecessor rather than headBlk has valid value in initReg if + // (initVal != 1). + CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1); + } + CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep); + + if (migrateFalse) { + migrateInstruction(falseBlk, landBlk, insertPos); + // need to uncondionally insert the assignment to ensure a path from its + // predecessor rather than headBlk has valid value in initReg if + // (initVal != 0) + CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0); + } + + if (landBlkHasOtherPred) { + // add endif + CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep); + + // put initReg = 2 to other predecessors of landBlk + for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), + predIterEnd = landBlk->pred_end(); predIter != predIterEnd; + ++predIter) { + BlockT *curBlk = *predIter; + if (curBlk != trueBlk && curBlk != falseBlk) { + CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2); + } + } //for + } + if (DEBUGME) { + errs() << "result from improveSimpleJumpintoIf: "; + showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); + } + + // update landBlk + *plandBlk = landBlk; + + return numNewBlk; +} //improveSimpleJumpintoIf + +template<class PassT> +void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk, + LoopT *exitingLoop, + BlockT *exitBlk, + LoopT *exitLoop, + BlockT *landBlk) { + if (DEBUGME) { + errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop) + << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n"; + } + const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); + + RegiT initReg = INVALIDREGNUM; + if (exitingLoop != exitLoop) { + initReg = static_cast<int> + (funcRep->getRegInfo().createVirtualRegister(I32RC)); + assert(initReg != INVALIDREGNUM); + addLoopBreakInitReg(exitLoop, initReg); + while (exitingLoop != exitLoop && exitingLoop) { + addLoopBreakOnReg(exitingLoop, initReg); + exitingLoop = exitingLoop->getParentLoop(); + } + assert(exitingLoop == 
exitLoop); + } + + mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg); + +} //handleLoopbreak + +template<class PassT> +void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk, + LoopT *contingLoop, + BlockT *contBlk, + LoopT *contLoop) { + if (DEBUGME) { + errs() << "loopcontPattern cont = BB" << contingBlk->getNumber() + << " header = BB" << contBlk->getNumber() << "\n"; + + errs() << "Trying to continue loop-depth = " + << getLoopDepth(contLoop) + << " from loop-depth = " << getLoopDepth(contingLoop) << "\n"; + } + + RegiT initReg = INVALIDREGNUM; + const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); + if (contingLoop != contLoop) { + initReg = static_cast<int> + (funcRep->getRegInfo().createVirtualRegister(I32RC)); + assert(initReg != INVALIDREGNUM); + addLoopContInitReg(contLoop, initReg); + while (contingLoop && contingLoop->getParentLoop() != contLoop) { + addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg + contingLoop = contingLoop->getParentLoop(); + } + assert(contingLoop && contingLoop->getParentLoop() == contLoop); + addLoopContOnReg(contingLoop, initReg); + } + + settleLoopcontBlock(contingBlk, contBlk, initReg); +} //handleLoopcontBlock + +template<class PassT> +void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) { + if (DEBUGME) { + errs() << "serialPattern BB" << dstBlk->getNumber() + << " <= BB" << srcBlk->getNumber() << "\n"; + } + dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end()); + + dstBlk->removeSuccessor(srcBlk); + CFGTraits::cloneSuccessorList(dstBlk, srcBlk); + + removeSuccessor(srcBlk); + retireBlock(dstBlk, srcBlk); +} //mergeSerialBlock + +template<class PassT> +void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr, + BlockT *curBlk, + BlockT *trueBlk, + BlockT *falseBlk, + BlockT *landBlk) { + if (DEBUGME) { + errs() << "ifPattern BB" << curBlk->getNumber(); + errs() << "{ "; + if (trueBlk) { + errs() << "BB" << trueBlk->getNumber(); + } + errs() << " } else "; + errs() << "{ "; + if (falseBlk) { + errs() << "BB" << falseBlk->getNumber(); + } + errs() << " }\n "; + errs() << "landBlock: "; + if (landBlk == NULL) { + errs() << "NULL"; + } else { + errs() << "BB" << landBlk->getNumber(); + } + errs() << "\n"; + } + + int oldOpcode = branchInstr->getOpcode(); + DebugLoc branchDL = branchInstr->getDebugLoc(); + +// transform to +// if cond +// trueBlk +// else +// falseBlk +// endif +// landBlk + + typename BlockT::iterator branchInstrPos = + CFGTraits::getInstrPos(curBlk, branchInstr); + CFGTraits::insertCondBranchBefore(branchInstrPos, + CFGTraits::getBranchNzeroOpcode(oldOpcode), + passRep, + branchDL); + + if (trueBlk) { + curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end()); + curBlk->removeSuccessor(trueBlk); + if (landBlk && trueBlk->succ_size()!=0) { + trueBlk->removeSuccessor(landBlk); + } + retireBlock(curBlk, trueBlk); + } + CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep); + + if (falseBlk) { + curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(), + falseBlk->end()); + curBlk->removeSuccessor(falseBlk); + if (landBlk && falseBlk->succ_size() != 0) { + falseBlk->removeSuccessor(landBlk); + } + retireBlock(curBlk, falseBlk); + } + CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep); + + branchInstr->eraseFromParent(); + + if (landBlk && trueBlk && falseBlk) { + curBlk->addSuccessor(landBlk); + } + +} //mergeIfthenelseBlock + +template<class PassT> +void 
CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk, + LoopLandInfo *loopLand) { + BlockT *landBlk = loopLand->landBlk; + + if (DEBUGME) { + errs() << "loopPattern header = BB" << dstBlk->getNumber() + << " land = BB" << landBlk->getNumber() << "\n"; + } + + // Loop contInitRegs are init at the beginning of the loop. + for (typename std::set<RegiT>::const_iterator iter = + loopLand->contInitRegs.begin(), + iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) { + CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); + } + + /* we last inserterd the DebugLoc in the + * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk. + * search for the DebugLoc in the that statement. + * if not found, we have to insert the empty/default DebugLoc */ + InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk); + DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc(); + + CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak); + // Loop breakInitRegs are init before entering the loop. + for (typename std::set<RegiT>::const_iterator iter = + loopLand->breakInitRegs.begin(), + iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) { + CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); + } + // Loop endbranchInitRegs are init before entering the loop. + for (typename std::set<RegiT>::const_iterator iter = + loopLand->endbranchInitRegs.begin(), + iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) { + CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); + } + + /* we last inserterd the DebugLoc in the continue statement in the current dstBlk + * search for the DebugLoc in the continue statement. + * if not found, we have to insert the empty/default DebugLoc */ + InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk); + DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc(); + + CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue); + // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this + // loop. + for (typename std::set<RegiT>::const_iterator iter = + loopLand->breakOnRegs.begin(), + iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) { + CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep, + *iter); + } + + // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this + // loop. + for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(), + iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) { + CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32, + passRep, *iter); + } + + dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end()); + + for (typename BlockT::succ_iterator iter = landBlk->succ_begin(), + iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) { + dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of. 
+ } + + removeSuccessor(landBlk); + retireBlock(dstBlk, landBlk); +} //mergeLooplandBlock + +template<class PassT> +void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) { + while (I--) { + if (I->getOpcode() == AMDGPU::PRED_X) { + switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) { + case OPCODE_IS_ZERO_INT: + static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT); + return; + case OPCODE_IS_NOT_ZERO_INT: + static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT); + return; + case OPCODE_IS_ZERO: + static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO); + return; + case OPCODE_IS_NOT_ZERO: + static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO); + return; + default: + assert(0 && "PRED_X Opcode invalid!"); + } + } + } +} + +template<class PassT> +void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk, + BlockT *exitBlk, + BlockT *exitLandBlk, + RegiT setReg) { + if (DEBUGME) { + errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber() + << " exit = BB" << exitBlk->getNumber() + << " land = BB" << exitLandBlk->getNumber() << "\n"; + } + + InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk); + assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); + + DebugLoc DL = branchInstr->getDebugLoc(); + + BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); + + // transform exitingBlk to + // if ( ) { + // exitBlk (if exitBlk != exitLandBlk) + // setReg = 1 + // break + // }endif + // successor = {orgSuccessor(exitingBlk) - exitBlk} + + typename BlockT::iterator branchInstrPos = + CFGTraits::getInstrPos(exitingBlk, branchInstr); + + if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) { + //break_logical + + if (trueBranch != exitBlk) { + reversePredicateSetter(branchInstrPos); + } + CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL); + } else { + if (trueBranch != exitBlk) { + reversePredicateSetter(branchInstr); + } + CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL); + if (exitBlk != exitLandBlk) { + //splice is insert-before ... + exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(), + exitBlk->end()); + } + if (setReg != INVALIDREGNUM) { + CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); + } + CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep); + } //if_logical + + //now branchInst can be erase safely + branchInstr->eraseFromParent(); + + //now take care of successors, retire blocks + exitingBlk->removeSuccessor(exitBlk); + if (exitBlk != exitLandBlk) { + //splice is insert-before ... 
+ exitBlk->removeSuccessor(exitLandBlk); + retireBlock(exitingBlk, exitBlk); + } + +} //mergeLoopbreakBlock + +template<class PassT> +void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk, + BlockT *contBlk, + RegiT setReg) { + if (DEBUGME) { + errs() << "settleLoopcontBlock conting = BB" + << contingBlk->getNumber() + << ", cont = BB" << contBlk->getNumber() << "\n"; + } + + InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk); + if (branchInstr) { + assert(CFGTraits::isCondBranch(branchInstr)); + typename BlockT::iterator branchInstrPos = + CFGTraits::getInstrPos(contingBlk, branchInstr); + BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); + int oldOpcode = branchInstr->getOpcode(); + DebugLoc DL = branchInstr->getDebugLoc(); + + // transform contingBlk to + // if () { + // move instr after branchInstr + // continue + // or + // setReg = 1 + // break + // }endif + // successor = {orgSuccessor(contingBlk) - loopHeader} + + bool useContinueLogical = + (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr); + + if (useContinueLogical == false) { + int branchOpcode = + trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode) + : CFGTraits::getBranchZeroOpcode(oldOpcode); + + CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); + + if (setReg != INVALIDREGNUM) { + CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); + // insertEnd to ensure phi-moves, if exist, go before the continue-instr. + CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL); + } else { + // insertEnd to ensure phi-moves, if exist, go before the continue-instr. + CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL); + } + + CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL); + } else { + int branchOpcode = + trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode) + : CFGTraits::getContinueZeroOpcode(oldOpcode); + + CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); + } + + branchInstr->eraseFromParent(); + } else { + // if we've arrived here then we've already erased the branch instruction + // travel back up the basic block to see the last reference of our debug location + // we've just inserted that reference here so it should be representative + if (setReg != INVALIDREGNUM) { + CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1); + // insertEnd to ensure phi-moves, if exist, go before the continue-instr. + CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); + } else { + // insertEnd to ensure phi-moves, if exist, go before the continue-instr. + CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); + } + } //else + +} //settleLoopcontBlock + +// BBs in exitBlkSet are determined as in break-path for loopRep, +// before we can put code for BBs as inside loop-body for loopRep +// check whether those BBs are determined as cont-BB for parentLoopRep +// earlier. 
+// If so, generate a new BB newBlk +// (1) set newBlk common successor of BBs in exitBlkSet +// (2) change the continue-instr in BBs in exitBlkSet to break-instr +// (3) generate continue-instr in newBlk +// +template<class PassT> +typename CFGStructurizer<PassT>::BlockT * +CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep, + LoopT *loopRep, + std::set<BlockT *> &exitBlkSet, + BlockT *exitLandBlk) { + std::set<BlockT *> endBlkSet; + + + + for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(), + iterEnd = exitBlkSet.end(); + iter != iterEnd; ++iter) { + BlockT *exitBlk = *iter; + BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk); + + if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL) + return NULL; + + endBlkSet.insert(endBlk); + } + + BlockT *newBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(newBlk); //insert to function + CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep); + SHOWNEWBLK(newBlk, "New continue block: "); + + for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(), + iterEnd = endBlkSet.end(); + iter != iterEnd; ++iter) { + BlockT *endBlk = *iter; + InstrT *contInstr = CFGTraits::getContinueInstr(endBlk); + if (contInstr) { + contInstr->eraseFromParent(); + } + endBlk->addSuccessor(newBlk); + if (DEBUGME) { + errs() << "Add new continue Block to BB" + << endBlk->getNumber() << " successors\n"; + } + } + + return newBlk; +} //relocateLoopcontBlock + + +// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as +// LoopLandBlock. This BB branch on the loop endBranchInit register to the +// pathes corresponding to the loop exiting branches. + +template<class PassT> +typename CFGStructurizer<PassT>::BlockT * +CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep, + BlockTSmallerVector &exitingBlks, + BlockTSmallerVector &exitBlks) { + const AMDGPUInstrInfo *tii = + static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); + const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); + + RegiT endBranchReg = static_cast<int> + (funcRep->getRegInfo().createVirtualRegister(I32RC)); + assert(endBranchReg >= 0); + + // reg = 0 before entering the loop + addLoopEndbranchInitReg(loopRep, endBranchReg); + + uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size()); + assert(numBlks >=2 && numBlks == exitBlks.size()); + + BlockT *preExitingBlk = exitingBlks[0]; + BlockT *preExitBlk = exitBlks[0]; + BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(preBranchBlk); //insert to function + SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: "); + + BlockT *newLandBlk = preBranchBlk; + + CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk, + newLandBlk); + preExitingBlk->removeSuccessor(preExitBlk); + preExitingBlk->addSuccessor(newLandBlk); + + //it is redundant to add reg = 0 to exitingBlks[0] + + // For 1..n th exiting path (the last iteration handles two pathes) create the + // branch to the previous path and the current path. + for (uint32_t i = 1; i < numBlks; ++i) { + BlockT *curExitingBlk = exitingBlks[i]; + BlockT *curExitBlk = exitBlks[i]; + BlockT *curBranchBlk; + + if (i == numBlks - 1) { + curBranchBlk = curExitBlk; + } else { + curBranchBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(curBranchBlk); //insert to function + SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: "); + } + + // Add reg = i to exitingBlks[i]. 
+ CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep, + endBranchReg, i); + + // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge + // (exitingBlks[i], newLandBlk). + CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk, + newLandBlk); + curExitingBlk->removeSuccessor(curExitBlk); + curExitingBlk->addSuccessor(newLandBlk); + + // add to preBranchBlk the branch instruction: + // if (endBranchReg == preVal) + // preExitBlk + // else + // curBranchBlk + // + // preValReg = i - 1 + + DebugLoc DL; + RegiT preValReg = static_cast<int> + (funcRep->getRegInfo().createVirtualRegister(I32RC)); + + preBranchBlk->insert(preBranchBlk->begin(), + tii->getMovImmInstr(preBranchBlk->getParent(), preValReg, + i - 1)); + + // condResReg = (endBranchReg == preValReg) + RegiT condResReg = static_cast<int> + (funcRep->getRegInfo().createVirtualRegister(I32RC)); + BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg) + .addReg(endBranchReg).addReg(preValReg); + + BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32)) + .addMBB(preExitBlk).addReg(condResReg); + + preBranchBlk->addSuccessor(preExitBlk); + preBranchBlk->addSuccessor(curBranchBlk); + + // Update preExitingBlk, preExitBlk, preBranchBlk. + preExitingBlk = curExitingBlk; + preExitBlk = curExitBlk; + preBranchBlk = curBranchBlk; + + } //end for 1 .. n blocks + + return newLandBlk; +} //addLoopEndbranchBlock + +template<class PassT> +typename CFGStructurizer<PassT>::PathToKind +CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk, + bool allowSideEntry) { + assert(dstBlk); + + if (srcBlk == dstBlk) { + return SinglePath_InPath; + } + + while (srcBlk && srcBlk->succ_size() == 1) { + srcBlk = *srcBlk->succ_begin(); + if (srcBlk == dstBlk) { + return SinglePath_InPath; + } + + if (!allowSideEntry && srcBlk->pred_size() > 1) { + return Not_SinglePath; + } + } + + if (srcBlk && srcBlk->succ_size()==0) { + return SinglePath_NotInPath; + } + + return Not_SinglePath; +} //singlePathTo + +// If there is a single path from srcBlk to dstBlk, return the last block before +// dstBlk If there is a single path from srcBlk->end without dstBlk, return the +// last block in the path Otherwise, return NULL +template<class PassT> +typename CFGStructurizer<PassT>::BlockT * +CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk, + bool allowSideEntry) { + assert(dstBlk); + + if (srcBlk == dstBlk) { + return srcBlk; + } + + if (srcBlk->succ_size() == 0) { + return srcBlk; + } + + while (srcBlk && srcBlk->succ_size() == 1) { + BlockT *preBlk = srcBlk; + + srcBlk = *srcBlk->succ_begin(); + if (srcBlk == NULL) { + return preBlk; + } + + if (!allowSideEntry && srcBlk->pred_size() > 1) { + return NULL; + } + } + + if (srcBlk && srcBlk->succ_size()==0) { + return srcBlk; + } + + return NULL; + +} //singlePathEnd + +template<class PassT> +int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk, + BlockT *dstBlk) { + int cloned = 0; + assert(preBlk->isSuccessor(srcBlk)); + while (srcBlk && srcBlk != dstBlk) { + assert(srcBlk->succ_size() == 1); + if (srcBlk->pred_size() > 1) { + srcBlk = cloneBlockForPredecessor(srcBlk, preBlk); + ++cloned; + } + + preBlk = srcBlk; + srcBlk = *srcBlk->succ_begin(); + } + + return cloned; +} //cloneOnSideEntryTo + +template<class PassT> +typename CFGStructurizer<PassT>::BlockT * +CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk, + BlockT *predBlk) { + assert(predBlk->isSuccessor(curBlk) && + "succBlk is not a prececessor of curBlk"); + + 
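+  // curBlk is reachable from more than one predecessor here; give predBlk its
+  // own single-entry copy of the block, then retarget predBlk's branch and
+  // successor edge to that copy below.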
BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions + CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk); + //srcBlk, oldBlk, newBlk + + predBlk->removeSuccessor(curBlk); + predBlk->addSuccessor(cloneBlk); + + // add all successor to cloneBlk + CFGTraits::cloneSuccessorList(cloneBlk, curBlk); + + numClonedInstr += curBlk->size(); + + if (DEBUGME) { + errs() << "Cloned block: " << "BB" + << curBlk->getNumber() << "size " << curBlk->size() << "\n"; + } + + SHOWNEWBLK(cloneBlk, "result of Cloned block: "); + + return cloneBlk; +} //cloneBlockForPredecessor + +template<class PassT> +typename CFGStructurizer<PassT>::BlockT * +CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep, + BlockT *exitingBlk) { + BlockT *exitBlk = NULL; + + for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(), + iterSuccEnd = exitingBlk->succ_end(); + iterSucc != iterSuccEnd; ++iterSucc) { + BlockT *curBlk = *iterSucc; + if (!loopRep->contains(curBlk)) { + assert(exitBlk == NULL); + exitBlk = curBlk; + } + } + + assert(exitBlk != NULL); + + return exitBlk; +} //exitingBlock2ExitBlock + +template<class PassT> +void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk, + BlockT *dstBlk, + InstrIterator insertPos) { + InstrIterator spliceEnd; + //look for the input branchinstr, not the AMDGPU branchinstr + InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); + if (branchInstr == NULL) { + if (DEBUGME) { + errs() << "migrateInstruction don't see branch instr\n" ; + } + spliceEnd = srcBlk->end(); + } else { + if (DEBUGME) { + errs() << "migrateInstruction see branch instr\n" ; + branchInstr->dump(); + } + spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr); + } + if (DEBUGME) { + errs() << "migrateInstruction before splice dstSize = " << dstBlk->size() + << "srcSize = " << srcBlk->size() << "\n"; + } + + //splice insert before insertPos + dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd); + + if (DEBUGME) { + errs() << "migrateInstruction after splice dstSize = " << dstBlk->size() + << "srcSize = " << srcBlk->size() << "\n"; + } +} //migrateInstruction + +// normalizeInfiniteLoopExit change +// B1: +// uncond_br LoopHeader +// +// to +// B1: +// cond_br 1 LoopHeader dummyExit +// and return the newly added dummy exit block +// +template<class PassT> +typename CFGStructurizer<PassT>::BlockT * +CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) { + BlockT *loopHeader; + BlockT *loopLatch; + loopHeader = LoopRep->getHeader(); + loopLatch = LoopRep->getLoopLatch(); + BlockT *dummyExitBlk = NULL; + const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); + if (loopHeader!=NULL && loopLatch!=NULL) { + InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch); + if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) { + dummyExitBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(dummyExitBlk); //insert to function + SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); + + if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n"; + + typename BlockT::iterator insertPos = + CFGTraits::getInstrPos(loopLatch, branchInstr); + unsigned immReg = + funcRep->getRegInfo().createVirtualRegister(I32RC); + CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1); + InstrT *newInstr = + CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep); + MachineInstrBuilder MIB(*funcRep, newInstr); + MIB.addMBB(loopHeader); + 
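+      // immReg was assigned 1 just above, so this BRANCH_COND_i32 always
+      // takes the loop header; the dummy exit block created above only exists
+      // to give the infinite loop a CFG exit edge (the successor edge is
+      // added below).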
MIB.addReg(immReg, false); + + SHOWNEWINSTR(newInstr); + + branchInstr->eraseFromParent(); + loopLatch->addSuccessor(dummyExitBlk); + } + } + + return dummyExitBlk; +} //normalizeInfiniteLoopExit + +template<class PassT> +void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) { + InstrT *branchInstr; + + // I saw two unconditional branch in one basic block in example + // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. + while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk)) + && CFGTraits::isUncondBranch(branchInstr)) { + if (DEBUGME) { + errs() << "Removing unconditional branch instruction" ; + branchInstr->dump(); + } + branchInstr->eraseFromParent(); + } +} //removeUnconditionalBranch + +template<class PassT> +void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) { + if (srcBlk->succ_size() == 2) { + BlockT *blk1 = *srcBlk->succ_begin(); + BlockT *blk2 = *(++srcBlk->succ_begin()); + + if (blk1 == blk2) { + InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); + assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); + if (DEBUGME) { + errs() << "Removing unneeded conditional branch instruction" ; + branchInstr->dump(); + } + branchInstr->eraseFromParent(); + SHOWNEWBLK(blk1, "Removing redundant successor"); + srcBlk->removeSuccessor(blk1); + } + } +} //removeRedundantConditionalBranch + +template<class PassT> +void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*, + DEFAULT_VEC_SLOTS> &retBlks) { + BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(dummyExitBlk); //insert to function + CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep); + + for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter = + retBlks.begin(), + iterEnd = retBlks.end(); iter != iterEnd; ++iter) { + BlockT *curBlk = *iter; + InstrT *curInstr = CFGTraits::getReturnInstr(curBlk); + if (curInstr) { + curInstr->eraseFromParent(); + } + curBlk->addSuccessor(dummyExitBlk); + if (DEBUGME) { + errs() << "Add dummyExitBlock to BB" << curBlk->getNumber() + << " successors\n"; + } + } //for + + SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: "); +} //addDummyExitBlock + +template<class PassT> +void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) { + while (srcBlk->succ_size()) { + srcBlk->removeSuccessor(*srcBlk->succ_begin()); + } +} + +template<class PassT> +void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) { + BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; + + if (srcBlkInfo == NULL) { + srcBlkInfo = new BlockInfo(); + } + + srcBlkInfo->sccNum = sccNum; +} + +template<class PassT> +int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) { + BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; + return srcBlkInfo ? 
srcBlkInfo->sccNum : INVALIDSCCNUM; +} + +template<class PassT> +void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) { + if (DEBUGME) { + errs() << "Retiring BB" << srcBlk->getNumber() << "\n"; + } + + BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; + + if (srcBlkInfo == NULL) { + srcBlkInfo = new BlockInfo(); + } + + srcBlkInfo->isRetired = true; + assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0 + && "can't retire block yet"); +} + +template<class PassT> +bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) { + BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; + return (srcBlkInfo && srcBlkInfo->isRetired); +} + +template<class PassT> +bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) { + LoopT *loopRep = loopInfo->getLoopFor(curBlk); + while (loopRep && loopRep->getHeader() == curBlk) { + LoopLandInfo *loopLand = getLoopLandInfo(loopRep); + + if(loopLand == NULL) + return true; + + BlockT *landBlk = loopLand->landBlk; + assert(landBlk); + if (!isRetiredBlock(landBlk)) { + return true; + } + + loopRep = loopRep->getParentLoop(); + } + + return false; +} //isActiveLoophead + +template<class PassT> +bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) { + const unsigned blockSizeThreshold = 30; + const unsigned cloneInstrThreshold = 100; + + bool multiplePreds = blk && (blk->pred_size() > 1); + + if(!multiplePreds) + return false; + + unsigned blkSize = blk->size(); + return ((blkSize > blockSizeThreshold) + && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold)); +} //needMigrateBlock + +template<class PassT> +typename CFGStructurizer<PassT>::BlockT * +CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk, + BlockTSmallerVector &exitBlks, + std::set<BlockT *> &exitBlkSet) { + SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks + + for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), + predIterEnd = landBlk->pred_end(); + predIter != predIterEnd; ++predIter) { + BlockT *curBlk = *predIter; + if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) { + inpathBlks.push_back(curBlk); + } + } //for + + //if landBlk has predecessors that are not in the given loop, + //create a new block + BlockT *newLandBlk = landBlk; + if (inpathBlks.size() != landBlk->pred_size()) { + newLandBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(newLandBlk); //insert to function + newLandBlk->addSuccessor(landBlk); + for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter = + inpathBlks.begin(), + iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) { + BlockT *curBlk = *iter; + CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk); + //srcBlk, oldBlk, newBlk + curBlk->removeSuccessor(landBlk); + curBlk->addSuccessor(newLandBlk); + } + for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) { + if (exitBlks[i] == landBlk) { + exitBlks[i] = newLandBlk; + } + } + SHOWNEWBLK(newLandBlk, "NewLandingBlock: "); + } + + setLoopLandBlock(loopRep, newLandBlk); + + return newLandBlk; +} // recordLoopbreakLand + +template<class PassT> +void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + assert(theEntry->landBlk == NULL); + + if (blk == NULL) { + blk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(blk); //insert to function + SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: "); + } + + theEntry->landBlk = 
blk; + + if (DEBUGME) { + errs() << "setLoopLandBlock loop-header = BB" + << loopRep->getHeader()->getNumber() + << " landing-block = BB" << blk->getNumber() << "\n"; + } +} // setLoopLandBlock + +template<class PassT> +void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + + theEntry->breakOnRegs.insert(regNum); + + if (DEBUGME) { + errs() << "addLoopBreakOnReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopBreakOnReg + +template<class PassT> +void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + theEntry->contOnRegs.insert(regNum); + + if (DEBUGME) { + errs() << "addLoopContOnReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopContOnReg + +template<class PassT> +void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + theEntry->breakInitRegs.insert(regNum); + + if (DEBUGME) { + errs() << "addLoopBreakInitReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopBreakInitReg + +template<class PassT> +void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + theEntry->contInitRegs.insert(regNum); + + if (DEBUGME) { + errs() << "addLoopContInitReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopContInitReg + +template<class PassT> +void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep, + RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + theEntry->endbranchInitRegs.insert(regNum); + + if (DEBUGME) { + errs() << "addLoopEndbranchInitReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopEndbranchInitReg + +template<class PassT> +typename CFGStructurizer<PassT>::LoopLandInfo * +CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + return theEntry; +} // getLoopLandInfo + +template<class PassT> +typename CFGStructurizer<PassT>::BlockT * +CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + return theEntry ? theEntry->landBlk : NULL; +} // getLoopLandBlock + + +template<class PassT> +bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) { + LoopT *loopRep = loopInfo->getLoopFor(curBlk); + if (loopRep == NULL) + return false; + + BlockT *loopHeader = loopRep->getHeader(); + + return curBlk->isSuccessor(loopHeader); + +} //hasBackEdge + +template<class PassT> +unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) { + return loopRep ? 
loopRep->getLoopDepth() : 0; +} //getLoopDepth + +template<class PassT> +int CFGStructurizer<PassT>::countActiveBlock +(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart, + typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) { + int count = 0; + while (iterStart != iterEnd) { + if (!isRetiredBlock(*iterStart)) { + ++count; + } + ++iterStart; + } + + return count; +} //countActiveBlock + +// This is work around solution for findNearestCommonDominator not avaiable to +// post dom a proper fix should go to Dominators.h. + +template<class PassT> +typename CFGStructurizer<PassT>::BlockT* +CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) { + + if (postDomTree->dominates(blk1, blk2)) { + return blk1; + } + if (postDomTree->dominates(blk2, blk1)) { + return blk2; + } + + DomTreeNodeT *node1 = postDomTree->getNode(blk1); + DomTreeNodeT *node2 = postDomTree->getNode(blk2); + + // Handle newly cloned node. + if (node1 == NULL && blk1->succ_size() == 1) { + return findNearestCommonPostDom(*blk1->succ_begin(), blk2); + } + if (node2 == NULL && blk2->succ_size() == 1) { + return findNearestCommonPostDom(blk1, *blk2->succ_begin()); + } + + if (node1 == NULL || node2 == NULL) { + return NULL; + } + + node1 = node1->getIDom(); + while (node1) { + if (postDomTree->dominates(node1, node2)) { + return node1->getBlock(); + } + node1 = node1->getIDom(); + } + + return NULL; +} + +template<class PassT> +typename CFGStructurizer<PassT>::BlockT * +CFGStructurizer<PassT>::findNearestCommonPostDom +(typename std::set<BlockT *> &blks) { + BlockT *commonDom; + typename std::set<BlockT *>::const_iterator iter = blks.begin(); + typename std::set<BlockT *>::const_iterator iterEnd = blks.end(); + for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) { + BlockT *curBlk = *iter; + if (curBlk != commonDom) { + commonDom = findNearestCommonPostDom(curBlk, commonDom); + } + } + + if (DEBUGME) { + errs() << "Common post dominator for exit blocks is "; + if (commonDom) { + errs() << "BB" << commonDom->getNumber() << "\n"; + } else { + errs() << "NULL\n"; + } + } + + return commonDom; +} //findNearestCommonPostDom + +} //end namespace llvm + +//todo: move-end + + +//===----------------------------------------------------------------------===// +// +// CFGStructurizer for AMDGPU +// +//===----------------------------------------------------------------------===// + + +using namespace llvmCFGStruct; + +namespace llvm { +class AMDGPUCFGStructurizer : public MachineFunctionPass { +public: + typedef MachineInstr InstructionType; + typedef MachineFunction FunctionType; + typedef MachineBasicBlock BlockType; + typedef MachineLoopInfo LoopinfoType; + typedef MachineDominatorTree DominatortreeType; + typedef MachinePostDominatorTree PostDominatortreeType; + typedef MachineDomTreeNode DomTreeNodeType; + typedef MachineLoop LoopType; + +protected: + TargetMachine &TM; + const TargetInstrInfo *TII; + const AMDGPURegisterInfo *TRI; + +public: + AMDGPUCFGStructurizer(char &pid, TargetMachine &tm); + const TargetInstrInfo *getTargetInstrInfo() const; + +private: + +}; + +} //end of namespace llvm +AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm) +: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()), + TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())) { +} + +const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const { + return TII; +} 
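+// How the pieces fit together: the generic CFGStructurizer<PassT> above takes
+// its Instruction/Block/Loop types from PassT (the typedefs in
+// AMDGPUCFGStructurizer) and its block-level helpers from
+// CFGStructTraits<PassT>, specialized for AMDGPUCFGStructurizer later in this
+// file.  The two concrete passes that follow only supply the analyses and the
+// TargetInstrInfo; a rough sketch of how they drive the template (see the
+// prepare/run calls at the end of this file):
+//
+//   bool SomeCFGPass::runOnMachineFunction(MachineFunction &MF) {
+//     return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>()
+//                .run(MF, *this, TRI);   // or .prepare(...) for CFGPrepare
+//   }
+//
+// (SomeCFGPass is a placeholder name standing in for either concrete pass.)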
+//===----------------------------------------------------------------------===// +// +// CFGPrepare +// +//===----------------------------------------------------------------------===// + + +using namespace llvmCFGStruct; + +namespace llvm { +class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer { +public: + static char ID; + +public: + AMDGPUCFGPrepare(TargetMachine &tm); + + virtual const char *getPassName() const; + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + bool runOnMachineFunction(MachineFunction &F); + +private: + +}; + +char AMDGPUCFGPrepare::ID = 0; +} //end of namespace llvm + +AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm) + : AMDGPUCFGStructurizer(ID, tm ) { +} +const char *AMDGPUCFGPrepare::getPassName() const { + return "AMD IL Control Flow Graph Preparation Pass"; +} + +void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<MachineFunctionAnalysis>(); + AU.addRequired<MachineFunctionAnalysis>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachinePostDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); +} + +//===----------------------------------------------------------------------===// +// +// CFGPerform +// +//===----------------------------------------------------------------------===// + + +using namespace llvmCFGStruct; + +namespace llvm { +class AMDGPUCFGPerform : public AMDGPUCFGStructurizer { +public: + static char ID; + +public: + AMDGPUCFGPerform(TargetMachine &tm); + virtual const char *getPassName() const; + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnMachineFunction(MachineFunction &F); + +private: + +}; + +char AMDGPUCFGPerform::ID = 0; +} //end of namespace llvm + + AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm) +: AMDGPUCFGStructurizer(ID, tm) { +} + +const char *AMDGPUCFGPerform::getPassName() const { + return "AMD IL Control Flow Graph structurizer Pass"; +} + +void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<MachineFunctionAnalysis>(); + AU.addRequired<MachineFunctionAnalysis>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachinePostDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); +} + +//===----------------------------------------------------------------------===// +// +// CFGStructTraits<AMDGPUCFGStructurizer> +// +//===----------------------------------------------------------------------===// + +namespace llvmCFGStruct { +// this class is tailor to the AMDGPU backend +template<> +struct CFGStructTraits<AMDGPUCFGStructurizer> { + typedef int RegiT; + + static int getBranchNzeroOpcode(int oldOpcode) { + switch(oldOpcode) { + case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; + case AMDGPU::BRANCH_COND_i32: + case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; + default: + assert(0 && "internal error"); + } + return -1; + } + + static int getBranchZeroOpcode(int oldOpcode) { + switch(oldOpcode) { + case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; + case AMDGPU::BRANCH_COND_i32: + case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; + default: + assert(0 && "internal error"); + } + return -1; + } + + static int getContinueNzeroOpcode(int oldOpcode) { + switch(oldOpcode) { + case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; + default: + assert(0 && "internal error"); + }; + return -1; + } + + static int getContinueZeroOpcode(int oldOpcode) { + switch(oldOpcode) { + case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; + default: + assert(0 && "internal error"); + } 
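+    // Not reachable for the opcodes the structurizer actually emits; as in
+    // the helpers above, the -1 return only quiets "missing return" warnings
+    // when the assert is compiled out (NDEBUG builds).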
+ return -1; + } + + static MachineBasicBlock *getTrueBranch(MachineInstr *instr) { + return instr->getOperand(0).getMBB(); + } + + static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) { + instr->getOperand(0).setMBB(blk); + } + + static MachineBasicBlock * + getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) { + assert(blk->succ_size() == 2); + MachineBasicBlock *trueBranch = getTrueBranch(instr); + MachineBasicBlock::succ_iterator iter = blk->succ_begin(); + MachineBasicBlock::succ_iterator iterNext = iter; + ++iterNext; + + return (*iter == trueBranch) ? *iterNext : *iter; + } + + static bool isCondBranch(MachineInstr *instr) { + switch (instr->getOpcode()) { + case AMDGPU::JUMP: + return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0; + case AMDGPU::BRANCH_COND_i32: + case AMDGPU::BRANCH_COND_f32: + break; + default: + return false; + } + return true; + } + + static bool isUncondBranch(MachineInstr *instr) { + switch (instr->getOpcode()) { + case AMDGPU::JUMP: + return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0; + case AMDGPU::BRANCH: + return true; + default: + return false; + } + return true; + } + + static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) { + //get DebugLoc from the first MachineBasicBlock instruction with debug info + DebugLoc DL; + for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) { + MachineInstr *instr = &(*iter); + if (instr->getDebugLoc().isUnknown() == false) { + DL = instr->getDebugLoc(); + } + } + return DL; + } + + static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) { + MachineBasicBlock::reverse_iterator iter = blk->rbegin(); + MachineInstr *instr = &*iter; + if (instr && (isCondBranch(instr) || isUncondBranch(instr))) { + return instr; + } + return NULL; + } + + // The correct naming for this is getPossibleLoopendBlockBranchInstr. + // + // BB with backward-edge could have move instructions after the branch + // instruction. Such move instruction "belong to" the loop backward-edge. 
+ // + static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) { + const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>( + blk->getParent()->getTarget().getInstrInfo()); + + for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(), + iterEnd = blk->rend(); iter != iterEnd; ++iter) { + // FIXME: Simplify + MachineInstr *instr = &*iter; + if (instr) { + if (isCondBranch(instr) || isUncondBranch(instr)) { + return instr; + } else if (!TII->isMov(instr->getOpcode())) { + break; + } + } + } + return NULL; + } + + static MachineInstr *getReturnInstr(MachineBasicBlock *blk) { + MachineBasicBlock::reverse_iterator iter = blk->rbegin(); + if (iter != blk->rend()) { + MachineInstr *instr = &(*iter); + if (instr->getOpcode() == AMDGPU::RETURN) { + return instr; + } + } + return NULL; + } + + static MachineInstr *getContinueInstr(MachineBasicBlock *blk) { + MachineBasicBlock::reverse_iterator iter = blk->rbegin(); + if (iter != blk->rend()) { + MachineInstr *instr = &(*iter); + if (instr->getOpcode() == AMDGPU::CONTINUE) { + return instr; + } + } + return NULL; + } + + static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) { + for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) { + MachineInstr *instr = &(*iter); + if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) { + return instr; + } + } + return NULL; + } + + static bool isReturnBlock(MachineBasicBlock *blk) { + MachineInstr *instr = getReturnInstr(blk); + bool isReturn = (blk->succ_size() == 0); + if (instr) { + assert(isReturn); + } else if (isReturn) { + if (DEBUGME) { + errs() << "BB" << blk->getNumber() + <<" is return block without RETURN instr\n"; + } + } + + return isReturn; + } + + static MachineBasicBlock::iterator + getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) { + assert(instr->getParent() == blk && "instruction doesn't belong to block"); + MachineBasicBlock::iterator iter = blk->begin(); + MachineBasicBlock::iterator iterEnd = blk->end(); + while (&(*iter) != instr && iter != iterEnd) { + ++iter; + } + + assert(iter != iterEnd); + return iter; + }//getInstrPos + + static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, + AMDGPUCFGStructurizer *passRep) { + return insertInstrBefore(blk,newOpcode,passRep,DebugLoc()); + } //insertInstrBefore + + static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, + AMDGPUCFGStructurizer *passRep, DebugLoc DL) { + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); + + MachineBasicBlock::iterator res; + if (blk->begin() != blk->end()) { + blk->insert(blk->begin(), newInstr); + } else { + blk->push_back(newInstr); + } + + SHOWNEWINSTR(newInstr); + + return newInstr; + } //insertInstrBefore + + static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, + AMDGPUCFGStructurizer *passRep) { + insertInstrEnd(blk,newOpcode,passRep,DebugLoc()); + } //insertInstrEnd + + static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, + AMDGPUCFGStructurizer *passRep, DebugLoc DL) { + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineInstr *newInstr = blk->getParent() + ->CreateMachineInstr(tii->get(newOpcode), DL); + + blk->push_back(newInstr); + //assume the instruction doesn't take any reg operand ... 
+ + SHOWNEWINSTR(newInstr); + } //insertInstrEnd + + static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos, + int newOpcode, + AMDGPUCFGStructurizer *passRep) { + MachineInstr *oldInstr = &(*instrPos); + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineBasicBlock *blk = oldInstr->getParent(); + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(newOpcode), + DebugLoc()); + + blk->insert(instrPos, newInstr); + //assume the instruction doesn't take any reg operand ... + + SHOWNEWINSTR(newInstr); + return newInstr; + } //insertInstrBefore + + static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos, + int newOpcode, + AMDGPUCFGStructurizer *passRep, + DebugLoc DL) { + MachineInstr *oldInstr = &(*instrPos); + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineBasicBlock *blk = oldInstr->getParent(); + MachineFunction *MF = blk->getParent(); + MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL); + + blk->insert(instrPos, newInstr); + MachineInstrBuilder MIB(*MF, newInstr); + MIB.addReg(oldInstr->getOperand(1).getReg(), false); + + SHOWNEWINSTR(newInstr); + //erase later oldInstr->eraseFromParent(); + } //insertCondBranchBefore + + static void insertCondBranchBefore(MachineBasicBlock *blk, + MachineBasicBlock::iterator insertPos, + int newOpcode, + AMDGPUCFGStructurizer *passRep, + RegiT regNum, + DebugLoc DL) { + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineFunction *MF = blk->getParent(); + + MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL); + + //insert before + blk->insert(insertPos, newInstr); + MachineInstrBuilder(*MF, newInstr).addReg(regNum, false); + + SHOWNEWINSTR(newInstr); + } //insertCondBranchBefore + + static void insertCondBranchEnd(MachineBasicBlock *blk, + int newOpcode, + AMDGPUCFGStructurizer *passRep, + RegiT regNum) { + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineFunction *MF = blk->getParent(); + MachineInstr *newInstr = + MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc()); + + blk->push_back(newInstr); + MachineInstrBuilder(*MF, newInstr).addReg(regNum, false); + + SHOWNEWINSTR(newInstr); + } //insertCondBranchEnd + + + static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos, + AMDGPUCFGStructurizer *passRep, + RegiT regNum, int regVal) { + MachineInstr *oldInstr = &(*instrPos); + const AMDGPUInstrInfo *tii = + static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); + MachineBasicBlock *blk = oldInstr->getParent(); + MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, + regVal); + blk->insert(instrPos, newInstr); + + SHOWNEWINSTR(newInstr); + } //insertAssignInstrBefore + + static void insertAssignInstrBefore(MachineBasicBlock *blk, + AMDGPUCFGStructurizer *passRep, + RegiT regNum, int regVal) { + const AMDGPUInstrInfo *tii = + static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); + + MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, + regVal); + if (blk->begin() != blk->end()) { + blk->insert(blk->begin(), newInstr); + } else { + blk->push_back(newInstr); + } + + SHOWNEWINSTR(newInstr); + + } //insertInstrBefore + + static void insertCompareInstrBefore(MachineBasicBlock *blk, + MachineBasicBlock::iterator instrPos, + AMDGPUCFGStructurizer *passRep, + RegiT dstReg, RegiT src1Reg, + RegiT src2Reg) { + const AMDGPUInstrInfo *tii = + static_cast<const AMDGPUInstrInfo 
*>(passRep->getTargetInstrInfo()); + MachineFunction *MF = blk->getParent(); + MachineInstr *newInstr = + MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc()); + + MachineInstrBuilder MIB(*MF, newInstr); + MIB.addReg(dstReg, RegState::Define); //set target + MIB.addReg(src1Reg); //set src value + MIB.addReg(src2Reg); //set src value + + blk->insert(instrPos, newInstr); + SHOWNEWINSTR(newInstr); + + } //insertCompareInstrBefore + + static void cloneSuccessorList(MachineBasicBlock *dstBlk, + MachineBasicBlock *srcBlk) { + for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(), + iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) { + dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of + } + } //cloneSuccessorList + + static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) { + MachineFunction *func = srcBlk->getParent(); + MachineBasicBlock *newBlk = func->CreateMachineBasicBlock(); + func->push_back(newBlk); //insert to function + for (MachineBasicBlock::iterator iter = srcBlk->begin(), + iterEnd = srcBlk->end(); + iter != iterEnd; ++iter) { + MachineInstr *instr = func->CloneMachineInstr(iter); + newBlk->push_back(instr); + } + return newBlk; + } + + //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because + //the AMDGPU instruction is not recognized as terminator fix this and retire + //this routine + static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk, + MachineBasicBlock *oldBlk, + MachineBasicBlock *newBlk) { + MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk); + if (branchInstr && isCondBranch(branchInstr) && + getTrueBranch(branchInstr) == oldBlk) { + setTrueBranch(branchInstr, newBlk); + } + } + + static void wrapup(MachineBasicBlock *entryBlk) { + assert((!entryBlk->getParent()->getJumpTableInfo() + || entryBlk->getParent()->getJumpTableInfo()->isEmpty()) + && "found a jump table"); + + //collect continue right before endloop + SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr; + MachineBasicBlock::iterator pre = entryBlk->begin(); + MachineBasicBlock::iterator iterEnd = entryBlk->end(); + MachineBasicBlock::iterator iter = pre; + while (iter != iterEnd) { + if (pre->getOpcode() == AMDGPU::CONTINUE + && iter->getOpcode() == AMDGPU::ENDLOOP) { + contInstr.push_back(pre); + } + pre = iter; + ++iter; + } //end while + + //delete continue right before endloop + for (unsigned i = 0; i < contInstr.size(); ++i) { + contInstr[i]->eraseFromParent(); + } + + // TODO to fix up jump table so later phase won't be confused. if + // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but + // there isn't such an interface yet. 
alternatively, replace all the other + // blocks in the jump table with the entryBlk //} + + } //wrapup + + static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) { + return &pass.getAnalysis<MachineDominatorTree>(); + } + + static MachinePostDominatorTree* + getPostDominatorTree(AMDGPUCFGStructurizer &pass) { + return &pass.getAnalysis<MachinePostDominatorTree>(); + } + + static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) { + return &pass.getAnalysis<MachineLoopInfo>(); + } +}; // template class CFGStructTraits +} //end of namespace llvm + +// createAMDGPUCFGPreparationPass- Returns a pass +FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm + ) { + return new AMDGPUCFGPrepare(tm ); +} + +bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) { + return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func, + *this, + TRI); +} + +// createAMDGPUCFGStructurizerPass- Returns a pass +FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm + ) { + return new AMDGPUCFGPerform(tm ); +} + +bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) { + return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func, + *this, + TRI); +} diff --git a/lib/Target/R600/AMDILDevice.cpp b/lib/Target/R600/AMDILDevice.cpp new file mode 100644 index 0000000000..eec5059de2 --- /dev/null +++ b/lib/Target/R600/AMDILDevice.cpp @@ -0,0 +1,124 @@ +//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//==-----------------------------------------------------------------------===// +#include "AMDILDevice.h" +#include "AMDGPUSubtarget.h" + +using namespace llvm; +// Default implementation for all of the classes. 
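+// Capabilities are tracked in two parallel BitVectors: mHWBits for features
+// the device performs natively and mSWBits for features that are emulated.
+// A capability must never be set in both; getExecutionMode() asserts this.
+// A derived device typically adjusts the bits in its setCaps() override,
+// roughly like this (AMDGPUFooDevice is only a placeholder name):
+//
+//   void AMDGPUFooDevice::setCaps() {
+//     mHWBits.set(AMDGPUDeviceInfo::LocalMem);   // native LDS
+//     mSWBits.reset(AMDGPUDeviceInfo::LocalMem); // no emulation needed
+//   }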
+AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST) { + mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities); + mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities); + setCaps(); + DeviceFlag = OCL_DEVICE_ALL; +} + +AMDGPUDevice::~AMDGPUDevice() { + mHWBits.clear(); + mSWBits.clear(); +} + +size_t AMDGPUDevice::getMaxGDSSize() const { + return 0; +} + +uint32_t +AMDGPUDevice::getDeviceFlag() const { + return DeviceFlag; +} + +size_t AMDGPUDevice::getMaxNumCBs() const { + if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) { + return HW_MAX_NUM_CB; + } + + return 0; +} + +size_t AMDGPUDevice::getMaxCBSize() const { + if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) { + return MAX_CB_SIZE; + } + + return 0; +} + +size_t AMDGPUDevice::getMaxScratchSize() const { + return 65536; +} + +uint32_t AMDGPUDevice::getStackAlignment() const { + return 16; +} + +void AMDGPUDevice::setCaps() { + mSWBits.set(AMDGPUDeviceInfo::HalfOps); + mSWBits.set(AMDGPUDeviceInfo::ByteOps); + mSWBits.set(AMDGPUDeviceInfo::ShortOps); + mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod); + if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) { + mSWBits.set(AMDGPUDeviceInfo::NoInline); + } + if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) { + mSWBits.set(AMDGPUDeviceInfo::MacroDB); + } + if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) { + mSWBits.set(AMDGPUDeviceInfo::ConstantMem); + } else { + mHWBits.set(AMDGPUDeviceInfo::ConstantMem); + } + if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) { + mSWBits.set(AMDGPUDeviceInfo::PrivateMem); + } else { + mHWBits.set(AMDGPUDeviceInfo::PrivateMem); + } + if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) { + mSWBits.set(AMDGPUDeviceInfo::BarrierDetect); + } + mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps); + mSWBits.set(AMDGPUDeviceInfo::LongOps); +} + +AMDGPUDeviceInfo::ExecutionMode +AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const { + if (mHWBits[Caps]) { + assert(!mSWBits[Caps] && "Cannot set both SW and HW caps"); + return AMDGPUDeviceInfo::Hardware; + } + + if (mSWBits[Caps]) { + assert(!mHWBits[Caps] && "Cannot set both SW and HW caps"); + return AMDGPUDeviceInfo::Software; + } + + return AMDGPUDeviceInfo::Unsupported; + +} + +bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const { + return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported; +} + +bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const { + return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware; +} + +bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const { + return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software; +} + +std::string +AMDGPUDevice::getDataLayout() const { + return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" + "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" + "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" + "-v512:512:512-v1024:1024:1024-v2048:2048:2048" + "-n8:16:32:64"); +} diff --git a/lib/Target/R600/AMDILDevice.h b/lib/Target/R600/AMDILDevice.h new file mode 100644 index 0000000000..97df98cafb --- /dev/null +++ b/lib/Target/R600/AMDILDevice.h @@ -0,0 +1,117 @@ +//===---- AMDILDevice.h - Define Device Data for AMDGPU -----*- C++ -*------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +// +/// \file +/// \brief Interface for the subtarget data classes. +// +/// This file will define the interface that each generation needs to +/// implement in order to correctly answer queries on the capabilities of the +/// specific hardware. +//===----------------------------------------------------------------------===// +#ifndef AMDILDEVICEIMPL_H +#define AMDILDEVICEIMPL_H +#include "AMDIL.h" +#include "llvm/ADT/BitVector.h" + +namespace llvm { + class AMDGPUSubtarget; + class MCStreamer; +//===----------------------------------------------------------------------===// +// Interface for data that is specific to a single device +//===----------------------------------------------------------------------===// +class AMDGPUDevice { +public: + AMDGPUDevice(AMDGPUSubtarget *ST); + virtual ~AMDGPUDevice(); + + // Enum values for the various memory types. + enum { + RAW_UAV_ID = 0, + ARENA_UAV_ID = 1, + LDS_ID = 2, + GDS_ID = 3, + SCRATCH_ID = 4, + CONSTANT_ID = 5, + GLOBAL_ID = 6, + MAX_IDS = 7 + } IO_TYPE_IDS; + + /// \returns The max LDS size that the hardware supports. Size is in + /// bytes. + virtual size_t getMaxLDSSize() const = 0; + + /// \returns The max GDS size that the hardware supports if the GDS is + /// supported by the hardware. Size is in bytes. + virtual size_t getMaxGDSSize() const; + + /// \returns The max number of hardware constant address spaces that + /// are supported by this device. + virtual size_t getMaxNumCBs() const; + + /// \returns The max number of bytes a single hardware constant buffer + /// can support. Size is in bytes. + virtual size_t getMaxCBSize() const; + + /// \returns The max number of bytes allowed by the hardware scratch + /// buffer. Size is in bytes. + virtual size_t getMaxScratchSize() const; + + /// \brief Get the flag that corresponds to the device. + virtual uint32_t getDeviceFlag() const; + + /// \returns The number of work-items that exist in a single hardware + /// wavefront. + virtual size_t getWavefrontSize() const = 0; + + /// \brief Get the generational name of this specific device. + virtual uint32_t getGeneration() const = 0; + + /// \brief Get the stack alignment of this specific device. + virtual uint32_t getStackAlignment() const; + + /// \brief Get the resource ID for this specific device. + virtual uint32_t getResourceID(uint32_t DeviceID) const = 0; + + /// \brief Get the max number of UAV's for this device. + virtual uint32_t getMaxNumUAVs() const = 0; + + + // API utilizing more detailed capabilities of each family of + // cards. If a capability is supported, then either usesHardware or + // usesSoftware returned true. If usesHardware returned true, then + // usesSoftware must return false for the same capability. Hardware + // execution means that the feature is done natively by the hardware + // and is not emulated by the softare. Software execution means + // that the feature could be done in the hardware, but there is + // software that emulates it with possibly using the hardware for + // support since the hardware does not fully comply with OpenCL + // specs. 
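+  //
+  // A minimal usage sketch (Dev, UseLDS and UseEmulatedLDS are illustrative
+  // names, not part of this interface): callers branch on the execution mode
+  // before relying on a native resource limit, e.g.
+  //
+  //   if (Dev->usesHardware(AMDGPUDeviceInfo::LocalMem))
+  //     UseLDS(Dev->getMaxLDSSize());   // native LDS limit
+  //   else if (Dev->usesSoftware(AMDGPUDeviceInfo::LocalMem))
+  //     UseEmulatedLDS();               // emulated; take the fallback path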
+ + bool isSupported(AMDGPUDeviceInfo::Caps Mode) const; + bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const; + bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const; + virtual std::string getDataLayout() const; + static const unsigned int MAX_LDS_SIZE_700 = 16384; + static const unsigned int MAX_LDS_SIZE_800 = 32768; + static const unsigned int WavefrontSize = 64; + static const unsigned int HalfWavefrontSize = 32; + static const unsigned int QuarterWavefrontSize = 16; +protected: + virtual void setCaps(); + BitVector mHWBits; + llvm::BitVector mSWBits; + AMDGPUSubtarget *mSTM; + uint32_t DeviceFlag; +private: + AMDGPUDeviceInfo::ExecutionMode + getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const; +}; + +} // namespace llvm +#endif // AMDILDEVICEIMPL_H diff --git a/lib/Target/R600/AMDILDeviceInfo.cpp b/lib/Target/R600/AMDILDeviceInfo.cpp new file mode 100644 index 0000000000..9605fbe633 --- /dev/null +++ b/lib/Target/R600/AMDILDeviceInfo.cpp @@ -0,0 +1,94 @@ +//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +/// \file +/// \brief Function that creates DeviceInfo from a device name and other information. +// +//==-----------------------------------------------------------------------===// +#include "AMDILDevices.h" +#include "AMDGPUSubtarget.h" + +using namespace llvm; +namespace llvm { +namespace AMDGPUDeviceInfo { + +AMDGPUDevice* getDeviceFromName(const std::string &deviceName, + AMDGPUSubtarget *ptr, + bool is64bit, bool is64on32bit) { + if (deviceName.c_str()[2] == '7') { + switch (deviceName.c_str()[3]) { + case '1': + return new AMDGPU710Device(ptr); + case '7': + return new AMDGPU770Device(ptr); + default: + return new AMDGPU7XXDevice(ptr); + } + } else if (deviceName == "cypress") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDGPUCypressDevice(ptr); + } else if (deviceName == "juniper") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDGPUEvergreenDevice(ptr); + } else if (deviceName == "redwood") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDGPURedwoodDevice(ptr); + } else if (deviceName == "cedar") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDGPUCedarDevice(ptr); + } else if (deviceName == "barts" || deviceName == "turks") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDGPUNIDevice(ptr); + } else if (deviceName == "cayman") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDGPUCaymanDevice(ptr); + } else if (deviceName == "caicos") { 
+#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDGPUNIDevice(ptr); + } else if (deviceName == "SI") { + return new AMDGPUSIDevice(ptr); + } else { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDGPU7XXDevice(ptr); + } +} +} // End namespace AMDGPUDeviceInfo +} // End namespace llvm diff --git a/lib/Target/R600/AMDILDeviceInfo.h b/lib/Target/R600/AMDILDeviceInfo.h new file mode 100644 index 0000000000..4b2c3a53c7 --- /dev/null +++ b/lib/Target/R600/AMDILDeviceInfo.h @@ -0,0 +1,88 @@ +//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//==-----------------------------------------------------------------------===// +#ifndef AMDILDEVICEINFO_H +#define AMDILDEVICEINFO_H + + +#include <string> + +namespace llvm { + class AMDGPUDevice; + class AMDGPUSubtarget; + namespace AMDGPUDeviceInfo { + /// Each Capabilities can be executed using a hardware instruction, + /// emulated with a sequence of software instructions, or not + /// supported at all. + enum ExecutionMode { + Unsupported = 0, ///< Unsupported feature on the card(Default value) + /// This is the execution mode that is set if the feature is emulated in + /// software. + Software, + /// This execution mode is set if the feature exists natively in hardware + Hardware + }; + + enum Caps { + HalfOps = 0x1, ///< Half float is supported or not. + DoubleOps = 0x2, ///< Double is supported or not. + ByteOps = 0x3, ///< Byte(char) is support or not. + ShortOps = 0x4, ///< Short is supported or not. + LongOps = 0x5, ///< Long is supported or not. + Images = 0x6, ///< Images are supported or not. + ByteStores = 0x7, ///< ByteStores available(!HD4XXX). + ConstantMem = 0x8, ///< Constant/CB memory. + LocalMem = 0x9, ///< Local/LDS memory. + PrivateMem = 0xA, ///< Scratch/Private/Stack memory. + RegionMem = 0xB, ///< OCL GDS Memory Extension. + FMA = 0xC, ///< Use HW FMA or SW FMA. + ArenaSegment = 0xD, ///< Use for Arena UAV per pointer 12-1023. + MultiUAV = 0xE, ///< Use for UAV per Pointer 0-7. + Reserved0 = 0xF, ///< ReservedFlag + NoAlias = 0x10, ///< Cached loads. + Signed24BitOps = 0x11, ///< Peephole Optimization. + /// Debug mode implies that no hardware features or optimizations + /// are performned and that all memory access go through a single + /// uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX). + Debug = 0x12, + CachedMem = 0x13, ///< Cached mem is available or not. + BarrierDetect = 0x14, ///< Detect duplicate barriers. + Reserved1 = 0x15, ///< Reserved flag + ByteLDSOps = 0x16, ///< Flag to specify if byte LDS ops are available. + ArenaVectors = 0x17, ///< Flag to specify if vector loads from arena work. + TmrReg = 0x18, ///< Flag to specify if Tmr register is supported. + NoInline = 0x19, ///< Flag to specify that no inlining should occur. + MacroDB = 0x1A, ///< Flag to specify that backend handles macrodb. + HW64BitDivMod = 0x1B, ///< Flag for backend to generate 64bit div/mod. + ArenaUAV = 0x1C, ///< Flag to specify that arena uav is supported. + PrivateUAV = 0x1D, ///< Flag to specify that private memory uses uav's. 
+ /// If more capabilities are required, then + /// this number needs to be increased. + /// All capabilities must come before this + /// number. + MaxNumberCapabilities = 0x20 + }; + /// These have to be in order with the older generations + /// having the lower number enumerations. + enum Generation { + HD4XXX = 0, ///< 7XX based devices. + HD5XXX, ///< Evergreen based devices. + HD6XXX, ///< NI/Evergreen+ based devices. + HD7XXX, ///< Southern Islands based devices. + HDTEST, ///< Experimental feature testing device. + HDNUMGEN + }; + + + AMDGPUDevice* + getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr, + bool is64bit = false, bool is64on32bit = false); + } // namespace AMDILDeviceInfo +} // namespace llvm +#endif // AMDILDEVICEINFO_H diff --git a/lib/Target/R600/AMDILDevices.h b/lib/Target/R600/AMDILDevices.h new file mode 100644 index 0000000000..636fa6d359 --- /dev/null +++ b/lib/Target/R600/AMDILDevices.h @@ -0,0 +1,19 @@ +//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//==-----------------------------------------------------------------------===// +#ifndef AMDIL_DEVICES_H +#define AMDIL_DEVICES_H +// Include all of the device specific header files +#include "AMDIL7XXDevice.h" +#include "AMDILDevice.h" +#include "AMDILEvergreenDevice.h" +#include "AMDILNIDevice.h" +#include "AMDILSIDevice.h" + +#endif // AMDIL_DEVICES_H diff --git a/lib/Target/R600/AMDILEvergreenDevice.cpp b/lib/Target/R600/AMDILEvergreenDevice.cpp new file mode 100644 index 0000000000..c5213a0410 --- /dev/null +++ b/lib/Target/R600/AMDILEvergreenDevice.cpp @@ -0,0 +1,169 @@ +//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +/// \file +//==-----------------------------------------------------------------------===// +#include "AMDILEvergreenDevice.h" + +using namespace llvm; + +AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST) +: AMDGPUDevice(ST) { + setCaps(); + std::string name = ST->getDeviceName(); + if (name == "cedar") { + DeviceFlag = OCL_DEVICE_CEDAR; + } else if (name == "redwood") { + DeviceFlag = OCL_DEVICE_REDWOOD; + } else if (name == "cypress") { + DeviceFlag = OCL_DEVICE_CYPRESS; + } else { + DeviceFlag = OCL_DEVICE_JUNIPER; + } +} + +AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() { +} + +size_t AMDGPUEvergreenDevice::getMaxLDSSize() const { + if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { + return MAX_LDS_SIZE_800; + } else { + return 0; + } +} +size_t AMDGPUEvergreenDevice::getMaxGDSSize() const { + if (usesHardware(AMDGPUDeviceInfo::RegionMem)) { + return MAX_LDS_SIZE_800; + } else { + return 0; + } +} +uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const { + return 12; +} + +uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const { + switch(id) { + default: + assert(0 && "ID type passed in is unknown!"); + break; + case CONSTANT_ID: + case RAW_UAV_ID: + return GLOBAL_RETURN_RAW_UAV_ID; + case GLOBAL_ID: + case ARENA_UAV_ID: + return DEFAULT_ARENA_UAV_ID; + case LDS_ID: + if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { + return DEFAULT_LDS_ID; + } else { + return DEFAULT_ARENA_UAV_ID; + } + case GDS_ID: + if (usesHardware(AMDGPUDeviceInfo::RegionMem)) { + return DEFAULT_GDS_ID; + } else { + return DEFAULT_ARENA_UAV_ID; + } + case SCRATCH_ID: + if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) { + return DEFAULT_SCRATCH_ID; + } else { + return DEFAULT_ARENA_UAV_ID; + } + }; + return 0; +} + +size_t AMDGPUEvergreenDevice::getWavefrontSize() const { + return AMDGPUDevice::WavefrontSize; +} + +uint32_t AMDGPUEvergreenDevice::getGeneration() const { + return AMDGPUDeviceInfo::HD5XXX; +} + +void AMDGPUEvergreenDevice::setCaps() { + mSWBits.set(AMDGPUDeviceInfo::ArenaSegment); + mHWBits.set(AMDGPUDeviceInfo::ArenaUAV); + mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod); + mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod); + mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps); + if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) { + mHWBits.set(AMDGPUDeviceInfo::ByteStores); + } + if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) { + mSWBits.set(AMDGPUDeviceInfo::LocalMem); + mSWBits.set(AMDGPUDeviceInfo::RegionMem); + } else { + mHWBits.set(AMDGPUDeviceInfo::LocalMem); + mHWBits.set(AMDGPUDeviceInfo::RegionMem); + } + mHWBits.set(AMDGPUDeviceInfo::Images); + if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) { + mHWBits.set(AMDGPUDeviceInfo::NoAlias); + } + mHWBits.set(AMDGPUDeviceInfo::CachedMem); + if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) { + mHWBits.set(AMDGPUDeviceInfo::MultiUAV); + } + mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps); + mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps); + mHWBits.set(AMDGPUDeviceInfo::ArenaVectors); + mHWBits.set(AMDGPUDeviceInfo::LongOps); + mSWBits.reset(AMDGPUDeviceInfo::LongOps); + mHWBits.set(AMDGPUDeviceInfo::TmrReg); +} + +AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST) + : AMDGPUEvergreenDevice(ST) { + setCaps(); +} + +AMDGPUCypressDevice::~AMDGPUCypressDevice() { +} + +void AMDGPUCypressDevice::setCaps() { + if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) { + mHWBits.set(AMDGPUDeviceInfo::DoubleOps); + mHWBits.set(AMDGPUDeviceInfo::FMA); + } +} + + +AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST) + : 
AMDGPUEvergreenDevice(ST) { + setCaps(); +} + +AMDGPUCedarDevice::~AMDGPUCedarDevice() { +} + +void AMDGPUCedarDevice::setCaps() { + mSWBits.set(AMDGPUDeviceInfo::FMA); +} + +size_t AMDGPUCedarDevice::getWavefrontSize() const { + return AMDGPUDevice::QuarterWavefrontSize; +} + +AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST) + : AMDGPUEvergreenDevice(ST) { + setCaps(); +} + +AMDGPURedwoodDevice::~AMDGPURedwoodDevice() { +} + +void AMDGPURedwoodDevice::setCaps() { + mSWBits.set(AMDGPUDeviceInfo::FMA); +} + +size_t AMDGPURedwoodDevice::getWavefrontSize() const { + return AMDGPUDevice::HalfWavefrontSize; +} diff --git a/lib/Target/R600/AMDILEvergreenDevice.h b/lib/Target/R600/AMDILEvergreenDevice.h new file mode 100644 index 0000000000..ea90f774a8 --- /dev/null +++ b/lib/Target/R600/AMDILEvergreenDevice.h @@ -0,0 +1,93 @@ +//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +/// \file +/// \brief Interface for the subtarget data classes. +/// +/// This file defines the interface that each generation needs to +/// implement in order to correctly answer queries on the capabilities of the +/// specific hardware. +//===----------------------------------------------------------------------===// +#ifndef AMDILEVERGREENDEVICE_H +#define AMDILEVERGREENDEVICE_H +#include "AMDGPUSubtarget.h" +#include "AMDILDevice.h" + +namespace llvm { + class AMDGPUSubtarget; +//===----------------------------------------------------------------------===// +// Evergreen generation of devices and their respective sub classes +//===----------------------------------------------------------------------===// + + +/// \brief The AMDGPUEvergreenDevice is the base device class for all of the Evergreen +/// series of cards. +/// +/// This class contains information required to differentiate +/// the Evergreen device from the generic AMDGPUDevice. This device represents +/// the capabilities of the 'Juniper' cards, also known as the HD57XX. +class AMDGPUEvergreenDevice : public AMDGPUDevice { +public: + AMDGPUEvergreenDevice(AMDGPUSubtarget *ST); + virtual ~AMDGPUEvergreenDevice(); + virtual size_t getMaxLDSSize() const; + virtual size_t getMaxGDSSize() const; + virtual size_t getWavefrontSize() const; + virtual uint32_t getGeneration() const; + virtual uint32_t getMaxNumUAVs() const; + virtual uint32_t getResourceID(uint32_t) const; +protected: + virtual void setCaps(); +}; + +/// The AMDGPUCypressDevice is similar to the AMDGPUEvergreenDevice, except it has +/// support for double precision operations. This device is used to represent +/// both the Cypress and Hemlock cards, which are commercially known as HD58XX +/// and HD59XX cards. +class AMDGPUCypressDevice : public AMDGPUEvergreenDevice { +public: + AMDGPUCypressDevice(AMDGPUSubtarget *ST); + virtual ~AMDGPUCypressDevice(); +private: + virtual void setCaps(); +}; + + +/// \brief The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based +/// devices. +/// +/// This class differs from the base AMDGPUEvergreenDevice in that the +/// device is a ~quarter of the 'Juniper'. These are commercially known as the +/// HD54XX and HD53XX series of cards.
+class AMDGPUCedarDevice : public AMDGPUEvergreenDevice { +public: + AMDGPUCedarDevice(AMDGPUSubtarget *ST); + virtual ~AMDGPUCedarDevice(); + virtual size_t getWavefrontSize() const; +private: + virtual void setCaps(); +}; + +/// \brief The AMDGPURedwoodDevice is the class that represents all of the 'Redwood' based +/// devices. +/// +/// This class differs from the base class, in that these devices are +/// considered about half of a 'Juniper' device. These are commercially known as +/// the HD55XX and HD56XX series of cards. +class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice { +public: + AMDGPURedwoodDevice(AMDGPUSubtarget *ST); + virtual ~AMDGPURedwoodDevice(); + virtual size_t getWavefrontSize() const; +private: + virtual void setCaps(); +}; + +} // namespace llvm +#endif // AMDILEVERGREENDEVICE_H diff --git a/lib/Target/R600/AMDILFrameLowering.cpp b/lib/Target/R600/AMDILFrameLowering.cpp new file mode 100644 index 0000000000..9ad495ab48 --- /dev/null +++ b/lib/Target/R600/AMDILFrameLowering.cpp @@ -0,0 +1,47 @@ +//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +/// \file +/// \brief Interface to describe a layout of a stack frame on an AMDGPU target +/// machine. +// +//===----------------------------------------------------------------------===// +#include "AMDILFrameLowering.h" +#include "llvm/CodeGen/MachineFrameInfo.h" + +using namespace llvm; +AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl, + int LAO, unsigned TransAl) + : TargetFrameLowering(D, StackAl, LAO, TransAl) { +} + +AMDGPUFrameLowering::~AMDGPUFrameLowering() { +} + +int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->getObjectOffset(FI); +} + +const TargetFrameLowering::SpillSlot * +AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { + NumEntries = 0; + return 0; +} +void +AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const { +} +void +AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { +} +bool +AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const { + return false; +} diff --git a/lib/Target/R600/AMDILFrameLowering.h b/lib/Target/R600/AMDILFrameLowering.h new file mode 100644 index 0000000000..51337c3dd2 --- /dev/null +++ b/lib/Target/R600/AMDILFrameLowering.h @@ -0,0 +1,40 @@ +//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Interface to describe a layout of a stack frame on an AMDIL target +/// machine. +// +//===----------------------------------------------------------------------===// +#ifndef AMDILFRAME_LOWERING_H +#define AMDILFRAME_LOWERING_H + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + +/// \brief Information about the stack frame layout on the AMDGPU targets.
+/// +/// It holds the direction of the stack growth, the known stack alignment on +/// entry to each function, and the offset to the locals area. +/// See TargetFrameInfo for more comments. +class AMDGPUFrameLowering : public TargetFrameLowering { +public: + AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO, + unsigned TransAl = 1); + virtual ~AMDGPUFrameLowering(); + virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const; + virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const; + virtual void emitPrologue(MachineFunction &MF) const; + virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + virtual bool hasFP(const MachineFunction &MF) const; +}; +} // namespace llvm +#endif // AMDILFRAME_LOWERING_H diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp new file mode 100644 index 0000000000..d15ed393c1 --- /dev/null +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -0,0 +1,485 @@ +//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +/// \file +/// \brief Defines an instruction selector for the AMDGPU target. +// +//===----------------------------------------------------------------------===// +#include "AMDGPUInstrInfo.h" +#include "AMDGPUISelLowering.h" // For AMDGPUISD +#include "AMDGPURegisterInfo.h" +#include "AMDILDevices.h" +#include "R600InstrInfo.h" +#include "llvm/ADT/ValueMap.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Support/Compiler.h" +#include <list> +#include <queue> + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +namespace { +/// AMDGPU specific code to select AMDGPU machine instructions for +/// SelectionDAG operations. +class AMDGPUDAGToDAGISel : public SelectionDAGISel { + // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can + // make the right decision when generating code for different targets. 
+ const AMDGPUSubtarget &Subtarget; +public: + AMDGPUDAGToDAGISel(TargetMachine &TM); + virtual ~AMDGPUDAGToDAGISel(); + + SDNode *Select(SDNode *N); + virtual const char *getPassName() const; + +private: + inline SDValue getSmallIPtrImm(unsigned Imm); + + // Complex pattern selectors + bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2); + bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2); + bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2); + + static bool checkType(const Value *ptr, unsigned int addrspace); + static const Value *getBasePointerValue(const Value *V); + + static bool isGlobalStore(const StoreSDNode *N); + static bool isPrivateStore(const StoreSDNode *N); + static bool isLocalStore(const StoreSDNode *N); + static bool isRegionStore(const StoreSDNode *N); + + static bool isCPLoad(const LoadSDNode *N); + static bool isConstantLoad(const LoadSDNode *N, int cbID); + static bool isGlobalLoad(const LoadSDNode *N); + static bool isParamLoad(const LoadSDNode *N); + static bool isPrivateLoad(const LoadSDNode *N); + static bool isLocalLoad(const LoadSDNode *N); + static bool isRegionLoad(const LoadSDNode *N); + + bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset); + bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset); + bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); + + // Include the pieces autogenerated from the target description. +#include "AMDGPUGenDAGISel.inc" +}; +} // end anonymous namespace + +/// \brief This pass converts a legalized DAG into a AMDGPU-specific +// DAG, ready for instruction scheduling. +FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM + ) { + return new AMDGPUDAGToDAGISel(TM); +} + +AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM + ) + : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) { +} + +AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() { +} + +SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); +} + +bool AMDGPUDAGToDAGISel::SelectADDRParam( + SDValue Addr, SDValue& R1, SDValue& R2) { + + if (Addr.getOpcode() == ISD::FrameIndex) { + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + R2 = CurDAG->getTargetConstant(0, MVT::i32); + } else { + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, MVT::i32); + } + } else if (Addr.getOpcode() == ISD::ADD) { + R1 = Addr.getOperand(0); + R2 = Addr.getOperand(1); + } else { + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, MVT::i32); + } + return true; +} + +bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) { + return false; + } + return SelectADDRParam(Addr, R1, R2); +} + + +bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) { + return false; + } + + if (Addr.getOpcode() == ISD::FrameIndex) { + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); + R2 = CurDAG->getTargetConstant(0, MVT::i64); + } else { + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, MVT::i64); + } + } else if (Addr.getOpcode() == ISD::ADD) { + R1 = Addr.getOperand(0); + R2 = Addr.getOperand(1); + } else { + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, MVT::i64); + } + return 
true; +} + +SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { + unsigned int Opc = N->getOpcode(); + if (N->isMachineOpcode()) { + return NULL; // Already selected. + } + switch (Opc) { + default: break; + case ISD::FrameIndex: { + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) { + unsigned int FI = FIN->getIndex(); + EVT OpVT = N->getValueType(0); + unsigned int NewOpc = AMDGPU::COPY; + SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32); + return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI); + } + break; + } + case ISD::ConstantFP: + case ISD::Constant: { + const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); + // XXX: Custom immediate lowering not implemented yet. Instead we use + // pseudo instructions defined in SIInstructions.td + if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { + break; + } + const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo()); + + uint64_t ImmValue = 0; + unsigned ImmReg = AMDGPU::ALU_LITERAL_X; + + if (N->getOpcode() == ISD::ConstantFP) { + // XXX: 64-bit Immediates not supported yet + assert(N->getValueType(0) != MVT::f64); + + ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N); + APFloat Value = C->getValueAPF(); + float FloatValue = Value.convertToFloat(); + if (FloatValue == 0.0) { + ImmReg = AMDGPU::ZERO; + } else if (FloatValue == 0.5) { + ImmReg = AMDGPU::HALF; + } else if (FloatValue == 1.0) { + ImmReg = AMDGPU::ONE; + } else { + ImmValue = Value.bitcastToAPInt().getZExtValue(); + } + } else { + // XXX: 64-bit Immediates not supported yet + assert(N->getValueType(0) != MVT::i64); + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); + if (C->getZExtValue() == 0) { + ImmReg = AMDGPU::ZERO; + } else if (C->getZExtValue() == 1) { + ImmReg = AMDGPU::ONE_INT; + } else { + ImmValue = C->getZExtValue(); + } + } + + for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use); + Use != SDNode::use_end(); Use = Next) { + Next = llvm::next(Use); + std::vector<SDValue> Ops; + for (unsigned i = 0; i < Use->getNumOperands(); ++i) { + Ops.push_back(Use->getOperand(i)); + } + + if (!Use->isMachineOpcode()) { + if (ImmReg == AMDGPU::ALU_LITERAL_X) { + // We can only use literal constants (e.g. AMDGPU::ZERO, + // AMDGPU::ONE, etc) in machine opcodes. + continue; + } + } else { + if (!TII->isALUInstr(Use->getMachineOpcode())) { + continue; + } + + int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(), R600Operands::IMM); + assert(ImmIdx != -1); + + // subtract one from ImmIdx, because the DST operand is usually index + // 0 for MachineInstrs, but we have no DST in the Ops vector. + ImmIdx--; + + // Check that we aren't already using an immediate. + // XXX: It's possible for an instruction to have more than one + // immediate operand, but this is not supported yet. + if (ImmReg == AMDGPU::ALU_LITERAL_X) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx)); + assert(C); + + if (C->getZExtValue() != 0) { + // This instruction is already using an immediate. 
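+          // Folding more than one literal into a single ALU instruction is not supported, so leave this use alone.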
+ continue; + } + + // Set the immediate value + Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32); + } + } + // Set the immediate register + Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32); + + CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands()); + } + break; + } + } + return SelectCode(N); +} + +bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) { + if (!ptr) { + return false; + } + Type *ptrType = ptr->getType(); + return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace; +} + +const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V) { + if (!V) { + return NULL; + } + const Value *ret = NULL; + ValueMap<const Value *, bool> ValueBitMap; + std::queue<const Value *, std::list<const Value *> > ValueQueue; + ValueQueue.push(V); + while (!ValueQueue.empty()) { + V = ValueQueue.front(); + if (ValueBitMap.find(V) == ValueBitMap.end()) { + ValueBitMap[V] = true; + if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) { + ret = V; + break; + } else if (dyn_cast<GlobalVariable>(V)) { + ret = V; + break; + } else if (dyn_cast<Constant>(V)) { + const ConstantExpr *CE = dyn_cast<ConstantExpr>(V); + if (CE) { + ValueQueue.push(CE->getOperand(0)); + } + } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + ret = AI; + break; + } else if (const Instruction *I = dyn_cast<Instruction>(V)) { + uint32_t numOps = I->getNumOperands(); + for (uint32_t x = 0; x < numOps; ++x) { + ValueQueue.push(I->getOperand(x)); + } + } else { + assert(!"Found a Value that we didn't know how to handle!"); + } + } + ValueQueue.pop(); + } + return ret; +} + +bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) { + return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS); +} + +bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) { + return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS) + && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS) + && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)); +} + +bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) { + return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS); +} + +bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) { + return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS); +} + +bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) { + if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) { + return true; + } + MachineMemOperand *MMO = N->getMemOperand(); + const Value *V = MMO->getValue(); + const Value *BV = getBasePointerValue(V); + if (MMO + && MMO->getValue() + && ((V && dyn_cast<GlobalValue>(V)) + || (BV && dyn_cast<GlobalValue>( + getBasePointerValue(MMO->getValue()))))) { + return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS); + } else { + return false; + } +} + +bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) { + return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS); +} + +bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) { + return checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS); +} + +bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) { + return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS); +} + +bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) { + return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS); +} + +bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) { + MachineMemOperand *MMO = N->getMemOperand(); + if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) { + if (MMO) { + 
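// A constant-pool access is emitted as a private-address load whose memory operand refers to the constant pool pseudo source value, which is what this checks for. +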
const Value *V = MMO->getValue(); + const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V); + if (PSV && PSV == PseudoSourceValue::getConstantPool()) { + return true; + } + } + } + return false; +} + +bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) { + if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) { + // Check to make sure we are not a constant pool load or a constant load + // that is marked as a private load + if (isCPLoad(N) || isConstantLoad(N, -1)) { + return false; + } + } + if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS) + && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS) + && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS) + && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS) + && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS) + && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) { + return true; + } + return false; +} + +const char *AMDGPUDAGToDAGISel::getPassName() const { + return "AMDGPU DAG->DAG Pattern Instruction Selection"; +} + +#ifdef DEBUGTMP +#undef INT64_C +#endif +#undef DEBUGTMP + +///==== AMDGPU Functions ====/// + +bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base, + SDValue& Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) { + return false; + } + + + if (Addr.getOpcode() == ISD::ADD) { + bool Match = false; + + // Find the base ptr and the offset + for (unsigned i = 0; i < Addr.getNumOperands(); i++) { + SDValue Arg = Addr.getOperand(i); + ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg); + // This arg isn't a constant so it must be the base PTR. + if (!OffsetNode) { + Base = Addr.getOperand(i); + continue; + } + // Check if the constant argument fits in 8-bits. The offset is in bytes + // so we need to convert it to dwords. + if (isUInt<8>(OffsetNode->getZExtValue() >> 2)) { + Match = true; + Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2, + MVT::i32); + } + } + return Match; + } + + // Default case, no offset + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, + SDValue &Offset) { + ConstantSDNode * IMMOffset; + + if (Addr.getOpcode() == ISD::ADD + && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) + && isInt<16>(IMMOffset->getZExtValue())) { + + Base = Addr.getOperand(0); + Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32); + return true; + // If the pointer address is constant, we can move it to the offset field. 
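+  // and use the ZERO register as the base.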
+ } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) + && isInt<16>(IMMOffset->getZExtValue())) { + Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + CurDAG->getEntryNode().getDebugLoc(), + AMDGPU::ZERO, MVT::i32); + Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32); + return true; + } + + // Default case, no offset + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base, + SDValue& Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() != ISD::ADD) { + return false; + } + + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + + return true; +} diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp new file mode 100644 index 0000000000..2e60adcc99 --- /dev/null +++ b/lib/Target/R600/AMDILISelLowering.cpp @@ -0,0 +1,651 @@ +//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +/// \file +/// \brief TargetLowering functions borrowed from AMDIL. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUISelLowering.h" +#include "AMDGPURegisterInfo.h" +#include "AMDGPUSubtarget.h" +#include "AMDILDevices.h" +#include "AMDILIntrinsicInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" + +using namespace llvm; +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// +#include "AMDGPUGenCallingConv.inc" + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation Help Functions End +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// TargetLowering Class Implementation Begins +//===----------------------------------------------------------------------===// +void AMDGPUTargetLowering::InitAMDILLowering() { + int types[] = { + (int)MVT::i8, + (int)MVT::i16, + (int)MVT::i32, + (int)MVT::f32, + (int)MVT::f64, + (int)MVT::i64, + (int)MVT::v2i8, + (int)MVT::v4i8, + (int)MVT::v2i16, + (int)MVT::v4i16, + (int)MVT::v4f32, + (int)MVT::v4i32, + (int)MVT::v2f32, + (int)MVT::v2i32, + (int)MVT::v2f64, + (int)MVT::v2i64 + }; + + int IntTypes[] = { + (int)MVT::i8, + (int)MVT::i16, + (int)MVT::i32, + (int)MVT::i64 + }; + + int FloatTypes[] = { + (int)MVT::f32, + (int)MVT::f64 + }; + + int VectorTypes[] = { + (int)MVT::v2i8, + (int)MVT::v4i8, + (int)MVT::v2i16, + (int)MVT::v4i16, + (int)MVT::v4f32, + (int)MVT::v4i32, + (int)MVT::v2f32, + 
(int)MVT::v2i32, + (int)MVT::v2f64, + (int)MVT::v2i64 + }; + size_t NumTypes = sizeof(types) / sizeof(*types); + size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes); + size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes); + size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes); + + const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>(); + // These are the current register classes that are + // supported + + for (unsigned int x = 0; x < NumTypes; ++x) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; + + //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types + // We cannot sextinreg, expand to shifts + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); + setOperationAction(ISD::SUBE, VT, Expand); + setOperationAction(ISD::SUBC, VT, Expand); + setOperationAction(ISD::ADDE, VT, Expand); + setOperationAction(ISD::ADDC, VT, Expand); + setOperationAction(ISD::BRCOND, VT, Custom); + setOperationAction(ISD::BR_JT, VT, Expand); + setOperationAction(ISD::BRIND, VT, Expand); + // TODO: Implement custom UREM/SREM routines + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); + if (VT != MVT::i64 && VT != MVT::v2i64) { + setOperationAction(ISD::SDIV, VT, Custom); + } + } + for (unsigned int x = 0; x < NumFloatTypes; ++x) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; + + // IL does not have these operations for floating point types + setOperationAction(ISD::FP_ROUND_INREG, VT, Expand); + setOperationAction(ISD::SETOLT, VT, Expand); + setOperationAction(ISD::SETOGE, VT, Expand); + setOperationAction(ISD::SETOGT, VT, Expand); + setOperationAction(ISD::SETOLE, VT, Expand); + setOperationAction(ISD::SETULT, VT, Expand); + setOperationAction(ISD::SETUGE, VT, Expand); + setOperationAction(ISD::SETUGT, VT, Expand); + setOperationAction(ISD::SETULE, VT, Expand); + } + + for (unsigned int x = 0; x < NumIntTypes; ++x) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x]; + + // GPU also does not have divrem function for signed or unsigned + setOperationAction(ISD::SDIVREM, VT, Expand); + + // GPU does not have [S|U]MUL_LOHI functions as a single instruction + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); + + // GPU doesn't have a rotl, rotr, or byteswap instruction + setOperationAction(ISD::ROTR, VT, Expand); + setOperationAction(ISD::BSWAP, VT, Expand); + + // GPU doesn't have any counting operators + setOperationAction(ISD::CTPOP, VT, Expand); + setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTLZ, VT, Expand); + } + + for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii]; + + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + // setOperationAction(ISD::VSETCC, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + + } + if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) { + setOperationAction(ISD::MULHU, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::v2i64, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); + setOperationAction(ISD::MULHS, MVT::v2i64, Expand); + setOperationAction(ISD::ADD, MVT::v2i64, Expand); + setOperationAction(ISD::SREM, MVT::v2i64, Expand); + setOperationAction(ISD::Constant , MVT::i64 , Legal); + 
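// The remaining v2i64 operations are marked Expand so the legalizer breaks them down. +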
setOperationAction(ISD::SDIV, MVT::v2i64, Expand); + setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand); + setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand); + setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand); + setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand); + } + if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) { + // we support loading/storing v2f64 but not operations on the type + setOperationAction(ISD::FADD, MVT::v2f64, Expand); + setOperationAction(ISD::FSUB, MVT::v2f64, Expand); + setOperationAction(ISD::FMUL, MVT::v2f64, Expand); + setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); + setOperationAction(ISD::ConstantFP , MVT::f64 , Legal); + // We want to expand vector conversions into their scalar + // counterparts. + setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); + setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); + setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand); + setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand); + setOperationAction(ISD::FABS, MVT::f64, Expand); + setOperationAction(ISD::FABS, MVT::v2f64, Expand); + } + // TODO: Fix the UDIV24 algorithm so it works for these + // types correctly. This needs vector comparisons + // for this to work correctly. + setOperationAction(ISD::UDIV, MVT::v2i8, Expand); + setOperationAction(ISD::UDIV, MVT::v4i8, Expand); + setOperationAction(ISD::UDIV, MVT::v2i16, Expand); + setOperationAction(ISD::UDIV, MVT::v4i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); + setOperationAction(ISD::SUBC, MVT::Other, Expand); + setOperationAction(ISD::ADDE, MVT::Other, Expand); + setOperationAction(ISD::ADDC, MVT::Other, Expand); + setOperationAction(ISD::BRCOND, MVT::Other, Custom); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); + + + // Use the default implementation. + setOperationAction(ISD::ConstantFP , MVT::f32 , Legal); + setOperationAction(ISD::Constant , MVT::i32 , Legal); + + setSchedulingPreference(Sched::RegPressure); + setPow2DivIsCheap(false); + setSelectIsExpensive(true); + setJumpIsExpensive(true); + + maxStoresPerMemcpy = 4096; + maxStoresPerMemmove = 4096; + maxStoresPerMemset = 4096; + +} + +bool +AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, unsigned Intrinsic) const { + return false; +} + +// The backend supports 32 and 64 bit floating point immediates +bool +AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 + || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { + return true; + } else { + return false; + } +} + +bool +AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const { + if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 + || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { + return false; + } else { + return true; + } +} + + +// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to +// be zero. Op is expected to be a target specific node. Used by DAG +// combiner. 
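+// computeMaskedBitsForTargetNode below only computes known bits for SELECT_CC nodes; every other target node reports nothing known.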
+ +void +AMDGPUTargetLowering::computeMaskedBitsForTargetNode( + const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + APInt KnownZero2; + APInt KnownOne2; + KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything + switch (Op.getOpcode()) { + default: break; + case ISD::SELECT_CC: + DAG.ComputeMaskedBits( + Op.getOperand(1), + KnownZero, + KnownOne, + Depth + 1 + ); + DAG.ComputeMaskedBits( + Op.getOperand(0), + KnownZero2, + KnownOne2 + ); + assert((KnownZero & KnownOne) == 0 + && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 + && "Bits known to be one AND zero?"); + // Only known if known in both the LHS and RHS + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + }; +} + +//===----------------------------------------------------------------------===// +// Other Lowering Hooks +//===----------------------------------------------------------------------===// + +SDValue +AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { + EVT OVT = Op.getValueType(); + SDValue DST; + if (OVT.getScalarType() == MVT::i64) { + DST = LowerSDIV64(Op, DAG); + } else if (OVT.getScalarType() == MVT::i32) { + DST = LowerSDIV32(Op, DAG); + } else if (OVT.getScalarType() == MVT::i16 + || OVT.getScalarType() == MVT::i8) { + DST = LowerSDIV24(Op, DAG); + } else { + DST = SDValue(Op.getNode(), 0); + } + return DST; +} + +SDValue +AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const { + EVT OVT = Op.getValueType(); + SDValue DST; + if (OVT.getScalarType() == MVT::i64) { + DST = LowerSREM64(Op, DAG); + } else if (OVT.getScalarType() == MVT::i32) { + DST = LowerSREM32(Op, DAG); + } else if (OVT.getScalarType() == MVT::i16) { + DST = LowerSREM16(Op, DAG); + } else if (OVT.getScalarType() == MVT::i8) { + DST = LowerSREM8(Op, DAG); + } else { + DST = SDValue(Op.getNode(), 0); + } + return DST; +} + +SDValue +AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { + SDValue Data = Op.getOperand(0); + VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1)); + DebugLoc DL = Op.getDebugLoc(); + EVT DVT = Data.getValueType(); + EVT BVT = BaseType->getVT(); + unsigned baseBits = BVT.getScalarType().getSizeInBits(); + unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1; + unsigned shiftBits = srcBits - baseBits; + if (srcBits < 32) { + // If the op is less than 32 bits, then it needs to extend to 32bits + // so it can properly keep the upper bits valid. + EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1); + Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data); + shiftBits = 32 - baseBits; + DVT = IVT; + } + SDValue Shift = DAG.getConstant(shiftBits, DVT); + // Shift left by 'Shift' bits. + Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift); + // Signed shift Right by 'Shift' bits. + Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift); + if (srcBits < 32) { + // Once the sign extension is done, the op needs to be converted to + // its original type. + Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType()); + } + return Data; +} +EVT +AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const { + int iSize = (size * numEle); + int vEle = (iSize >> ((size == 64) ? 
6 : 5)); + if (!vEle) { + vEle = 1; + } + if (size == 64) { + if (vEle == 1) { + return EVT(MVT::i64); + } else { + return EVT(MVT::getVectorVT(MVT::i64, vEle)); + } + } else { + if (vEle == 1) { + return EVT(MVT::i32); + } else { + return EVT(MVT::getVectorVT(MVT::i32, vEle)); + } + } +} + +SDValue +AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Cond = Op.getOperand(1); + SDValue Jump = Op.getOperand(2); + SDValue Result; + Result = DAG.getNode( + AMDGPUISD::BRANCH_COND, + Op.getDebugLoc(), + Op.getValueType(), + Chain, Jump, Cond); + return Result; +} + +SDValue +AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + MVT INTTY; + MVT FLTTY; + if (!OVT.isVector()) { + INTTY = MVT::i32; + FLTTY = MVT::f32; + } else if (OVT.getVectorNumElements() == 2) { + INTTY = MVT::v2i32; + FLTTY = MVT::v2f32; + } else if (OVT.getVectorNumElements() == 4) { + INTTY = MVT::v4i32; + FLTTY = MVT::v4f32; + } + unsigned bitsize = OVT.getScalarType().getSizeInBits(); + // char|short jq = ia ^ ib; + SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); + + // jq = jq >> (bitsize - 2) + jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); + + // jq = jq | 0x1 + jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); + + // jq = (int)jq + jq = DAG.getSExtOrTrunc(jq, DL, INTTY); + + // int ia = (int)LHS; + SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); + + // int ib, (int)RHS; + SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); + + // float fa = (float)ia; + SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); + + // float fb = (float)ib; + SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); + + // float fq = native_divide(fa, fb); + SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb); + + // fq = trunc(fq); + fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); + + // float fqneg = -fq; + SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); + + // float fr = mad(fqneg, fb, fa); + SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa); + + // int iq = (int)fq; + SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); + + // fr = fabs(fr); + fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); + + // fb = fabs(fb); + fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); + + // int cv = fr >= fb; + SDValue cv; + if (INTTY == MVT::i32) { + cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); + } else { + cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); + } + // jq = (cv ? jq : 0); + jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq, + DAG.getConstant(0, OVT)); + // dst = iq + jq; + iq = DAG.getSExtOrTrunc(iq, DL, OVT); + iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); + return iq; +} + +SDValue +AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + // The LowerSDIV32 function generates equivalent to the following IL. 
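+  // Each IL instruction below is mirrored by the DAG nodes built in this function.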
+ // mov r0, LHS + // mov r1, RHS + // ilt r10, r0, 0 + // ilt r11, r1, 0 + // iadd r0, r0, r10 + // iadd r1, r1, r11 + // ixor r0, r0, r10 + // ixor r1, r1, r11 + // udiv r0, r0, r1 + // ixor r10, r10, r11 + // iadd r0, r0, r10 + // ixor DST, r0, r10 + + // mov r0, LHS + SDValue r0 = LHS; + + // mov r1, RHS + SDValue r1 = RHS; + + // ilt r10, r0, 0 + SDValue r10 = DAG.getSelectCC(DL, + r0, DAG.getConstant(0, OVT), + DAG.getConstant(-1, MVT::i32), + DAG.getConstant(0, MVT::i32), + ISD::SETLT); + + // ilt r11, r1, 0 + SDValue r11 = DAG.getSelectCC(DL, + r1, DAG.getConstant(0, OVT), + DAG.getConstant(-1, MVT::i32), + DAG.getConstant(0, MVT::i32), + ISD::SETLT); + + // iadd r0, r0, r10 + r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); + + // iadd r1, r1, r11 + r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); + + // ixor r0, r0, r10 + r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); + + // ixor r1, r1, r11 + r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); + + // udiv r0, r0, r1 + r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); + + // ixor r10, r10, r11 + r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); + + // iadd r0, r0, r10 + r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); + + // ixor DST, r0, r10 + SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); + return DST; +} + +SDValue +AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const { + return SDValue(Op.getNode(), 0); +} + +SDValue +AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + MVT INTTY = MVT::i32; + if (OVT == MVT::v2i8) { + INTTY = MVT::v2i32; + } else if (OVT == MVT::v4i8) { + INTTY = MVT::v4i32; + } + SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); + SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); + LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); + LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); + return LHS; +} + +SDValue +AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + MVT INTTY = MVT::i32; + if (OVT == MVT::v2i16) { + INTTY = MVT::v2i32; + } else if (OVT == MVT::v4i16) { + INTTY = MVT::v4i32; + } + SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); + SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); + LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); + LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); + return LHS; +} + +SDValue +AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + // The LowerSREM32 function generates equivalent to the following IL. 
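+  // As with LowerSDIV32, each IL instruction below corresponds to a DAG node built by this function.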
+ // mov r0, LHS + // mov r1, RHS + // ilt r10, r0, 0 + // ilt r11, r1, 0 + // iadd r0, r0, r10 + // iadd r1, r1, r11 + // ixor r0, r0, r10 + // ixor r1, r1, r11 + // udiv r20, r0, r1 + // umul r20, r20, r1 + // sub r0, r0, r20 + // iadd r0, r0, r10 + // ixor DST, r0, r10 + + // mov r0, LHS + SDValue r0 = LHS; + + // mov r1, RHS + SDValue r1 = RHS; + + // ilt r10, r0, 0 + SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT); + + // ilt r11, r1, 0 + SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT); + + // iadd r0, r0, r10 + r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); + + // iadd r1, r1, r11 + r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); + + // ixor r0, r0, r10 + r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); + + // ixor r1, r1, r11 + r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); + + // udiv r20, r0, r1 + SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); + + // umul r20, r20, r1 + r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1); + + // sub r0, r0, r20 + r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); + + // iadd r0, r0, r10 + r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); + + // ixor DST, r0, r10 + SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); + return DST; +} + +SDValue +AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const { + return SDValue(Op.getNode(), 0); +} diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td new file mode 100644 index 0000000000..e969bbf8ca --- /dev/null +++ b/lib/Target/R600/AMDILInstrInfo.td @@ -0,0 +1,208 @@ +//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file describes the AMDIL instructions in TableGen format. +// +//===----------------------------------------------------------------------===// +// AMDIL Instruction Predicate Definitions +// Predicate that is set to true if the hardware supports double precision +// divide +def HasHWDDiv : Predicate<"Subtarget.device()" + "->getGeneration() > AMDGPUDeviceInfo::HD4XXX && " + "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">; + +// Predicate that is set to true if the hardware supports double, but not double +// precision divide in hardware +def HasSWDDiv : Predicate<"Subtarget.device()" + "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&" + "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">; + +// Predicate that is set to true if the hardware support 24bit signed +// math ops. Otherwise a software expansion to 32bit math ops is used instead. 
+def HasHWSign24Bit : Predicate<"Subtarget.device()" + "->getGeneration() > AMDGPUDeviceInfo::HD5XXX">; + +// Predicate that is set to true if 64bit operations are supported or not +def HasHW64Bit : Predicate<"Subtarget.device()" + "->usesHardware(AMDGPUDeviceInfo::LongOps)">; +def HasSW64Bit : Predicate<"Subtarget.device()" + "->usesSoftware(AMDGPUDeviceInfo::LongOps)">; + +// Predicate that is set to true if the timer register is supported +def HasTmrRegister : Predicate<"Subtarget.device()" + "->isSupported(AMDGPUDeviceInfo::TmrReg)">; +// Predicate that is true if we are at least evergreen series +def HasDeviceIDInst : Predicate<"Subtarget.device()" + "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">; + +// Predicate that is true if we have region address space. +def hasRegionAS : Predicate<"Subtarget.device()" + "->usesHardware(AMDGPUDeviceInfo::RegionMem)">; + +// Predicate that is false if we don't have region address space. +def noRegionAS : Predicate<"!Subtarget.device()" + "->isSupported(AMDGPUDeviceInfo::RegionMem)">; + + +// Predicate that is set to true if 64bit Mul is supported in the IL or not +def HasHW64Mul : Predicate<"Subtarget.calVersion()" + ">= CAL_VERSION_SC_139" + "&& Subtarget.device()" + "->getGeneration() >=" + "AMDGPUDeviceInfo::HD5XXX">; +def HasSW64Mul : Predicate<"Subtarget.calVersion()" + "< CAL_VERSION_SC_139">; +// Predicate that is set to true if 64bit Div/Mod is supported in the IL or not +def HasHW64DivMod : Predicate<"Subtarget.device()" + "->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">; +def HasSW64DivMod : Predicate<"Subtarget.device()" + "->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">; + +// Predicate that is set to true if 64bit pointer are used. +def Has64BitPtr : Predicate<"Subtarget.is64bit()">; +def Has32BitPtr : Predicate<"!Subtarget.is64bit()">; +//===--------------------------------------------------------------------===// +// Custom Operands +//===--------------------------------------------------------------------===// +def brtarget : Operand<OtherVT>; + +//===--------------------------------------------------------------------===// +// Custom Selection DAG Type Profiles +//===--------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Generic Profile Types +//===----------------------------------------------------------------------===// + +def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> + ]>; +def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3> + ]>; +def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [ + SDTCisEltOfVec<1, 0> + ]>; + +//===----------------------------------------------------------------------===// +// Flow Control Profile Types +//===----------------------------------------------------------------------===// +// Branch instruction where second and third are basic blocks +def SDTIL_BRCond : SDTypeProfile<0, 2, [ + SDTCisVT<0, OtherVT> + ]>; + +//===--------------------------------------------------------------------===// +// Custom Selection DAG Nodes +//===--------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Flow Control DAG Nodes +//===----------------------------------------------------------------------===// +def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>; + 
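+// IL_brcond produces no results and takes two operands, the target basic block and an i32/f32 condition value (see SDTIL_BRCond above). +// The BranchConditional multiclass defined below can match it with patterns of the form (IL_brcond bb:$target, GPRI32:$src0).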
+//===----------------------------------------------------------------------===// +// Call/Return DAG Nodes +//===----------------------------------------------------------------------===// +def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; + +//===--------------------------------------------------------------------===// +// Instructions +//===--------------------------------------------------------------------===// +// Floating point math functions +def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>; +def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>; + +//===----------------------------------------------------------------------===// +// Integer functions +//===----------------------------------------------------------------------===// +def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; + +//===--------------------------------------------------------------------===// +// Custom Pattern DAG Nodes +//===--------------------------------------------------------------------===// +def global_store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast<StoreSDNode>(N)); +}]>; + +//===----------------------------------------------------------------------===// +// Load pattern fragments +//===----------------------------------------------------------------------===// +// Global address space loads +def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isGlobalLoad(dyn_cast<LoadSDNode>(N)); +}]>; +// Constant address space loads +def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); +}]>; + +//===----------------------------------------------------------------------===// +// Complex addressing mode patterns +//===----------------------------------------------------------------------===// +def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>; +def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>; +def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>; +def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>; + +//===----------------------------------------------------------------------===// +// Instruction format classes +//===----------------------------------------------------------------------===// +class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern> +: Instruction { + + let Namespace = "AMDGPU"; + dag OutOperandList = outs; + dag InOperandList = ins; + let Pattern = pattern; + let AsmString = !strconcat(asmstr, "\n"); + let isPseudo = 1; + let Itinerary = NullALU; + bit hasIEEEFlag = 0; + bit hasZeroOpFlag = 0; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +//===--------------------------------------------------------------------===// +// Multiclass Instruction formats +//===--------------------------------------------------------------------===// +// Multiclass that handles branch instructions +multiclass BranchConditional<SDNode Op> { + def _i32 : ILFormat<(outs), + (ins brtarget:$target, GPRI32:$src0), + "; i32 Pseudo branch instruction", + [(Op bb:$target, GPRI32:$src0)]>; + def _f32 : ILFormat<(outs), + (ins brtarget:$target, GPRF32:$src0), + "; f32 Pseudo branch instruction", + [(Op bb:$target, GPRF32:$src0)]>; +} + +// Only scalar types should generate flow control +multiclass BranchInstr<string name> { + def _i32 : ILFormat<(outs), (ins GPRI32:$src), + !strconcat(name, " 
$src"), []>; + def _f32 : ILFormat<(outs), (ins GPRF32:$src), + !strconcat(name, " $src"), []>; +} +// Only scalar types should generate flow control +multiclass BranchInstr2<string name> { + def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1), + !strconcat(name, " $src0, $src1"), []>; + def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1), + !strconcat(name, " $src0, $src1"), []>; +} + +//===--------------------------------------------------------------------===// +// Intrinsics support +//===--------------------------------------------------------------------===// +include "AMDILIntrinsics.td" diff --git a/lib/Target/R600/AMDILIntrinsicInfo.cpp b/lib/Target/R600/AMDILIntrinsicInfo.cpp new file mode 100644 index 0000000000..4ddb057d80 --- /dev/null +++ b/lib/Target/R600/AMDILIntrinsicInfo.cpp @@ -0,0 +1,79 @@ +//===- AMDILIntrinsicInfo.cpp - AMDGPU Intrinsic Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +/// \file +/// \brief AMDGPU Implementation of the IntrinsicInfo class. +// +//===-----------------------------------------------------------------------===// + +#include "AMDILIntrinsicInfo.h" +#include "AMDGPUSubtarget.h" +#include "AMDIL.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +#include "AMDGPUGenIntrinsics.inc" +#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN + +AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm) + : TargetIntrinsicInfo() { +} + +std::string +AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys, + unsigned int numTys) const { + static const char* const names[] = { +#define GET_INTRINSIC_NAME_TABLE +#include "AMDGPUGenIntrinsics.inc" +#undef GET_INTRINSIC_NAME_TABLE + }; + + if (IntrID < Intrinsic::num_intrinsics) { + return 0; + } + assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics + && "Invalid intrinsic ID"); + + std::string Result(names[IntrID - Intrinsic::num_intrinsics]); + return Result; +} + +unsigned int +AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const { +#define GET_FUNCTION_RECOGNIZER +#include "AMDGPUGenIntrinsics.inc" +#undef GET_FUNCTION_RECOGNIZER + AMDGPUIntrinsic::ID IntrinsicID + = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic; + IntrinsicID = getIntrinsicForGCCBuiltin("AMDGPU", Name); + + if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) { + return IntrinsicID; + } + return 0; +} + +bool +AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const { + // Overload Table +#define GET_INTRINSIC_OVERLOAD_TABLE +#include "AMDGPUGenIntrinsics.inc" +#undef GET_INTRINSIC_OVERLOAD_TABLE +} + +Function* +AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, + Type **Tys, + unsigned numTys) const { + llvm_unreachable("Not implemented"); +} diff --git a/lib/Target/R600/AMDILIntrinsicInfo.h b/lib/Target/R600/AMDILIntrinsicInfo.h new file mode 100644 index 0000000000..35559e23fc --- /dev/null +++ b/lib/Target/R600/AMDILIntrinsicInfo.h @@ -0,0 +1,49 @@ +//===- AMDILIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +// +/// \file +/// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class. +// +//===-----------------------------------------------------------------------===// +#ifndef AMDIL_INTRINSICS_H +#define AMDIL_INTRINSICS_H + +#include "llvm/IR/Intrinsics.h" +#include "llvm/Target/TargetIntrinsicInfo.h" + +namespace llvm { +class TargetMachine; + +namespace AMDGPUIntrinsic { +enum ID { + last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1, +#define GET_INTRINSIC_ENUM_VALUES +#include "AMDGPUGenIntrinsics.inc" +#undef GET_INTRINSIC_ENUM_VALUES + , num_AMDGPU_intrinsics +}; + +} // end namespace AMDGPUIntrinsic + +class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo { +public: + AMDGPUIntrinsicInfo(TargetMachine *tm); + std::string getName(unsigned int IntrId, Type **Tys = 0, + unsigned int numTys = 0) const; + unsigned int lookupName(const char *Name, unsigned int Len) const; + bool isOverloaded(unsigned int IID) const; + Function *getDeclaration(Module *M, unsigned int ID, + Type **Tys = 0, + unsigned int numTys = 0) const; +}; + +} // end namespace llvm + +#endif // AMDIL_INTRINSICS_H + diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td new file mode 100644 index 0000000000..3f9e20f0c8 --- /dev/null +++ b/lib/Target/R600/AMDILIntrinsics.td @@ -0,0 +1,242 @@ +//===- AMDILIntrinsics.td - Defines AMDIL Intrinscs -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file defines all of the amdil-specific intrinsics +// +//===---------------------------------------------------------------===// +//===--------------------------------------------------------------------===// +// Intrinsic classes +// Generic versions of the above classes but for Target specific intrinsics +// instead of SDNode patterns. 
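// As an illustration of how these classes are used (sketch, referring to defs
// that appear later in this file): a definition such as
//   def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">, TernaryIntInt;
// declares an intrinsic taking three integer operands of one common type and
// returning that type, while GCCBuiltin ties the frontend builtin name
// __amdil_imad to it so the lookup code in AMDILIntrinsicInfo.cpp can resolve
// calls by name.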
+//===--------------------------------------------------------------------===// +let TargetPrefix = "AMDIL", isTarget = 1 in { + class VoidIntLong : + Intrinsic<[llvm_i64_ty], [], []>; + class VoidIntInt : + Intrinsic<[llvm_i32_ty], [], []>; + class VoidIntBool : + Intrinsic<[llvm_i32_ty], [], []>; + class UnaryIntInt : + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; + class UnaryIntFloat : + Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + class ConvertIntFTOI : + Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>; + class ConvertIntITOF : + Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>; + class UnaryIntNoRetInt : + Intrinsic<[], [llvm_anyint_ty], []>; + class UnaryIntNoRetFloat : + Intrinsic<[], [llvm_anyfloat_ty], []>; + class BinaryIntInt : + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; + class BinaryIntFloat : + Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; + class BinaryIntNoRetInt : + Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>; + class BinaryIntNoRetFloat : + Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>; + class TernaryIntInt : + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; + class TernaryIntFloat : + Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; + class QuaternaryIntInt : + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; + class UnaryAtomicInt : + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; + class BinaryAtomicInt : + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; + class TernaryAtomicInt : + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>; + class UnaryAtomicIntNoRet : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; + class BinaryAtomicIntNoRet : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; + class TernaryAtomicIntNoRet : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; +} + +let TargetPrefix = "AMDIL", isTarget = 1 in { + def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt; + + def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">, + TernaryIntInt; + def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">, + TernaryIntInt; + def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">, + UnaryIntInt; + def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">, + UnaryIntInt; + def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">, + UnaryIntInt; + def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">, + UnaryIntInt; + def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">, + UnaryIntInt; + def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">, + TernaryIntInt; + def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">, + TernaryIntInt; + def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">, + QuaternaryIntInt; + def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">, + TernaryIntInt; + def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">, + BinaryIntInt; + def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">, + TernaryIntInt; + def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">, + TernaryIntInt; + def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">, + TernaryIntFloat; + def 
int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">, + BinaryIntInt; + def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">, + BinaryIntInt; + def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">, + BinaryIntInt; + def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">, + BinaryIntInt; + def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">, + BinaryIntInt; + def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">, + BinaryIntInt; + def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">, + TernaryIntInt; + def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">, + TernaryIntInt; + def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">, + BinaryIntInt; + def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">, + BinaryIntInt; + def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">, + BinaryIntInt; + def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">, + BinaryIntInt; + def int_AMDIL_min : GCCBuiltin<"__amdil_min">, + BinaryIntFloat; + def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">, + BinaryIntInt; + def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">, + BinaryIntInt; + def int_AMDIL_max : GCCBuiltin<"__amdil_max">, + BinaryIntFloat; + def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">, + TernaryIntInt; + def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">, + TernaryIntInt; + def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">, + TernaryIntInt; + def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">, + UnaryIntFloat; + def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">, + TernaryIntFloat; + def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">, + UnaryIntFloat; + def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">, + UnaryIntFloat; + def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">, + UnaryIntFloat; + def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">, + UnaryIntFloat; + def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">, + UnaryIntFloat; + def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">, + UnaryIntFloat; + def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">, + UnaryIntFloat; + def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">, + UnaryIntFloat; + def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">, + UnaryIntFloat; + def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">, + UnaryIntFloat; + def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">, + UnaryIntFloat; + def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">, + UnaryIntFloat; + def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat; + def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat; + def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt; + def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">, + UnaryIntFloat; + def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">, + UnaryIntFloat; + def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">, + UnaryIntFloat; + def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">, + UnaryIntFloat; + def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">, + UnaryIntFloat; + def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">, + UnaryIntFloat; + def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">, + UnaryIntFloat; + def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">, + UnaryIntFloat; + def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">, + TernaryIntFloat; + def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">, + UnaryIntFloat; + def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">, + UnaryIntFloat; + def int_AMDIL_length : GCCBuiltin<"__amdil_length">, + UnaryIntFloat; + def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">, + TernaryIntFloat; + def int_AMDIL_media_sad4 : 
GCCBuiltin<"__amdil_sad4">, + Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i32_ty], []>; + + def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">, + Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>; + def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">, + Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>; + def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], []>; + def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">, + ConvertIntITOF; + def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">, + ConvertIntFTOI; + def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">, + ConvertIntFTOI; + def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">, + ConvertIntFTOI; + def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">, + ConvertIntFTOI; + def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">, + ConvertIntFTOI; + def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">, + ConvertIntFTOI; + def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>; + def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">, + ConvertIntITOF; + def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">, + ConvertIntITOF; + def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">, + ConvertIntITOF; + def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">, + ConvertIntITOF; + def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">, + Intrinsic<[llvm_float_ty], [llvm_v2f32_ty, + llvm_v2f32_ty, llvm_float_ty], []>; + def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">, + Intrinsic<[llvm_float_ty], [llvm_v2f32_ty, + llvm_v2f32_ty], []>; + def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">, + Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], []>; + def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">, + Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], []>; +} diff --git a/lib/Target/R600/AMDILNIDevice.cpp b/lib/Target/R600/AMDILNIDevice.cpp new file mode 100644 index 0000000000..47c3f7f209 --- /dev/null +++ b/lib/Target/R600/AMDILNIDevice.cpp @@ -0,0 +1,65 @@ +//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +/// \file +//==-----------------------------------------------------------------------===// +#include "AMDILNIDevice.h" +#include "AMDGPUSubtarget.h" +#include "AMDILEvergreenDevice.h" + +using namespace llvm; + +AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST) + : AMDGPUEvergreenDevice(ST) { + std::string name = ST->getDeviceName(); + if (name == "caicos") { + DeviceFlag = OCL_DEVICE_CAICOS; + } else if (name == "turks") { + DeviceFlag = OCL_DEVICE_TURKS; + } else if (name == "cayman") { + DeviceFlag = OCL_DEVICE_CAYMAN; + } else { + DeviceFlag = OCL_DEVICE_BARTS; + } +} +AMDGPUNIDevice::~AMDGPUNIDevice() { +} + +size_t +AMDGPUNIDevice::getMaxLDSSize() const { + if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { + return MAX_LDS_SIZE_900; + } else { + return 0; + } +} + +uint32_t +AMDGPUNIDevice::getGeneration() const { + return AMDGPUDeviceInfo::HD6XXX; +} + + +AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST) + : AMDGPUNIDevice(ST) { + setCaps(); +} + +AMDGPUCaymanDevice::~AMDGPUCaymanDevice() { +} + +void +AMDGPUCaymanDevice::setCaps() { + if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) { + mHWBits.set(AMDGPUDeviceInfo::DoubleOps); + mHWBits.set(AMDGPUDeviceInfo::FMA); + } + mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps); + mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps); + mSWBits.set(AMDGPUDeviceInfo::ArenaSegment); +} + diff --git a/lib/Target/R600/AMDILNIDevice.h b/lib/Target/R600/AMDILNIDevice.h new file mode 100644 index 0000000000..24a640845e --- /dev/null +++ b/lib/Target/R600/AMDILNIDevice.h @@ -0,0 +1,57 @@ +//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +/// \file +/// \brief Interface for the subtarget data classes. +/// +/// This file will define the interface that each generation needs to +/// implement in order to correctly answer queries on the capabilities of the +/// specific hardware. +//===---------------------------------------------------------------------===// +#ifndef AMDILNIDEVICE_H +#define AMDILNIDEVICE_H +#include "AMDGPUSubtarget.h" +#include "AMDILEvergreenDevice.h" + +namespace llvm { + +class AMDGPUSubtarget; +//===---------------------------------------------------------------------===// +// NI generation of devices and their respective sub classes +//===---------------------------------------------------------------------===// + +/// \brief The AMDGPUNIDevice is the base class for all Northern Island series of +/// cards. +/// +/// It is very similiar to the AMDGPUEvergreenDevice, with the major +/// exception being differences in wavefront size and hardware capabilities. The +/// NI devices are all 64 wide wavefronts and also add support for signed 24 bit +/// integer operations +class AMDGPUNIDevice : public AMDGPUEvergreenDevice { +public: + AMDGPUNIDevice(AMDGPUSubtarget*); + virtual ~AMDGPUNIDevice(); + virtual size_t getMaxLDSSize() const; + virtual uint32_t getGeneration() const; +}; + +/// Just as the AMDGPUCypressDevice is the double capable version of the +/// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double capable version +/// of the AMDGPUNIDevice. The other major difference is that the Cayman Device +/// has 4 wide ALU's, whereas the rest of the NI family is a 5 wide. 
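/// Cayman also enables double-precision and FMA when the DoubleOps override is
/// set, and marks signed 24-bit operations as hardware-supported; see
/// AMDGPUCaymanDevice::setCaps() in AMDILNIDevice.cpp above.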
+class AMDGPUCaymanDevice: public AMDGPUNIDevice { +public: + AMDGPUCaymanDevice(AMDGPUSubtarget*); + virtual ~AMDGPUCaymanDevice(); +private: + virtual void setCaps(); +}; + +static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800; +} // namespace llvm +#endif // AMDILNIDEVICE_H diff --git a/lib/Target/R600/AMDILPeepholeOptimizer.cpp b/lib/Target/R600/AMDILPeepholeOptimizer.cpp new file mode 100644 index 0000000000..a5f7ee5053 --- /dev/null +++ b/lib/Target/R600/AMDILPeepholeOptimizer.cpp @@ -0,0 +1,1215 @@ +//===-- AMDILPeepholeOptimizer.cpp - AMDGPU Peephole optimizations ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//==-----------------------------------------------------------------------===// + +#define DEBUG_TYPE "PeepholeOpt" +#ifdef DEBUG +#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) +#else +#define DEBUGME 0 +#endif + +#include "AMDILDevices.h" +#include "AMDGPUInstrInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Constants.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" + +#include <sstream> + +#if 0 +STATISTIC(PointerAssignments, "Number of dynamic pointer " + "assigments discovered"); +STATISTIC(PointerSubtract, "Number of pointer subtractions discovered"); +#endif + +using namespace llvm; +// The Peephole optimization pass is used to do simple last minute optimizations +// that are required for correct code or to remove redundant functions +namespace { + +class OpaqueType; + +class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass { +public: + TargetMachine &TM; + static char ID; + AMDGPUPeepholeOpt(TargetMachine &tm); + ~AMDGPUPeepholeOpt(); + const char *getPassName() const; + bool runOnFunction(Function &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + void getAnalysisUsage(AnalysisUsage &AU) const; +protected: +private: + // Function to initiate all of the instruction level optimizations. + bool instLevelOptimizations(BasicBlock::iterator *inst); + // Quick check to see if we need to dump all of the pointers into the + // arena. If this is correct, then we set all pointers to exist in arena. This + // is a workaround for aliasing of pointers in a struct/union. + bool dumpAllIntoArena(Function &F); + // Because I don't want to invalidate any pointers while in the + // safeNestedForEachFunction. I push atomic conversions to a vector and handle + // it later. This function does the conversions if required. + void doAtomicConversionIfNeeded(Function &F); + // Because __amdil_is_constant cannot be properly evaluated if + // optimizations are disabled, the call's are placed in a vector + // and evaluated after the __amdil_image* functions are evaluated + // which should allow the __amdil_is_constant function to be + // evaluated correctly. + void doIsConstCallConversionIfNeeded(); + bool mChanged; + bool mDebug; + bool mConvertAtomics; + CodeGenOpt::Level optLevel; + // Run a series of tests to see if we can optimize a CALL instruction. 
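  // (In the implementation below these tests are, in order: signed 24-bit op
  // expansion, sampler propagation, BFI/BFM expansion, accurate-divide
  // conversion, folding of __amdil_is_constant and __amdil_is_asic_id_i32, and
  // redirecting unused-result __atom* calls to their *_noret variants.)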
+ bool optimizeCallInst(BasicBlock::iterator *bbb); + // A peephole optimization to optimize bit extract sequences. + bool optimizeBitExtract(Instruction *inst); + // A peephole optimization to optimize bit insert sequences. + bool optimizeBitInsert(Instruction *inst); + bool setupBitInsert(Instruction *base, + Instruction *&src, + Constant *&mask, + Constant *&shift); + // Expand the bit field insert instruction on versions of OpenCL that + // don't support it. + bool expandBFI(CallInst *CI); + // Expand the bit field mask instruction on version of OpenCL that + // don't support it. + bool expandBFM(CallInst *CI); + // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in + // this case we need to expand them. These functions check for 24bit functions + // and then expand. + bool isSigned24BitOps(CallInst *CI); + void expandSigned24BitOps(CallInst *CI); + // One optimization that can occur is that if the required workgroup size is + // specified then the result of get_local_size is known at compile time and + // can be returned accordingly. + bool isRWGLocalOpt(CallInst *CI); + // On northern island cards, the division is slightly less accurate than on + // previous generations, so we need to utilize a more accurate division. So we + // can translate the accurate divide to a normal divide on all other cards. + bool convertAccurateDivide(CallInst *CI); + void expandAccurateDivide(CallInst *CI); + // If the alignment is set incorrectly, it can produce really inefficient + // code. This checks for this scenario and fixes it if possible. + bool correctMisalignedMemOp(Instruction *inst); + + // If we are in no opt mode, then we need to make sure that + // local samplers are properly propagated as constant propagation + // doesn't occur and we need to know the value of kernel defined + // samplers at compile time. + bool propagateSamplerInst(CallInst *CI); + + // Helper functions + + // Group of functions that recursively calculate the size of a structure based + // on it's sub-types. + size_t getTypeSize(Type * const T, bool dereferencePtr = false); + size_t getTypeSize(StructType * const ST, bool dereferencePtr = false); + size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false); + size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false); + size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false); + size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false); + size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false); + size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false); + + LLVMContext *mCTX; + Function *mF; + const AMDGPUSubtarget *mSTM; + SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs; + SmallVector<CallInst *, 16> isConstVec; +}; // class AMDGPUPeepholeOpt + char AMDGPUPeepholeOpt::ID = 0; + +// A template function that has two levels of looping before calling the +// function with a pointer to the current iterator. 
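// runOnFunction() below drives it over every instruction of every basic block:
//   safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
//       std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
//                    this));
// The callback returns true only when it has already advanced (and possibly
// erased) the current instruction itself; otherwise the inner loop advances it.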
+template<class InputIterator, class SecondIterator, class Function> +Function safeNestedForEach(InputIterator First, InputIterator Last, + SecondIterator S, Function F) { + for ( ; First != Last; ++First) { + SecondIterator sf, sl; + for (sf = First->begin(), sl = First->end(); + sf != sl; ) { + if (!F(&sf)) { + ++sf; + } + } + } + return F; +} + +} // anonymous namespace + +namespace llvm { + FunctionPass * + createAMDGPUPeepholeOpt(TargetMachine &tm) { + return new AMDGPUPeepholeOpt(tm); + } +} // llvm namespace + +AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm) + : FunctionPass(ID), TM(tm) { + mDebug = DEBUGME; + optLevel = TM.getOptLevel(); + +} + +AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() { +} + +const char * +AMDGPUPeepholeOpt::getPassName() const { + return "AMDGPU PeepHole Optimization Pass"; +} + +bool +containsPointerType(Type *Ty) { + if (!Ty) { + return false; + } + switch(Ty->getTypeID()) { + default: + return false; + case Type::StructTyID: { + const StructType *ST = dyn_cast<StructType>(Ty); + for (StructType::element_iterator stb = ST->element_begin(), + ste = ST->element_end(); stb != ste; ++stb) { + if (!containsPointerType(*stb)) { + continue; + } + return true; + } + break; + } + case Type::VectorTyID: + case Type::ArrayTyID: + return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType()); + case Type::PointerTyID: + return true; + }; + return false; +} + +bool +AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) { + bool dumpAll = false; + for (Function::const_arg_iterator cab = F.arg_begin(), + cae = F.arg_end(); cab != cae; ++cab) { + const Argument *arg = cab; + const PointerType *PT = dyn_cast<PointerType>(arg->getType()); + if (!PT) { + continue; + } + Type *DereferencedType = PT->getElementType(); + if (!dyn_cast<StructType>(DereferencedType) + ) { + continue; + } + if (!containsPointerType(DereferencedType)) { + continue; + } + // FIXME: Because a pointer inside of a struct/union may be aliased to + // another pointer we need to take the conservative approach and place all + // pointers into the arena until more advanced detection is implemented. + dumpAll = true; + } + return dumpAll; +} +void +AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() { + if (isConstVec.empty()) { + return; + } + for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) { + CallInst *CI = isConstVec[x]; + Constant *CV = dyn_cast<Constant>(CI->getOperand(0)); + Type *aType = Type::getInt32Ty(*mCTX); + Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1) + : ConstantInt::get(aType, 0); + CI->replaceAllUsesWith(Val); + CI->eraseFromParent(); + } + isConstVec.clear(); +} +void +AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) { + // Don't do anything if we don't have any atomic operations. 
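  // (The atomicFuncs pairs are collected in optimizeCallInst(): each __atom*
  // call with an unused result is paired with a freshly declared *_noret
  // variant, and the loop below rewrites the call's callee operand to that
  // variant.)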
+ if (atomicFuncs.empty()) { + return; + } + // Change the function name for the atomic if it is required + uint32_t size = atomicFuncs.size(); + for (uint32_t x = 0; x < size; ++x) { + atomicFuncs[x].first->setOperand( + atomicFuncs[x].first->getNumOperands()-1, + atomicFuncs[x].second); + + } + mChanged = true; + if (mConvertAtomics) { + return; + } +} + +bool +AMDGPUPeepholeOpt::runOnFunction(Function &MF) { + mChanged = false; + mF = &MF; + mSTM = &TM.getSubtarget<AMDGPUSubtarget>(); + if (mDebug) { + MF.dump(); + } + mCTX = &MF.getType()->getContext(); + mConvertAtomics = true; + safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(), + std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations), + this)); + + doAtomicConversionIfNeeded(MF); + doIsConstCallConversionIfNeeded(); + + if (mDebug) { + MF.dump(); + } + return mChanged; +} + +bool +AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) { + Instruction *inst = (*bbb); + CallInst *CI = dyn_cast<CallInst>(inst); + if (!CI) { + return false; + } + if (isSigned24BitOps(CI)) { + expandSigned24BitOps(CI); + ++(*bbb); + CI->eraseFromParent(); + return true; + } + if (propagateSamplerInst(CI)) { + return false; + } + if (expandBFI(CI) || expandBFM(CI)) { + ++(*bbb); + CI->eraseFromParent(); + return true; + } + if (convertAccurateDivide(CI)) { + expandAccurateDivide(CI); + ++(*bbb); + CI->eraseFromParent(); + return true; + } + + StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName(); + if (calleeName.startswith("__amdil_is_constant")) { + // If we do not have optimizations, then this + // cannot be properly evaluated, so we add the + // call instruction to a vector and process + // them at the end of processing after the + // samplers have been correctly handled. + if (optLevel == CodeGenOpt::None) { + isConstVec.push_back(CI); + return false; + } else { + Constant *CV = dyn_cast<Constant>(CI->getOperand(0)); + Type *aType = Type::getInt32Ty(*mCTX); + Value *Val = (CV != NULL) ? 
ConstantInt::get(aType, 1) + : ConstantInt::get(aType, 0); + CI->replaceAllUsesWith(Val); + ++(*bbb); + CI->eraseFromParent(); + return true; + } + } + + if (calleeName.equals("__amdil_is_asic_id_i32")) { + ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0)); + Type *aType = Type::getInt32Ty(*mCTX); + Value *Val = CV; + if (Val) { + Val = ConstantInt::get(aType, + mSTM->device()->getDeviceFlag() & CV->getZExtValue()); + } else { + Val = ConstantInt::get(aType, 0); + } + CI->replaceAllUsesWith(Val); + ++(*bbb); + CI->eraseFromParent(); + return true; + } + Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1)); + if (!F) { + return false; + } + if (F->getName().startswith("__atom") && !CI->getNumUses() + && F->getName().find("_xchg") == StringRef::npos) { + std::string buffer(F->getName().str() + "_noret"); + F = dyn_cast<Function>( + F->getParent()->getOrInsertFunction(buffer, F->getFunctionType())); + atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F)); + } + + if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment) + && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) { + return false; + } + if (!mConvertAtomics) { + return false; + } + StringRef name = F->getName(); + if (name.startswith("__atom") && name.find("_g") != StringRef::npos) { + mConvertAtomics = false; + } + return false; +} + +bool +AMDGPUPeepholeOpt::setupBitInsert(Instruction *base, + Instruction *&src, + Constant *&mask, + Constant *&shift) { + if (!base) { + if (mDebug) { + dbgs() << "Null pointer passed into function.\n"; + } + return false; + } + bool andOp = false; + if (base->getOpcode() == Instruction::Shl) { + shift = dyn_cast<Constant>(base->getOperand(1)); + } else if (base->getOpcode() == Instruction::And) { + mask = dyn_cast<Constant>(base->getOperand(1)); + andOp = true; + } else { + if (mDebug) { + dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n"; + } + // If the base is neither a Shl or a And, we don't fit any of the patterns above. + return false; + } + src = dyn_cast<Instruction>(base->getOperand(0)); + if (!src) { + if (mDebug) { + dbgs() << "Failed setup since the base operand is not an instruction!\n"; + } + return false; + } + // If we find an 'and' operation, then we don't need to + // find the next operation as we already know the + // bits that are valid at this point. + if (andOp) { + return true; + } + if (src->getOpcode() == Instruction::Shl && !shift) { + shift = dyn_cast<Constant>(src->getOperand(1)); + src = dyn_cast<Instruction>(src->getOperand(0)); + } else if (src->getOpcode() == Instruction::And && !mask) { + mask = dyn_cast<Constant>(src->getOperand(1)); + } + if (!mask && !shift) { + if (mDebug) { + dbgs() << "Failed setup since both mask and shift are NULL!\n"; + } + // Did not find a constant mask or a shift. + return false; + } + return true; +} +bool +AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) { + if (!inst) { + return false; + } + if (!inst->isBinaryOp()) { + return false; + } + if (inst->getOpcode() != Instruction::Or) { + return false; + } + if (optLevel == CodeGenOpt::None) { + return false; + } + // We want to do an optimization on a sequence of ops that in the end equals a + // single ISA instruction. 
+ // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F) + // Some simplified versions of this pattern are as follows: + // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0 + // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E + // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B + // (A & B) | (D << F) when (1 << F) >= B + // (A << C) | (D & E) when (1 << C) >= E + if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) { + // The HD4XXX hardware doesn't support the ubit_insert instruction. + return false; + } + Type *aType = inst->getType(); + bool isVector = aType->isVectorTy(); + int numEle = 1; + // This optimization only works on 32bit integers. + if (aType->getScalarType() + != Type::getInt32Ty(inst->getContext())) { + return false; + } + if (isVector) { + const VectorType *VT = dyn_cast<VectorType>(aType); + numEle = VT->getNumElements(); + // We currently cannot support more than 4 elements in a intrinsic and we + // cannot support Vec3 types. + if (numEle > 4 || numEle == 3) { + return false; + } + } + // TODO: Handle vectors. + if (isVector) { + if (mDebug) { + dbgs() << "!!! Vectors are not supported yet!\n"; + } + return false; + } + Instruction *LHSSrc = NULL, *RHSSrc = NULL; + Constant *LHSMask = NULL, *RHSMask = NULL; + Constant *LHSShift = NULL, *RHSShift = NULL; + Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0)); + Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1)); + if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) { + if (mDebug) { + dbgs() << "Found an OR Operation that failed setup!\n"; + inst->dump(); + if (LHS) { LHS->dump(); } + if (LHSSrc) { LHSSrc->dump(); } + if (LHSMask) { LHSMask->dump(); } + if (LHSShift) { LHSShift->dump(); } + } + // There was an issue with the setup for BitInsert. + return false; + } + if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) { + if (mDebug) { + dbgs() << "Found an OR Operation that failed setup!\n"; + inst->dump(); + if (RHS) { RHS->dump(); } + if (RHSSrc) { RHSSrc->dump(); } + if (RHSMask) { RHSMask->dump(); } + if (RHSShift) { RHSShift->dump(); } + } + // There was an issue with the setup for BitInsert. + return false; + } + if (mDebug) { + dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n"; + dbgs() << "Op: "; inst->dump(); + dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; } + } + Constant *offset = NULL; + Constant *width = NULL; + uint32_t lhsMaskVal = 0, rhsMaskVal = 0; + uint32_t lhsShiftVal = 0, rhsShiftVal = 0; + uint32_t lhsMaskWidth = 0, rhsMaskWidth = 0; + uint32_t lhsMaskOffset = 0, rhsMaskOffset = 0; + lhsMaskVal = (LHSMask + ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0); + rhsMaskVal = (RHSMask + ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0); + lhsShiftVal = (LHSShift + ? 
dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0); + rhsShiftVal = (RHSShift + ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0); + lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal; + rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal; + lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal; + rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal; + // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks). + if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) { + return false; + } + if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) { + offset = ConstantInt::get(aType, lhsMaskOffset, false); + width = ConstantInt::get(aType, lhsMaskWidth, false); + RHSSrc = RHS; + if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) { + return false; + } + if (!LHSShift) { + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, + "MaskShr", LHS); + } else if (lhsShiftVal != lhsMaskOffset) { + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, + "MaskShr", LHS); + } + if (mDebug) { + dbgs() << "Optimizing LHS!\n"; + } + } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) { + offset = ConstantInt::get(aType, rhsMaskOffset, false); + width = ConstantInt::get(aType, rhsMaskWidth, false); + LHSSrc = RHSSrc; + RHSSrc = LHS; + if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) { + return false; + } + if (!RHSShift) { + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, + "MaskShr", RHS); + } else if (rhsShiftVal != rhsMaskOffset) { + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, + "MaskShr", RHS); + } + if (mDebug) { + dbgs() << "Optimizing RHS!\n"; + } + } else { + if (mDebug) { + dbgs() << "Failed constraint 3!\n"; + } + return false; + } + if (mDebug) { + dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; } + dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; } + dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; } + dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; } + } + if (!offset || !width) { + if (mDebug) { + dbgs() << "Either width or offset are NULL, failed detection!\n"; + } + return false; + } + // Lets create the function signature. 
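  // For example, with inst = (a & 0xFF00) | (b & 0x00FF): lhsMaskWidth and
  // lhsMaskOffset are both 8, rhsMaskWidth is 8 with offset 0, so the first
  // branch above is taken (8 >= 8 + 0), LHSSrc becomes (a >> 8), and the OR is
  // replaced below by __amdil_ubit_insert_u32(8, 8, a >> 8, b & 0xFF).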
+ std::vector<Type *> callTypes; + callTypes.push_back(aType); + callTypes.push_back(aType); + callTypes.push_back(aType); + callTypes.push_back(aType); + FunctionType *funcType = FunctionType::get(aType, callTypes, false); + std::string name = "__amdil_ubit_insert"; + if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; } + Function *Func = + dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> + getOrInsertFunction(StringRef(name), funcType)); + Value *Operands[4] = { + width, + offset, + LHSSrc, + RHSSrc + }; + CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt"); + if (mDebug) { + dbgs() << "Old Inst: "; + inst->dump(); + dbgs() << "New Inst: "; + CI->dump(); + dbgs() << "\n\n"; + } + CI->insertBefore(inst); + inst->replaceAllUsesWith(CI); + return true; +} + +bool +AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) { + if (!inst) { + return false; + } + if (!inst->isBinaryOp()) { + return false; + } + if (inst->getOpcode() != Instruction::And) { + return false; + } + if (optLevel == CodeGenOpt::None) { + return false; + } + // We want to do some simple optimizations on Shift right/And patterns. The + // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a + // value smaller than 32 and C is a mask. If C is a constant value, then the + // following transformation can occur. For signed integers, it turns into the + // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned + // integers, it turns into the function call dst = + // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract + // can be found in Section 7.9 of the ATI IL spec of the stream SDK for + // Evergreen hardware. + if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) { + // This does not work on HD4XXX hardware. + return false; + } + Type *aType = inst->getType(); + bool isVector = aType->isVectorTy(); + + // XXX Support vector types + if (isVector) { + return false; + } + int numEle = 1; + // This only works on 32bit integers + if (aType->getScalarType() + != Type::getInt32Ty(inst->getContext())) { + return false; + } + if (isVector) { + const VectorType *VT = dyn_cast<VectorType>(aType); + numEle = VT->getNumElements(); + // We currently cannot support more than 4 elements in a intrinsic and we + // cannot support Vec3 types. + if (numEle > 4 || numEle == 3) { + return false; + } + } + BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0)); + // If the first operand is not a shift instruction, then we can return as it + // doesn't match this pattern. + if (!ShiftInst || !ShiftInst->isShift()) { + return false; + } + // If we are a shift left, then we need don't match this pattern. + if (ShiftInst->getOpcode() == Instruction::Shl) { + return false; + } + bool isSigned = ShiftInst->isArithmeticShift(); + Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1)); + Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1)); + // Lets make sure that the shift value and the and mask are constant integers. 
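  // For example, (x >> 3) & 0xFF qualifies: the mask 0xFF gives a field width
  // of 8 (CountTrailingOnes), the shift gives an offset of 3, both within 32
  // bits, so the And is replaced further below by a call to the
  // llvm.AMDGPU.bit.extract.u32 intrinsic with operands (x, 3, 8).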
+ if (!AndMask || !ShrVal) { + return false; + } + Constant *newMaskConst; + Constant *shiftValConst; + if (isVector) { + // Handle the vector case + std::vector<Constant *> maskVals; + std::vector<Constant *> shiftVals; + ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask); + ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal); + Type *scalarType = AndMaskVec->getType()->getScalarType(); + assert(AndMaskVec->getNumOperands() == + ShrValVec->getNumOperands() && "cannot have a " + "combination where the number of elements to a " + "shift and an and are different!"); + for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) { + ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x)); + ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x)); + if (!AndCI || !ShiftIC) { + return false; + } + uint32_t maskVal = (uint32_t)AndCI->getZExtValue(); + if (!isMask_32(maskVal)) { + return false; + } + maskVal = (uint32_t)CountTrailingOnes_32(maskVal); + uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue(); + // If the mask or shiftval is greater than the bitcount, then break out. + if (maskVal >= 32 || shiftVal >= 32) { + return false; + } + // If the mask val is greater than the the number of original bits left + // then this optimization is invalid. + if (maskVal > (32 - shiftVal)) { + return false; + } + maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned)); + shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned)); + } + newMaskConst = ConstantVector::get(maskVals); + shiftValConst = ConstantVector::get(shiftVals); + } else { + // Handle the scalar case + uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue(); + // This must be a mask value where all lower bits are set to 1 and then any + // bit higher is set to 0. + if (!isMask_32(maskVal)) { + return false; + } + maskVal = (uint32_t)CountTrailingOnes_32(maskVal); + // Count the number of bits set in the mask, this is the width of the + // resulting bit set that is extracted from the source value. + uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue(); + // If the mask or shift val is greater than the bitcount, then break out. + if (maskVal >= 32 || shiftVal >= 32) { + return false; + } + // If the mask val is greater than the the number of original bits left then + // this optimization is invalid. + if (maskVal > (32 - shiftVal)) { + return false; + } + newMaskConst = ConstantInt::get(aType, maskVal, isSigned); + shiftValConst = ConstantInt::get(aType, shiftVal, isSigned); + } + // Lets create the function signature. + std::vector<Type *> callTypes; + callTypes.push_back(aType); + callTypes.push_back(aType); + callTypes.push_back(aType); + FunctionType *funcType = FunctionType::get(aType, callTypes, false); + std::string name = "llvm.AMDGPU.bit.extract.u32"; + if (isVector) { + name += ".v" + itostr(numEle) + "i32"; + } else { + name += "."; + } + // Lets create the function. 
+ Function *Func = + dyn_cast<Function>(inst->getParent()->getParent()->getParent()-> + getOrInsertFunction(StringRef(name), funcType)); + Value *Operands[3] = { + ShiftInst->getOperand(0), + shiftValConst, + newMaskConst + }; + // Lets create the Call with the operands + CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt"); + CI->setDoesNotAccessMemory(); + CI->insertBefore(inst); + inst->replaceAllUsesWith(CI); + return true; +} + +bool +AMDGPUPeepholeOpt::expandBFI(CallInst *CI) { + if (!CI) { + return false; + } + Value *LHS = CI->getOperand(CI->getNumOperands() - 1); + if (!LHS->getName().startswith("__amdil_bfi")) { + return false; + } + Type* type = CI->getOperand(0)->getType(); + Constant *negOneConst = NULL; + if (type->isVectorTy()) { + std::vector<Constant *> negOneVals; + negOneConst = ConstantInt::get(CI->getContext(), + APInt(32, StringRef("-1"), 10)); + for (size_t x = 0, + y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) { + negOneVals.push_back(negOneConst); + } + negOneConst = ConstantVector::get(negOneVals); + } else { + negOneConst = ConstantInt::get(CI->getContext(), + APInt(32, StringRef("-1"), 10)); + } + // __amdil_bfi => (A & B) | (~A & C) + BinaryOperator *lhs = + BinaryOperator::Create(Instruction::And, CI->getOperand(0), + CI->getOperand(1), "bfi_and", CI); + BinaryOperator *rhs = + BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst, + "bfi_not", CI); + rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2), + "bfi_and", CI); + lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI); + CI->replaceAllUsesWith(lhs); + return true; +} + +bool +AMDGPUPeepholeOpt::expandBFM(CallInst *CI) { + if (!CI) { + return false; + } + Value *LHS = CI->getOperand(CI->getNumOperands() - 1); + if (!LHS->getName().startswith("__amdil_bfm")) { + return false; + } + // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f) + Constant *newMaskConst = NULL; + Constant *newShiftConst = NULL; + Type* type = CI->getOperand(0)->getType(); + if (type->isVectorTy()) { + std::vector<Constant*> newMaskVals, newShiftVals; + newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); + newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); + for (size_t x = 0, + y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) { + newMaskVals.push_back(newMaskConst); + newShiftVals.push_back(newShiftConst); + } + newMaskConst = ConstantVector::get(newMaskVals); + newShiftConst = ConstantVector::get(newShiftVals); + } else { + newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); + newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); + } + BinaryOperator *lhs = + BinaryOperator::Create(Instruction::And, CI->getOperand(0), + newMaskConst, "bfm_mask", CI); + lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst, + lhs, "bfm_shl", CI); + lhs = BinaryOperator::Create(Instruction::Sub, lhs, + newShiftConst, "bfm_sub", CI); + BinaryOperator *rhs = + BinaryOperator::Create(Instruction::And, CI->getOperand(1), + newMaskConst, "bfm_mask", CI); + lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI); + CI->replaceAllUsesWith(lhs); + return true; +} + +bool +AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) { + Instruction *inst = (*bbb); + if (optimizeCallInst(bbb)) { + return true; + } + if (optimizeBitExtract(inst)) { + return false; + } + if (optimizeBitInsert(inst)) { + return false; + } + if (correctMisalignedMemOp(inst)) { + return false; + } + return 
false; +} +bool +AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) { + LoadInst *linst = dyn_cast<LoadInst>(inst); + StoreInst *sinst = dyn_cast<StoreInst>(inst); + unsigned alignment; + Type* Ty = inst->getType(); + if (linst) { + alignment = linst->getAlignment(); + Ty = inst->getType(); + } else if (sinst) { + alignment = sinst->getAlignment(); + Ty = sinst->getValueOperand()->getType(); + } else { + return false; + } + unsigned size = getTypeSize(Ty); + if (size == alignment || size < alignment) { + return false; + } + if (!Ty->isStructTy()) { + return false; + } + if (alignment < 4) { + if (linst) { + linst->setAlignment(0); + return true; + } else if (sinst) { + sinst->setAlignment(0); + return true; + } + } + return false; +} +bool +AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) { + if (!CI) { + return false; + } + Value *LHS = CI->getOperand(CI->getNumOperands() - 1); + std::string namePrefix = LHS->getName().substr(0, 14); + if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24" + && namePrefix != "__amdil__imul24_high") { + return false; + } + if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) { + return false; + } + return true; +} + +void +AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) { + assert(isSigned24BitOps(CI) && "Must be a " + "signed 24 bit operation to call this function!"); + Value *LHS = CI->getOperand(CI->getNumOperands()-1); + // On 7XX and 8XX we do not have signed 24bit, so we need to + // expand it to the following: + // imul24 turns into 32bit imul + // imad24 turns into 32bit imad + // imul24_high turns into 32bit imulhigh + if (LHS->getName().substr(0, 14) == "__amdil_imad24") { + Type *aType = CI->getOperand(0)->getType(); + bool isVector = aType->isVectorTy(); + int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1; + std::vector<Type*> callTypes; + callTypes.push_back(CI->getOperand(0)->getType()); + callTypes.push_back(CI->getOperand(1)->getType()); + callTypes.push_back(CI->getOperand(2)->getType()); + FunctionType *funcType = + FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); + std::string name = "__amdil_imad"; + if (isVector) { + name += "_v" + itostr(numEle) + "i32"; + } else { + name += "_i32"; + } + Function *Func = dyn_cast<Function>( + CI->getParent()->getParent()->getParent()-> + getOrInsertFunction(StringRef(name), funcType)); + Value *Operands[3] = { + CI->getOperand(0), + CI->getOperand(1), + CI->getOperand(2) + }; + CallInst *nCI = CallInst::Create(Func, Operands, "imad24"); + nCI->insertBefore(CI); + CI->replaceAllUsesWith(nCI); + } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") { + BinaryOperator *mulOp = + BinaryOperator::Create(Instruction::Mul, CI->getOperand(0), + CI->getOperand(1), "imul24", CI); + CI->replaceAllUsesWith(mulOp); + } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") { + Type *aType = CI->getOperand(0)->getType(); + + bool isVector = aType->isVectorTy(); + int numEle = isVector ? 
dyn_cast<VectorType>(aType)->getNumElements() : 1; + std::vector<Type*> callTypes; + callTypes.push_back(CI->getOperand(0)->getType()); + callTypes.push_back(CI->getOperand(1)->getType()); + FunctionType *funcType = + FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); + std::string name = "__amdil_imul_high"; + if (isVector) { + name += "_v" + itostr(numEle) + "i32"; + } else { + name += "_i32"; + } + Function *Func = dyn_cast<Function>( + CI->getParent()->getParent()->getParent()-> + getOrInsertFunction(StringRef(name), funcType)); + Value *Operands[2] = { + CI->getOperand(0), + CI->getOperand(1) + }; + CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high"); + nCI->insertBefore(CI); + CI->replaceAllUsesWith(nCI); + } +} + +bool +AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) { + return (CI != NULL + && CI->getOperand(CI->getNumOperands() - 1)->getName() + == "__amdil_get_local_size_int"); +} + +bool +AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) { + if (!CI) { + return false; + } + if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX + && (mSTM->getDeviceName() == "cayman")) { + return false; + } + return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20) + == "__amdil_improved_div"; +} + +void +AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) { + assert(convertAccurateDivide(CI) + && "expanding accurate divide can only happen if it is expandable!"); + BinaryOperator *divOp = + BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0), + CI->getOperand(1), "fdiv32", CI); + CI->replaceAllUsesWith(divOp); +} + +bool +AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) { + if (optLevel != CodeGenOpt::None) { + return false; + } + + if (!CI) { + return false; + } + + unsigned funcNameIdx = 0; + funcNameIdx = CI->getNumOperands() - 1; + StringRef calleeName = CI->getOperand(funcNameIdx)->getName(); + if (calleeName != "__amdil_image2d_read_norm" + && calleeName != "__amdil_image2d_read_unnorm" + && calleeName != "__amdil_image3d_read_norm" + && calleeName != "__amdil_image3d_read_unnorm") { + return false; + } + + unsigned samplerIdx = 2; + samplerIdx = 1; + Value *sampler = CI->getOperand(samplerIdx); + LoadInst *lInst = dyn_cast<LoadInst>(sampler); + if (!lInst) { + return false; + } + + if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { + return false; + } + + GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand()); + // If we are loading from what is not a global value, then we + // fail and return. + if (!gv) { + return false; + } + + // If we don't have an initializer or we have an initializer and + // the initializer is not a 32bit integer, we fail. + if (!gv->hasInitializer() + || !gv->getInitializer()->getType()->isIntegerTy(32)) { + return false; + } + + // Now that we have the global variable initializer, lets replace + // all uses of the load instruction with the samplerVal and + // reparse the __amdil_is_constant() function. 
+ Constant *samplerVal = gv->getInitializer(); + lInst->replaceAllUsesWith(samplerVal); + return true; +} + +bool +AMDGPUPeepholeOpt::doInitialization(Module &M) { + return false; +} + +bool +AMDGPUPeepholeOpt::doFinalization(Module &M) { + return false; +} + +void +AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineFunctionAnalysis>(); + FunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); +} + +size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) { + size_t size = 0; + if (!T) { + return size; + } + switch (T->getTypeID()) { + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + case Type::LabelTyID: + assert(0 && "These types are not supported by this backend"); + default: + case Type::FloatTyID: + case Type::DoubleTyID: + size = T->getPrimitiveSizeInBits() >> 3; + break; + case Type::PointerTyID: + size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr); + break; + case Type::IntegerTyID: + size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr); + break; + case Type::StructTyID: + size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr); + break; + case Type::ArrayTyID: + size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr); + break; + case Type::FunctionTyID: + size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr); + break; + case Type::VectorTyID: + size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr); + break; + }; + return size; +} + +size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST, + bool dereferencePtr) { + size_t size = 0; + if (!ST) { + return size; + } + Type *curType; + StructType::element_iterator eib; + StructType::element_iterator eie; + for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { + curType = *eib; + size += getTypeSize(curType, dereferencePtr); + } + return size; +} + +size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT, + bool dereferencePtr) { + return IT ? (IT->getBitWidth() >> 3) : 0; +} + +size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT, + bool dereferencePtr) { + assert(0 && "Should not be able to calculate the size of an function type"); + return 0; +} + +size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT, + bool dereferencePtr) { + return (size_t)(AT ? (getTypeSize(AT->getElementType(), + dereferencePtr) * AT->getNumElements()) + : 0); +} + +size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT, + bool dereferencePtr) { + return VT ? 
(VT->getBitWidth() >> 3) : 0; +} + +size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT, + bool dereferencePtr) { + if (!PT) { + return 0; + } + Type *CT = PT->getElementType(); + if (CT->getTypeID() == Type::StructTyID && + PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) { + return getTypeSize(dyn_cast<StructType>(CT)); + } else if (dereferencePtr) { + size_t size = 0; + for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { + size += getTypeSize(PT->getContainedType(x), dereferencePtr); + } + return size; + } else { + return 4; + } +} + +size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT, + bool dereferencePtr) { + //assert(0 && "Should not be able to calculate the size of an opaque type"); + return 4; +} diff --git a/lib/Target/R600/AMDILRegisterInfo.td b/lib/Target/R600/AMDILRegisterInfo.td new file mode 100644 index 0000000000..b9d033432e --- /dev/null +++ b/lib/Target/R600/AMDILRegisterInfo.td @@ -0,0 +1,107 @@ +//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Declarations that describe the AMDIL register file +// +//===----------------------------------------------------------------------===// + +class AMDILReg<bits<16> num, string n> : Register<n> { + field bits<16> Value; + let Value = num; + let Namespace = "AMDGPU"; +} + +// We will start with 8 registers for each class before expanding to more +// Since the swizzle is added based on the register class, we can leave it +// off here and just specify different registers for different register classes +def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>; +def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>; +def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>; +def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>; +def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>; +def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>; +def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>; +def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>; +def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>; +def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>; +def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>; +def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>; +def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>; +def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>; +def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>; +def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>; +def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>; +def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>; +def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>; +def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>; + +// All registers between 1000 and 1024 are reserved and cannot be used +// unless commented in this section +// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's +// r1020 is used to hold the frame index for local arrays +// r1019 is used to hold the dynamic stack allocation pointer +// r1018 is used as a temporary register for handwritten code +// r1017 is used as a temporary register for handwritten code +// r1016 is used as a temporary register for load/store code +// r1015 is used as a temporary register for data segment offset +// r1014 is used as a temporary register for store code +// r1013 is used as the section data pointer register +// r1012-r1010 and r1001-r1008 are used for temporary I/O registers 
+// r1009 is used as the frame pointer register +// r999 is used as the mem register. +// r998 is used as the return address register. +//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>; +//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>; +//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>; +//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>; +//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>; +//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>; +def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>; +def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>; +def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>; +def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>; +def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>; +def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>; +def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>; +def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>; +def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>; +def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>; +def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>; +def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>; +def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>; +def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>; +def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>; +def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>; +def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>; +def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>; +def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>; +def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>; +def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>; +def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>; +def GPRI16 : RegisterClass<"AMDGPU", [i16], 16, + (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> { + let AltOrders = [(add (sequence "R%u", 1, 20))]; + let AltOrderSelect = [{ + return 1; + }]; + } +def GPRI32 : RegisterClass<"AMDGPU", [i32], 32, + (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> { + let AltOrders = [(add (sequence "R%u", 1, 20))]; + let AltOrderSelect = [{ + return 1; + }]; + } +def GPRF32 : RegisterClass<"AMDGPU", [f32], 32, + (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> { + let AltOrders = [(add (sequence "R%u", 1, 20))]; + let AltOrderSelect = [{ + return 1; + }]; + } diff --git a/lib/Target/R600/AMDILSIDevice.cpp b/lib/Target/R600/AMDILSIDevice.cpp new file mode 100644 index 0000000000..3096c22067 --- /dev/null +++ b/lib/Target/R600/AMDILSIDevice.cpp @@ -0,0 +1,45 @@ +//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +/// \file +//==-----------------------------------------------------------------------===// +#include "AMDILSIDevice.h" +#include "AMDGPUSubtarget.h" +#include "AMDILEvergreenDevice.h" +#include "AMDILNIDevice.h" + +using namespace llvm; + +AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST) + : AMDGPUEvergreenDevice(ST) { +} +AMDGPUSIDevice::~AMDGPUSIDevice() { +} + +size_t +AMDGPUSIDevice::getMaxLDSSize() const { + if (usesHardware(AMDGPUDeviceInfo::LocalMem)) { + return MAX_LDS_SIZE_900; + } else { + return 0; + } +} + +uint32_t +AMDGPUSIDevice::getGeneration() const { + return AMDGPUDeviceInfo::HD7XXX; +} + +std::string +AMDGPUSIDevice::getDataLayout() const { + return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16" + "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" + "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" + "-v512:512:512-v1024:1024:1024-v2048:2048:2048" + "-n8:16:32:64"); +} diff --git a/lib/Target/R600/AMDILSIDevice.h b/lib/Target/R600/AMDILSIDevice.h new file mode 100644 index 0000000000..5b2cb25022 --- /dev/null +++ b/lib/Target/R600/AMDILSIDevice.h @@ -0,0 +1,39 @@ +//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +/// \file +/// \brief Interface for the subtarget data classes. +/// +/// This file will define the interface that each generation needs to +/// implement in order to correctly answer queries on the capabilities of the +/// specific hardware. +//===---------------------------------------------------------------------===// +#ifndef AMDILSIDEVICE_H +#define AMDILSIDEVICE_H +#include "AMDILEvergreenDevice.h" + +namespace llvm { +class AMDGPUSubtarget; +//===---------------------------------------------------------------------===// +// SI generation of devices and their respective sub classes +//===---------------------------------------------------------------------===// + +/// \brief The AMDGPUSIDevice is the base class for all Southern Island series +/// of cards. 
+class AMDGPUSIDevice : public AMDGPUEvergreenDevice { +public: + AMDGPUSIDevice(AMDGPUSubtarget*); + virtual ~AMDGPUSIDevice(); + virtual size_t getMaxLDSSize() const; + virtual uint32_t getGeneration() const; + virtual std::string getDataLayout() const; +}; + +} // namespace llvm +#endif // AMDILSIDEVICE_H diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt new file mode 100644 index 0000000000..ce0b56bdf0 --- /dev/null +++ b/lib/Target/R600/CMakeLists.txt @@ -0,0 +1,55 @@ +set(LLVM_TARGET_DEFINITIONS AMDGPU.td) + +tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv) +tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic) +tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter -mc-emitter) +tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer) +tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer) +add_public_tablegen_target(AMDGPUCommonTableGen) + +add_llvm_target(R600CodeGen + AMDIL7XXDevice.cpp + AMDILCFGStructurizer.cpp + AMDILDevice.cpp + AMDILDeviceInfo.cpp + AMDILEvergreenDevice.cpp + AMDILFrameLowering.cpp + AMDILIntrinsicInfo.cpp + AMDILISelDAGToDAG.cpp + AMDILISelLowering.cpp + AMDILNIDevice.cpp + AMDILPeepholeOptimizer.cpp + AMDILSIDevice.cpp + AMDGPUAsmPrinter.cpp + AMDGPUMCInstLower.cpp + AMDGPUSubtarget.cpp + AMDGPUStructurizeCFG.cpp + AMDGPUTargetMachine.cpp + AMDGPUISelLowering.cpp + AMDGPUConvertToISA.cpp + AMDGPUInstrInfo.cpp + AMDGPURegisterInfo.cpp + R600ExpandSpecialInstrs.cpp + R600InstrInfo.cpp + R600ISelLowering.cpp + R600MachineFunctionInfo.cpp + R600RegisterInfo.cpp + SIAnnotateControlFlow.cpp + SIAssignInterpRegs.cpp + SIInstrInfo.cpp + SIISelLowering.cpp + SILowerLiteralConstants.cpp + SILowerControlFlow.cpp + SIMachineFunctionInfo.cpp + SIRegisterInfo.cpp + ) + +add_dependencies(LLVMR600CodeGen intrinsics_gen) + +add_subdirectory(InstPrinter) +add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp new file mode 100644 index 0000000000..e6c550b5ac --- /dev/null +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -0,0 +1,132 @@ +//===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// \file +//===----------------------------------------------------------------------===// + +#include "AMDGPUInstPrinter.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/MC/MCInst.h" + +using namespace llvm; + +void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, + StringRef Annot) { + printInstruction(MI, OS); + + printAnnotation(OS, Annot); +} + +void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + switch (Op.getReg()) { + // This is the default predicate state, so we don't need to print it. 
+ case AMDGPU::PRED_SEL_OFF: break; + default: O << getRegisterName(Op.getReg()); break; + } + } else if (Op.isImm()) { + O << Op.getImm(); + } else if (Op.isFPImm()) { + O << Op.getFPImm(); + } else { + assert(!"unknown operand type in printOperand"); + } +} + +void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printOperand(MI, OpNo, O); + O << ", "; + printOperand(MI, OpNo + 1, O); +} + +void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo, + raw_ostream &O, StringRef Asm) { + const MCOperand &Op = MI->getOperand(OpNo); + assert(Op.isImm()); + if (Op.getImm() == 1) { + O << Asm; + } +} + +void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printIfSet(MI, OpNo, O, "|"); +} + +void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printIfSet(MI, OpNo, O, "_SAT"); +} + +void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + union Literal { + float f; + int32_t i; + } L; + + L.i = MI->getOperand(OpNo).getImm(); + O << L.i << "(" << L.f << ")"; +} + +void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printIfSet(MI, OpNo, O, " *"); +} + +void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printIfSet(MI, OpNo, O, "-"); +} + +void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + default: break; + case 1: + O << " * 2.0"; + break; + case 2: + O << " * 4.0"; + break; + case 3: + O << " / 2.0"; + break; + } +} + +void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.getImm() != 0) { + O << " + " << Op.getImm(); + } +} + +void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printIfSet(MI, OpNo, O, "ExecMask,"); +} + +void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printIfSet(MI, OpNo, O, "Pred,"); +} + +void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.getImm() == 0) { + O << " (MASKED)"; + } +} + +#include "AMDGPUGenAsmWriter.inc" diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h new file mode 100644 index 0000000000..96e0e46f8a --- /dev/null +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -0,0 +1,52 @@ +//===-- AMDGPUInstPrinter.h - AMDGPU MC Inst -> ASM interface ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUINSTPRINTER_H +#define AMDGPUINSTPRINTER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +class AMDGPUInstPrinter : public MCInstPrinter { +public: + AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + //Autogenerated by tblgen + void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + +private: + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm); + void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O); +}; + +} // End namespace llvm + +#endif // AMDGPUINSTRPRINTER_H diff --git a/lib/Target/R600/InstPrinter/CMakeLists.txt b/lib/Target/R600/InstPrinter/CMakeLists.txt new file mode 100644 index 0000000000..069c55ba94 --- /dev/null +++ b/lib/Target/R600/InstPrinter/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMR600AsmPrinter + AMDGPUInstPrinter.cpp + ) + +add_dependencies(LLVMR600AsmPrinter AMDGPUCommonTableGen) diff --git a/lib/Target/R600/InstPrinter/LLVMBuild.txt b/lib/Target/R600/InstPrinter/LLVMBuild.txt new file mode 100644 index 0000000000..ec0be89f10 --- /dev/null +++ b/lib/Target/R600/InstPrinter/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/R600/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = R600AsmPrinter +parent = R600 +required_libraries = MC Support +add_to_library_groups = R600 + diff --git a/lib/Target/R600/InstPrinter/Makefile b/lib/Target/R600/InstPrinter/Makefile new file mode 100644 index 0000000000..a794cc1124 --- /dev/null +++ b/lib/Target/R600/InstPrinter/Makefile @@ -0,0 +1,15 @@ +#===- lib/Target/R600/AsmPrinter/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMR600AsmPrinter + +# Hack: we need to include 'main' x86 target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/R600/LLVMBuild.txt b/lib/Target/R600/LLVMBuild.txt new file mode 100644 index 0000000000..f2a7554e52 --- /dev/null +++ b/lib/Target/R600/LLVMBuild.txt @@ -0,0 +1,32 @@ +;===- ./lib/Target/AMDIL/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + +[component_0] +type = TargetGroup +name = R600 +parent = Target +has_asmprinter = 1 + +[component_1] +type = Library +name = R600CodeGen +parent = R600 +required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC R600AsmPrinter R600Desc R600Info +add_to_library_groups = R600 diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp new file mode 100644 index 0000000000..98fca43267 --- /dev/null +++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -0,0 +1,90 @@ +//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +namespace { + +class AMDGPUMCObjectWriter : public MCObjectWriter { +public: + AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { } + virtual void ExecutePostLayoutBinding(MCAssembler &Asm, + const MCAsmLayout &Layout) { + //XXX: Implement if necessary. 
+ } + virtual void RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) { + assert(!"Not implemented"); + } + + virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout); + +}; + +class AMDGPUAsmBackend : public MCAsmBackend { +public: + AMDGPUAsmBackend(const Target &T) + : MCAsmBackend() {} + + virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const; + virtual unsigned getNumFixupKinds() const { return 0; }; + virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const; + virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const { + return false; + } + virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const { + assert(!"Not implemented"); + } + virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; } + virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { + return true; + } +}; + +} //End anonymous namespace + +void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm, + const MCAsmLayout &Layout) { + for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) { + Asm.writeSectionData(I, Layout); + } +} + +MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT, + StringRef CPU) { + return new AMDGPUAsmBackend(T); +} + +AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter( + raw_ostream &OS) const { + return new AMDGPUMCObjectWriter(OS); +} + +void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value) const { + + uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset()); + assert(Fixup.getKind() == FK_PCRel_4); + *Dst = (Value - 4) / 4; +} diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp new file mode 100644 index 0000000000..4d3d3e7945 --- /dev/null +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -0,0 +1,85 @@ +//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +/// \file +//===----------------------------------------------------------------------===// + +#include "AMDGPUMCAsmInfo.h" + +using namespace llvm; +AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() { + HasSingleParameterDotFile = false; + WeakDefDirective = 0; + //===------------------------------------------------------------------===// + HasSubsectionsViaSymbols = true; + HasMachoZeroFillDirective = false; + HasMachoTBSSDirective = false; + HasStaticCtorDtorReferenceInStaticMode = false; + LinkerRequiresNonEmptyDwarfLines = true; + MaxInstLength = 16; + PCSymbol = "$"; + SeparatorString = "\n"; + CommentColumn = 40; + CommentString = ";"; + LabelSuffix = ":"; + GlobalPrefix = "@"; + PrivateGlobalPrefix = ";."; + LinkerPrivateGlobalPrefix = "!"; + InlineAsmStart = ";#ASMSTART"; + InlineAsmEnd = ";#ASMEND"; + AssemblerDialect = 0; + AllowQuotesInName = false; + AllowNameToStartWithDigit = false; + AllowPeriodsInName = false; + + //===--- Data Emission Directives -------------------------------------===// + ZeroDirective = ".zero"; + AsciiDirective = ".ascii\t"; + AscizDirective = ".asciz\t"; + Data8bitsDirective = ".byte\t"; + Data16bitsDirective = ".short\t"; + Data32bitsDirective = ".long\t"; + Data64bitsDirective = ".quad\t"; + GPRel32Directive = 0; + SunStyleELFSectionSwitchSyntax = true; + UsesELFSectionDirectiveForBSS = true; + HasMicrosoftFastStdCallMangling = false; + + //===--- Alignment Information ----------------------------------------===// + AlignDirective = ".align\t"; + AlignmentIsInBytes = true; + TextAlignFillValue = 0; + + //===--- Global Variable Emission Directives --------------------------===// + GlobalDirective = ".global"; + ExternDirective = ".extern"; + HasSetDirective = false; + HasAggressiveSymbolFolding = true; + COMMDirectiveAlignmentIsInBytes = false; + HasDotTypeDotSizeDirective = false; + HasNoDeadStrip = true; + HasSymbolResolver = false; + WeakRefDirective = ".weakref\t"; + LinkOnceDirective = 0; + //===--- Dwarf Emission Directives -----------------------------------===// + HasLEB128 = true; + SupportsDebugInformation = true; + ExceptionsType = ExceptionHandling::None; + DwarfUsesInlineInfoSection = false; + DwarfSectionOffsetDirective = ".offset"; + +} + +const char* +AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const { + return 0; +} + +const MCSection* +AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const { + return 0; +} diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h new file mode 100644 index 0000000000..3ad0fa6824 --- /dev/null +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h @@ -0,0 +1,30 @@ +//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - AMDGPU MCAsm Interface ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUMCASMINFO_H +#define AMDGPUMCASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" +namespace llvm { + +class Target; +class StringRef; + +class AMDGPUMCAsmInfo : public MCAsmInfo { +public: + explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT); + const char* getDataASDirective(unsigned int Size, unsigned int AS) const; + const MCSection* getNonexecutableStackSection(MCContext &CTX) const; +}; +} // namespace llvm +#endif // AMDGPUMCASMINFO_H diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h new file mode 100644 index 0000000000..9d0d6cf6fd --- /dev/null +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -0,0 +1,60 @@ +//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief CodeEmitter interface for R600 and SI codegen. +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUCODEEMITTER_H +#define AMDGPUCODEEMITTER_H + +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +class MCInst; +class MCOperand; + +class AMDGPUMCCodeEmitter : public MCCodeEmitter { +public: + + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups) const; + + virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const { + return 0; + } + + virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + return 0; + } + virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + return 0; + } + virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const { + return Value; + } + virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + return 0; + } + virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + return 0; + } +}; + +} // End namespace llvm + +#endif // AMDGPUCODEEMITTER_H diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp new file mode 100644 index 0000000000..072ee49b63 --- /dev/null +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -0,0 +1,113 @@ +//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief This file provides AMDGPU specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUMCTargetDesc.h" +#include "AMDGPUMCAsmInfo.h" +#include "InstPrinter/AMDGPUInstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "AMDGPUGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "AMDGPUGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "AMDGPUGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createAMDGPUMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitAMDGPUMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitAMDGPUMCRegisterInfo(X, 0); + return X; +} + +static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo * X = new MCSubtargetInfo(); + InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + return new AMDGPUInstPrinter(MAI, MII, MRI); +} + +static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) { + return createSIMCCodeEmitter(MCII, MRI, STI, Ctx); + } else { + return createR600MCCodeEmitter(MCII, MRI, STI, Ctx); + } +} + +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &MAB, + raw_ostream &_OS, + MCCodeEmitter *_Emitter, + bool RelaxAll, + bool NoExecStack) { + return createPureStreamer(Ctx, MAB, _OS, _Emitter); +} + +extern "C" void LLVMInitializeR600TargetMC() { + + RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget); + + TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo); + + TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo); + + TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo); + + TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo); + + TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter); + + TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter); + + TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend); + + TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer); +} diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h new file mode 100644 index 0000000000..363a4af3f3 --- /dev/null +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -0,0 +1,55 @@ +//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// 
License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Provides AMDGPU specific target descriptions. +// +//===----------------------------------------------------------------------===// +// + +#ifndef AMDGPUMCTARGETDESC_H +#define AMDGPUMCTARGETDESC_H + +#include "llvm/ADT/StringRef.h" + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCRegisterInfo; +class MCSubtargetInfo; +class Target; + +extern Target TheAMDGPUTarget; + +MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx); + +MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT, + StringRef CPU); +} // End llvm namespace + +#define GET_REGINFO_ENUM +#include "AMDGPUGenRegisterInfo.inc" + +#define GET_INSTRINFO_ENUM +#include "AMDGPUGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "AMDGPUGenSubtargetInfo.inc" + +#endif // AMDGPUMCTARGETDESC_H diff --git a/lib/Target/R600/MCTargetDesc/CMakeLists.txt b/lib/Target/R600/MCTargetDesc/CMakeLists.txt new file mode 100644 index 0000000000..37e714c2e7 --- /dev/null +++ b/lib/Target/R600/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,10 @@ + +add_llvm_library(LLVMR600Desc + AMDGPUAsmBackend.cpp + AMDGPUMCTargetDesc.cpp + AMDGPUMCAsmInfo.cpp + R600MCCodeEmitter.cpp + SIMCCodeEmitter.cpp + ) + +add_dependencies(LLVMR600Desc AMDGPUCommonTableGen) diff --git a/lib/Target/R600/MCTargetDesc/LLVMBuild.txt b/lib/Target/R600/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000000..b1beab0bb3 --- /dev/null +++ b/lib/Target/R600/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/R600/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = R600Desc +parent = R600 +required_libraries = R600AsmPrinter R600Info MC +add_to_library_groups = R600 diff --git a/lib/Target/R600/MCTargetDesc/Makefile b/lib/Target/R600/MCTargetDesc/Makefile new file mode 100644 index 0000000000..8894a7607f --- /dev/null +++ b/lib/Target/R600/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/AMDGPU/TargetDesc/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMR600Desc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp new file mode 100644 index 0000000000..36deae9c0a --- /dev/null +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -0,0 +1,574 @@ +//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// This code emitter outputs bytecode that is understood by the r600g driver +/// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA, +/// but it still needs to be run through a finalizer in order to be executed +/// by the GPU. +/// +/// [1] http://www.mesa3d.org/ +// +//===----------------------------------------------------------------------===// + +#include "R600Defines.h" +#include "MCTargetDesc/AMDGPUMCCodeEmitter.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" +#include <stdio.h> + +#define SRC_BYTE_COUNT 11 +#define DST_BYTE_COUNT 5 + +using namespace llvm; + +namespace { + +class R600MCCodeEmitter : public AMDGPUMCCodeEmitter { + R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT + const MCInstrInfo &MCII; + const MCRegisterInfo &MRI; + const MCSubtargetInfo &STI; + MCContext &Ctx; + +public: + + R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, + const MCSubtargetInfo &sti, MCContext &ctx) + : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { } + + /// \brief Encode the instruction and write it to the OS. + virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// \returns the encoding for an MCOperand. 
+ virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const; +private: + + void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, + raw_ostream &OS) const; + void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const; + void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value, + raw_ostream &OS) const; + void EmitDst(const MCInst &MI, raw_ostream &OS) const; + void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, + raw_ostream &OS) const; + void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const; + + void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const; + + void EmitByte(unsigned int byte, raw_ostream &OS) const; + + void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const; + + void Emit(uint32_t value, raw_ostream &OS) const; + void Emit(uint64_t value, raw_ostream &OS) const; + + unsigned getHWRegChan(unsigned reg) const; + unsigned getHWReg(unsigned regNo) const; + + bool isFCOp(unsigned opcode) const; + bool isTexOp(unsigned opcode) const; + bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const; + +}; + +} // End anonymous namespace + +enum RegElement { + ELEMENT_X = 0, + ELEMENT_Y, + ELEMENT_Z, + ELEMENT_W +}; + +enum InstrTypes { + INSTR_ALU = 0, + INSTR_TEX, + INSTR_FC, + INSTR_NATIVE, + INSTR_VTX, + INSTR_EXPORT +}; + +enum FCInstr { + FC_IF_PREDICATE = 0, + FC_ELSE, + FC_ENDIF, + FC_BGNLOOP, + FC_ENDLOOP, + FC_BREAK_PREDICATE, + FC_CONTINUE +}; + +enum TextureTypes { + TEXTURE_1D = 1, + TEXTURE_2D, + TEXTURE_3D, + TEXTURE_CUBE, + TEXTURE_RECT, + TEXTURE_SHADOW1D, + TEXTURE_SHADOW2D, + TEXTURE_SHADOWRECT, + TEXTURE_1D_ARRAY, + TEXTURE_2D_ARRAY, + TEXTURE_SHADOW1D_ARRAY, + TEXTURE_SHADOW2D_ARRAY +}; + +MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new R600MCCodeEmitter(MCII, MRI, STI, Ctx); +} + +void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + if (isTexOp(MI.getOpcode())) { + EmitTexInstr(MI, Fixups, OS); + } else if (isFCOp(MI.getOpcode())){ + EmitFCInstr(MI, OS); + } else if (MI.getOpcode() == AMDGPU::RETURN || + MI.getOpcode() == AMDGPU::BUNDLE || + MI.getOpcode() == AMDGPU::KILL) { + return; + } else { + switch(MI.getOpcode()) { + case AMDGPU::RAT_WRITE_CACHELESS_32_eg: + case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { + uint64_t inst = getBinaryCodeForInstr(MI, Fixups); + EmitByte(INSTR_NATIVE, OS); + Emit(inst, OS); + break; + } + case AMDGPU::CONSTANT_LOAD_eg: + case AMDGPU::VTX_READ_PARAM_8_eg: + case AMDGPU::VTX_READ_PARAM_16_eg: + case AMDGPU::VTX_READ_PARAM_32_eg: + case AMDGPU::VTX_READ_GLOBAL_8_eg: + case AMDGPU::VTX_READ_GLOBAL_32_eg: + case AMDGPU::VTX_READ_GLOBAL_128_eg: { + uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); + uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset + + EmitByte(INSTR_VTX, OS); + Emit(InstWord01, OS); + Emit(InstWord2, OS); + break; + } + case AMDGPU::EG_ExportSwz: + case AMDGPU::R600_ExportSwz: + case AMDGPU::EG_ExportBuf: + case AMDGPU::R600_ExportBuf: { + uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); + EmitByte(INSTR_EXPORT, OS); + Emit(Inst, OS); + break; + } + + default: + EmitALUInstr(MI, Fixups, OS); + break; + } + } +} + +void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups, + raw_ostream &OS) const { + const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); + 
unsigned NumOperands = MI.getNumOperands(); + + // Emit instruction type + EmitByte(INSTR_ALU, OS); + + uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); + + //older alu have different encoding for instructions with one or two src + //parameters. + if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) && + !(MCDesc.TSFlags & R600_InstFlag::OP3)) { + uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39); + InstWord01 &= ~(0x3FFULL << 39); + InstWord01 |= ISAOpCode << 1; + } + + unsigned SrcIdx = 0; + for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) { + if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() || + OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) { + continue; + } + EmitSrcISA(MI, OpIdx, InstWord01, OS); + SrcIdx++; + } + + // Emit zeros for unused sources + for ( ; SrcIdx < 3; SrcIdx++) { + EmitNullBytes(SRC_BYTE_COUNT - 6, OS); + } + + Emit(InstWord01, OS); + return; +} + +void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx, + raw_ostream &OS) const { + const MCOperand &MO = MI.getOperand(OpIdx); + union { + float f; + uint32_t i; + } Value; + Value.i = 0; + // Emit the source select (2 bytes). For GPRs, this is the register index. + // For other potential instruction operands, (e.g. constant registers) the + // value of the source select is defined in the r600isa docs. + if (MO.isReg()) { + unsigned reg = MO.getReg(); + EmitTwoBytes(getHWReg(reg), OS); + if (reg == AMDGPU::ALU_LITERAL_X) { + unsigned ImmOpIndex = MI.getNumOperands() - 1; + MCOperand ImmOp = MI.getOperand(ImmOpIndex); + if (ImmOp.isFPImm()) { + Value.f = ImmOp.getFPImm(); + } else { + assert(ImmOp.isImm()); + Value.i = ImmOp.getImm(); + } + } + } else { + // XXX: Handle other operand types. + EmitTwoBytes(0, OS); + } + + // Emit the source channel (1 byte) + if (MO.isReg()) { + EmitByte(getHWRegChan(MO.getReg()), OS); + } else { + EmitByte(0, OS); + } + + // XXX: Emit isNegated (1 byte) + if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS))) + && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) || + (MO.isReg() && + (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){ + EmitByte(1, OS); + } else { + EmitByte(0, OS); + } + + // Emit isAbsolute (1 byte) + if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) { + EmitByte(1, OS); + } else { + EmitByte(0, OS); + } + + // XXX: Emit relative addressing mode (1 byte) + EmitByte(0, OS); + + // Emit kc_bank, This will be adjusted later by r600_asm + EmitByte(0, OS); + + // Emit the literal value, if applicable (4 bytes). + Emit(Value.i, OS); + +} + +void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx, + uint64_t &Value, raw_ostream &OS) const { + const MCOperand &MO = MI.getOperand(OpIdx); + union { + float f; + uint32_t i; + } InlineConstant; + InlineConstant.i = 0; + // Emit the source select (2 bytes). For GPRs, this is the register index. + // For other potential instruction operands, (e.g. constant registers) the + // value of the source select is defined in the r600isa docs. + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) { + EmitByte(1, OS); + } else { + EmitByte(0, OS); + } + + if (Reg == AMDGPU::ALU_LITERAL_X) { + unsigned ImmOpIndex = MI.getNumOperands() - 1; + MCOperand ImmOp = MI.getOperand(ImmOpIndex); + if (ImmOp.isFPImm()) { + InlineConstant.f = ImmOp.getFPImm(); + } else { + assert(ImmOp.isImm()); + InlineConstant.i = ImmOp.getImm(); + } + } + } + + // Emit the literal value, if applicable (4 bytes). 
+ Emit(InlineConstant.i, OS); +} + +void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups, + raw_ostream &OS) const { + + unsigned Opcode = MI.getOpcode(); + bool hasOffsets = (Opcode == AMDGPU::TEX_LD); + unsigned OpOffset = hasOffsets ? 3 : 0; + int64_t Resource = MI.getOperand(OpOffset + 2).getImm(); + int64_t Sampler = MI.getOperand(OpOffset + 3).getImm(); + int64_t TextureType = MI.getOperand(OpOffset + 4).getImm(); + unsigned srcSelect[4] = {0, 1, 2, 3}; + + // Emit instruction type + EmitByte(1, OS); + + // Emit instruction + EmitByte(getBinaryCodeForInstr(MI, Fixups), OS); + + // Emit resource id + EmitByte(Resource, OS); + + // Emit source register + EmitByte(getHWReg(MI.getOperand(1).getReg()), OS); + + // XXX: Emit src isRelativeAddress + EmitByte(0, OS); + + // Emit destination register + EmitByte(getHWReg(MI.getOperand(0).getReg()), OS); + + // XXX: Emit dst isRealtiveAddress + EmitByte(0, OS); + + // XXX: Emit dst select + EmitByte(0, OS); // X + EmitByte(1, OS); // Y + EmitByte(2, OS); // Z + EmitByte(3, OS); // W + + // XXX: Emit lod bias + EmitByte(0, OS); + + // XXX: Emit coord types + unsigned coordType[4] = {1, 1, 1, 1}; + + if (TextureType == TEXTURE_RECT + || TextureType == TEXTURE_SHADOWRECT) { + coordType[ELEMENT_X] = 0; + coordType[ELEMENT_Y] = 0; + } + + if (TextureType == TEXTURE_1D_ARRAY + || TextureType == TEXTURE_SHADOW1D_ARRAY) { + if (Opcode == AMDGPU::TEX_SAMPLE_C_L || Opcode == AMDGPU::TEX_SAMPLE_C_LB) { + coordType[ELEMENT_Y] = 0; + } else { + coordType[ELEMENT_Z] = 0; + srcSelect[ELEMENT_Z] = ELEMENT_Y; + } + } else if (TextureType == TEXTURE_2D_ARRAY + || TextureType == TEXTURE_SHADOW2D_ARRAY) { + coordType[ELEMENT_Z] = 0; + } + + for (unsigned i = 0; i < 4; i++) { + EmitByte(coordType[i], OS); + } + + // XXX: Emit offsets + if (hasOffsets) + for (unsigned i = 2; i < 5; i++) + EmitByte(MI.getOperand(i).getImm()<<1, OS); + else + EmitNullBytes(3, OS); + + // Emit sampler id + EmitByte(Sampler, OS); + + // XXX:Emit source select + if ((TextureType == TEXTURE_SHADOW1D + || TextureType == TEXTURE_SHADOW2D + || TextureType == TEXTURE_SHADOWRECT + || TextureType == TEXTURE_SHADOW1D_ARRAY) + && Opcode != AMDGPU::TEX_SAMPLE_C_L + && Opcode != AMDGPU::TEX_SAMPLE_C_LB) { + srcSelect[ELEMENT_W] = ELEMENT_Z; + } + + for (unsigned i = 0; i < 4; i++) { + EmitByte(srcSelect[i], OS); + } +} + +void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const { + + // Emit instruction type + EmitByte(INSTR_FC, OS); + + // Emit SRC + unsigned NumOperands = MI.getNumOperands(); + if (NumOperands > 0) { + assert(NumOperands == 1); + EmitSrc(MI, 0, OS); + } else { + EmitNullBytes(SRC_BYTE_COUNT, OS); + } + + // Emit FC Instruction + enum FCInstr instr; + switch (MI.getOpcode()) { + case AMDGPU::PREDICATED_BREAK: + instr = FC_BREAK_PREDICATE; + break; + case AMDGPU::CONTINUE: + instr = FC_CONTINUE; + break; + case AMDGPU::IF_PREDICATE_SET: + instr = FC_IF_PREDICATE; + break; + case AMDGPU::ELSE: + instr = FC_ELSE; + break; + case AMDGPU::ENDIF: + instr = FC_ENDIF; + break; + case AMDGPU::ENDLOOP: + instr = FC_ENDLOOP; + break; + case AMDGPU::WHILELOOP: + instr = FC_BGNLOOP; + break; + default: + abort(); + break; + } + EmitByte(instr, OS); +} + +void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount, + raw_ostream &OS) const { + + for (unsigned int i = 0; i < ByteCount; i++) { + EmitByte(0, OS); + } +} + +void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const { + OS.write((uint8_t) Byte & 0xff); +} + 
+void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes, + raw_ostream &OS) const { + OS.write((uint8_t) (Bytes & 0xff)); + OS.write((uint8_t) ((Bytes >> 8) & 0xff)); +} + +void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const { + for (unsigned i = 0; i < 4; i++) { + OS.write((uint8_t) ((Value >> (8 * i)) & 0xff)); + } +} + +void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const { + for (unsigned i = 0; i < 8; i++) { + EmitByte((Value >> (8 * i)) & 0xff, OS); + } +} + +unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const { + return MRI.getEncodingValue(reg) >> HW_CHAN_SHIFT; +} + +unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const { + return MRI.getEncodingValue(RegNo) & HW_REG_MASK; +} + +uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI, + const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixup) const { + if (MO.isReg()) { + if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) { + return MRI.getEncodingValue(MO.getReg()); + } else { + return getHWReg(MO.getReg()); + } + } else if (MO.isImm()) { + return MO.getImm(); + } else { + assert(0); + return 0; + } +} + +//===----------------------------------------------------------------------===// +// Encoding helper functions +//===----------------------------------------------------------------------===// + +bool R600MCCodeEmitter::isFCOp(unsigned opcode) const { + switch(opcode) { + default: return false; + case AMDGPU::PREDICATED_BREAK: + case AMDGPU::CONTINUE: + case AMDGPU::IF_PREDICATE_SET: + case AMDGPU::ELSE: + case AMDGPU::ENDIF: + case AMDGPU::ENDLOOP: + case AMDGPU::WHILELOOP: + return true; + } +} + +bool R600MCCodeEmitter::isTexOp(unsigned opcode) const { + switch(opcode) { + default: return false; + case AMDGPU::TEX_LD: + case AMDGPU::TEX_GET_TEXTURE_RESINFO: + case AMDGPU::TEX_SAMPLE: + case AMDGPU::TEX_SAMPLE_C: + case AMDGPU::TEX_SAMPLE_L: + case AMDGPU::TEX_SAMPLE_C_L: + case AMDGPU::TEX_SAMPLE_LB: + case AMDGPU::TEX_SAMPLE_C_LB: + case AMDGPU::TEX_SAMPLE_G: + case AMDGPU::TEX_SAMPLE_C_G: + case AMDGPU::TEX_GET_GRADIENTS_H: + case AMDGPU::TEX_GET_GRADIENTS_V: + case AMDGPU::TEX_SET_GRADIENTS_H: + case AMDGPU::TEX_SET_GRADIENTS_V: + return true; + } +} + +bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand, + unsigned Flag) const { + const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode()); + unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags); + if (FlagIndex == 0) { + return false; + } + assert(MI.getOperand(FlagIndex).isImm()); + return !!((MI.getOperand(FlagIndex).getImm() >> + (NUM_MO_FLAGS * Operand)) & Flag); +} + +#include "AMDGPUGenMCCodeEmitter.inc" diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp new file mode 100644 index 0000000000..b4bdb25289 --- /dev/null +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -0,0 +1,298 @@ +//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief The SI code emitter produces machine code that can be executed +/// directly on the GPU device. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/AMDGPUMCCodeEmitter.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" + +#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1)) +#define SI_INSTR_FLAGS_ENCODING_MASK 0xf + +// These must be kept in sync with SIInstructions.td and also the +// InstrEncodingInfo array in SIInstrInfo.cpp. +// +// NOTE: This enum is only used to identify the encoding type within LLVM, +// the actual encoding type that is part of the instruction format is different +namespace SIInstrEncodingType { + enum Encoding { + EXP = 0, + LDS = 1, + MIMG = 2, + MTBUF = 3, + MUBUF = 4, + SMRD = 5, + SOP1 = 6, + SOP2 = 7, + SOPC = 8, + SOPK = 9, + SOPP = 10, + VINTRP = 11, + VOP1 = 12, + VOP2 = 13, + VOP3 = 14, + VOPC = 15 + }; +} + +using namespace llvm; + +namespace { +class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { + SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT + const MCInstrInfo &MCII; + const MCRegisterInfo &MRI; + const MCSubtargetInfo &STI; + MCContext &Ctx; + +public: + SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, + const MCSubtargetInfo &sti, MCContext &ctx) + : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { } + + ~SIMCCodeEmitter() { } + + /// \brief Encode the instruction and write it to the OS. + virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// \returns the encoding for an MCOperand. + virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const; + +public: + + /// \brief Encode a sequence of registers with the correct alignment. + unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const; + + /// \brief Encoding for when 2 consecutive registers are used + virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixup) const; + + /// \brief Encoding for when 4 consecutive registers are used + virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixup) const; + + /// \brief Encoding for SMRD indexed loads + virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixup) const; + + /// \brief Post-Encoder method for VOP instructions + virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const; + +private: + + /// \returns the SIInstrEncodingType for this instruction. + unsigned getEncodingType(const MCInst &MI) const; + + /// \brief Get the size in bytes of this instruction's encoding.
+ unsigned getEncodingBytes(const MCInst &MI) const; + + /// \returns the hardware encoding for a register + unsigned getRegBinaryCode(unsigned reg) const; + + /// \brief Generated function that returns the hardware encoding for + /// a register + unsigned getHWRegNum(unsigned reg) const; + +}; + +} // End anonymous namespace + +MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new SIMCCodeEmitter(MCII, MRI, STI, Ctx); +} + +void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups); + unsigned bytes = getEncodingBytes(MI); + for (unsigned i = 0; i < bytes; i++) { + OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff)); + } +} + +uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, + const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const { + if (MO.isReg()) { + return getRegBinaryCode(MO.getReg()); + } else if (MO.isImm()) { + return MO.getImm(); + } else if (MO.isFPImm()) { + // XXX: Not all instructions can use inline literals + // XXX: We should make sure this is a 32-bit constant + union { + float F; + uint32_t I; + } Imm; + Imm.F = MO.getFPImm(); + return Imm.I; + } else if (MO.isExpr()) { + const MCExpr *Expr = MO.getExpr(); + MCFixupKind Kind = MCFixupKind(FK_PCRel_4); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); + return 0; + } else{ + llvm_unreachable("Encoding of this operand type is not supported yet."); + } + return 0; +} + +//===----------------------------------------------------------------------===// +// Custom Operand Encodings +//===----------------------------------------------------------------------===// + +unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo, + unsigned shift) const { + unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg()); + return regCode >> shift; + return 0; +} +unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI, + unsigned OpNo , + SmallVectorImpl<MCFixup> &Fixup) const { + return GPRAlign(MI, OpNo, 1); +} + +unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI, + unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixup) const { + return GPRAlign(MI, OpNo, 2); +} + +#define SMRD_OFFSET_MASK 0xff +#define SMRD_IMM_SHIFT 8 +#define SMRD_SBASE_MASK 0x3f +#define SMRD_SBASE_SHIFT 9 +/// This function is responsible for encoding the offset +/// and the base ptr for SMRD instructions; it should return a bit string in +/// this format: +/// +/// OFFSET = bits{7-0} +/// IMM = bits{8} +/// SBASE = bits{14-9} +/// +uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixup) const { + uint32_t Encoding; + + const MCOperand &OffsetOp = MI.getOperand(OpNo + 1); + + //XXX: Use this function for SMRD loads with register offsets + assert(OffsetOp.isImm()); + + Encoding = + (getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK) + | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit + | ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT) + ; + + return Encoding; +} + +//===----------------------------------------------------------------------===// +// Post Encoder Callbacks +//===----------------------------------------------------------------------===// + +uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const{ + unsigned encodingType =
getEncodingType(MI); + unsigned numSrcOps; + unsigned vgprBitOffset; + + if (encodingType == SIInstrEncodingType::VOP3) { + numSrcOps = 3; + vgprBitOffset = 32; + } else { + numSrcOps = 1; + vgprBitOffset = 0; + } + + // Add one to skip over the destination reg operand. + for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) { + const MCOperand &MO = MI.getOperand(opIdx); + if (MO.isReg()) { + unsigned reg = MI.getOperand(opIdx).getReg(); + if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) || + AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) { + Value |= (VGPR_BIT(opIdx)) << vgprBitOffset; + } + } else if (MO.isFPImm()) { + union { + float f; + uint32_t i; + } Imm; + // XXX: Not all instructions can use inline literals + // XXX: We should make sure this is a 32-bit constant + Imm.f = MO.getFPImm(); + Value |= ((uint64_t)Imm.i) << 32; + } + } + return Value; +} + +//===----------------------------------------------------------------------===// +// Encoding helper functions +//===----------------------------------------------------------------------===// + +unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const { + return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK; +} + +unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const { + + // These instructions aren't real instructions with an encoding type, so + // we need to manually specify their size. + switch (MI.getOpcode()) { + default: break; + case AMDGPU::SI_LOAD_LITERAL_I32: + case AMDGPU::SI_LOAD_LITERAL_F32: + return 4; + } + + unsigned encoding_type = getEncodingType(MI); + switch (encoding_type) { + case SIInstrEncodingType::EXP: + case SIInstrEncodingType::LDS: + case SIInstrEncodingType::MUBUF: + case SIInstrEncodingType::MTBUF: + case SIInstrEncodingType::MIMG: + case SIInstrEncodingType::VOP3: + return 8; + default: + return 4; + } +} + + +unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const { + switch (reg) { + case AMDGPU::M0: return 124; + case AMDGPU::SREG_LIT_0: return 128; + case AMDGPU::SI_LITERAL_CONSTANT: return 255; + default: return MRI.getEncodingValue(reg); + } +} + diff --git a/lib/Target/R600/Makefile b/lib/Target/R600/Makefile new file mode 100644 index 0000000000..1b3ebbe8c8 --- /dev/null +++ b/lib/Target/R600/Makefile @@ -0,0 +1,23 @@ +##===- lib/Target/R600/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMR600CodeGen +TARGET = AMDGPU + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \ + AMDGPUGenDAGISel.inc AMDGPUGenSubtargetInfo.inc \ + AMDGPUGenMCCodeEmitter.inc AMDGPUGenCallingConv.inc \ + AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \ + AMDGPUGenAsmWriter.inc + +DIRS = InstPrinter TargetInfo MCTargetDesc + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td new file mode 100644 index 0000000000..3dc1ecda77 --- /dev/null +++ b/lib/Target/R600/Processors.td @@ -0,0 +1,29 @@ +//===-- Processors.td - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// AMDIL processors supported. +// +//===----------------------------------------------------------------------===// + +class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features> +: Processor<Name, itin, Features>; +def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>; +def : Proc<"rv710", R600_EG_Itin, []>; +def : Proc<"rv730", R600_EG_Itin, []>; +def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>; +def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; +def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; +def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; +def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>; +def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; +def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; +def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; +def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>; +def : Proc<"SI", SI_Itin, [Feature64BitPtr]>; + diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h new file mode 100644 index 0000000000..7dea8e44ea --- /dev/null +++ b/lib/Target/R600/R600Defines.h @@ -0,0 +1,79 @@ +//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//===----------------------------------------------------------------------===// + +#ifndef R600DEFINES_H_ +#define R600DEFINES_H_ + +#include "llvm/MC/MCRegisterInfo.h" + +// Operand Flags +#define MO_FLAG_CLAMP (1 << 0) +#define MO_FLAG_NEG (1 << 1) +#define MO_FLAG_ABS (1 << 2) +#define MO_FLAG_MASK (1 << 3) +#define MO_FLAG_PUSH (1 << 4) +#define MO_FLAG_NOT_LAST (1 << 5) +#define MO_FLAG_LAST (1 << 6) +#define NUM_MO_FLAGS 7 + +/// \brief Helper for getting the operand index for the instruction flags +/// operand. +#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3) + +namespace R600_InstFlag { + enum TIF { + TRANS_ONLY = (1 << 0), + TEX = (1 << 1), + REDUCTION = (1 << 2), + FC = (1 << 3), + TRIG = (1 << 4), + OP3 = (1 << 5), + VECTOR = (1 << 6), + //FlagOperand bits 7, 8 + NATIVE_OPERANDS = (1 << 9), + OP1 = (1 << 10), + OP2 = (1 << 11) + }; +} + +#define HAS_NATIVE_OPERANDS(Flags) ((Flags) & R600_InstFlag::NATIVE_OPERANDS) + +/// \brief Defines for extracting register infomation from register encoding +#define HW_REG_MASK 0x1ff +#define HW_CHAN_SHIFT 9 + +namespace R600Operands { + enum Ops { + DST, + UPDATE_EXEC_MASK, + UPDATE_PREDICATE, + WRITE, + OMOD, + DST_REL, + CLAMP, + SRC0, + SRC0_NEG, + SRC0_REL, + SRC0_ABS, + SRC1, + SRC1_NEG, + SRC1_REL, + SRC1_ABS, + SRC2, + SRC2_NEG, + SRC2_REL, + LAST, + PRED_SEL, + IMM, + COUNT + }; +} + +#endif // R600DEFINES_H_ diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp new file mode 100644 index 0000000000..b903d4aedd --- /dev/null +++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp @@ -0,0 +1,334 @@ +//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +/// Vector, Reduction, and Cube instructions need to fill the entire instruction +/// group to work correctly. This pass expands these individual instructions +/// into several instructions that will completely fill the instruction group. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "R600Defines.h" +#include "R600InstrInfo.h" +#include "R600MachineFunctionInfo.h" +#include "R600RegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +namespace { + +class R600ExpandSpecialInstrsPass : public MachineFunctionPass { + +private: + static char ID; + const R600InstrInfo *TII; + + bool ExpandInputPerspective(MachineInstr& MI); + bool ExpandInputConstant(MachineInstr& MI); + +public: + R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { + return "R600 Expand special instructions pass"; + } +}; + +} // End anonymous namespace + +char R600ExpandSpecialInstrsPass::ID = 0; + +FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { + return new R600ExpandSpecialInstrsPass(TM); +} + +bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI) { + const R600RegisterInfo &TRI = TII->getRegisterInfo(); + if (MI.getOpcode() != AMDGPU::input_perspective) + return false; + + MachineBasicBlock::iterator I = &MI; + unsigned DstReg = MI.getOperand(0).getReg(); + R600MachineFunctionInfo *MFI = MI.getParent()->getParent() + ->getInfo<R600MachineFunctionInfo>(); + unsigned IJIndexBase; + + // In Evergreen ISA doc section 8.3.2 : + // We need to interpolate XY and ZW in two different instruction groups. + // An INTERP_* must occupy all 4 slots of an instruction group. 
+ // Output of INTERP_XY is written in X,Y slots + // Output of INTERP_ZW is written in Z,W slots + // + // Thus interpolation requires the following sequences : + // + // AnyGPR.x = INTERP_ZW; (Write Masked Out) + // AnyGPR.y = INTERP_ZW; (Write Masked Out) + // DstGPR.z = INTERP_ZW; + // DstGPR.w = INTERP_ZW; (End of first IG) + // DstGPR.x = INTERP_XY; + // DstGPR.y = INTERP_XY; + // AnyGPR.z = INTERP_XY; (Write Masked Out) + // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG) + // + switch (MI.getOperand(1).getImm()) { + case 0: + IJIndexBase = MFI->GetIJPerspectiveIndex(); + break; + case 1: + IJIndexBase = MFI->GetIJLinearIndex(); + break; + default: + assert(0 && "Unknow ij index"); + } + + for (unsigned i = 0; i < 8; i++) { + unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister( + 2 * IJIndexBase + ((i + 1) % 2)); + unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( + MI.getOperand(2).getImm()); + + + unsigned Sel = AMDGPU::sel_x; + switch (i % 4) { + case 0:Sel = AMDGPU::sel_x;break; + case 1:Sel = AMDGPU::sel_y;break; + case 2:Sel = AMDGPU::sel_z;break; + case 3:Sel = AMDGPU::sel_w;break; + default:break; + } + + unsigned Res = TRI.getSubReg(DstReg, Sel); + + unsigned Opcode = (i < 4)?AMDGPU::INTERP_ZW:AMDGPU::INTERP_XY; + + MachineBasicBlock &MBB = *(MI.getParent()); + MachineInstr *NewMI = + TII->buildDefaultInstruction(MBB, I, Opcode, Res, IJIndex, ReadReg); + + if (!(i> 1 && i < 6)) { + TII->addFlag(NewMI, 0, MO_FLAG_MASK); + } + + if (i % 4 != 3) + TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); + } + + MI.eraseFromParent(); + + return true; +} + +bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI) { + const R600RegisterInfo &TRI = TII->getRegisterInfo(); + if (MI.getOpcode() != AMDGPU::input_constant) + return false; + + MachineBasicBlock::iterator I = &MI; + unsigned DstReg = MI.getOperand(0).getReg(); + + for (unsigned i = 0; i < 4; i++) { + unsigned ReadReg = AMDGPU::R600_ArrayBaseRegClass.getRegister( + MI.getOperand(1).getImm()); + + unsigned Sel = AMDGPU::sel_x; + switch (i % 4) { + case 0:Sel = AMDGPU::sel_x;break; + case 1:Sel = AMDGPU::sel_y;break; + case 2:Sel = AMDGPU::sel_z;break; + case 3:Sel = AMDGPU::sel_w;break; + default:break; + } + + unsigned Res = TRI.getSubReg(DstReg, Sel); + + MachineBasicBlock &MBB = *(MI.getParent()); + MachineInstr *NewMI = TII->buildDefaultInstruction( + MBB, I, AMDGPU::INTERP_LOAD_P0, Res, ReadReg); + + if (i % 4 != 3) + TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); + } + + MI.eraseFromParent(); + + return true; +} + +bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) { + + const R600RegisterInfo &TRI = TII->getRegisterInfo(); + + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + MachineBasicBlock::iterator I = MBB.begin(); + while (I != MBB.end()) { + MachineInstr &MI = *I; + I = llvm::next(I); + + switch (MI.getOpcode()) { + default: break; + // Expand PRED_X to one of the PRED_SET instructions. + case AMDGPU::PRED_X: { + uint64_t Flags = MI.getOperand(3).getImm(); + // The native opcode used by PRED_X is stored as an immediate in the + // third operand. 
+ MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I, + MI.getOperand(2).getImm(), // opcode + MI.getOperand(0).getReg(), // dst + MI.getOperand(1).getReg(), // src0 + AMDGPU::ZERO); // src1 + TII->addFlag(PredSet, 0, MO_FLAG_MASK); + if (Flags & MO_FLAG_PUSH) { + TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1); + } else { + TII->setImmOperand(PredSet, R600Operands::UPDATE_PREDICATE, 1); + } + MI.eraseFromParent(); + continue; + } + case AMDGPU::BREAK: + MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I, + AMDGPU::PRED_SETE_INT, + AMDGPU::PREDICATE_BIT, + AMDGPU::ZERO, + AMDGPU::ZERO); + TII->addFlag(PredSet, 0, MO_FLAG_MASK); + TII->setImmOperand(PredSet, R600Operands::UPDATE_EXEC_MASK, 1); + + BuildMI(MBB, I, MBB.findDebugLoc(I), + TII->get(AMDGPU::PREDICATED_BREAK)) + .addReg(AMDGPU::PREDICATE_BIT); + MI.eraseFromParent(); + continue; + } + + if (ExpandInputPerspective(MI)) + continue; + if (ExpandInputConstant(MI)) + continue; + + bool IsReduction = TII->isReductionOp(MI.getOpcode()); + bool IsVector = TII->isVector(MI); + bool IsCube = TII->isCubeOp(MI.getOpcode()); + if (!IsReduction && !IsVector && !IsCube) { + continue; + } + + // Expand the instruction + // + // Reduction instructions: + // T0_X = DP4 T1_XYZW, T2_XYZW + // becomes: + // TO_X = DP4 T1_X, T2_X + // TO_Y (write masked) = DP4 T1_Y, T2_Y + // TO_Z (write masked) = DP4 T1_Z, T2_Z + // TO_W (write masked) = DP4 T1_W, T2_W + // + // Vector instructions: + // T0_X = MULLO_INT T1_X, T2_X + // becomes: + // T0_X = MULLO_INT T1_X, T2_X + // T0_Y (write masked) = MULLO_INT T1_X, T2_X + // T0_Z (write masked) = MULLO_INT T1_X, T2_X + // T0_W (write masked) = MULLO_INT T1_X, T2_X + // + // Cube instructions: + // T0_XYZW = CUBE T1_XYZW + // becomes: + // TO_X = CUBE T1_Z, T1_Y + // T0_Y = CUBE T1_Z, T1_X + // T0_Z = CUBE T1_X, T1_Z + // T0_W = CUBE T1_Y, T1_Z + for (unsigned Chan = 0; Chan < 4; Chan++) { + unsigned DstReg = MI.getOperand( + TII->getOperandIdx(MI, R600Operands::DST)).getReg(); + unsigned Src0 = MI.getOperand( + TII->getOperandIdx(MI, R600Operands::SRC0)).getReg(); + unsigned Src1 = 0; + + // Determine the correct source registers + if (!IsCube) { + int Src1Idx = TII->getOperandIdx(MI, R600Operands::SRC1); + if (Src1Idx != -1) { + Src1 = MI.getOperand(Src1Idx).getReg(); + } + } + if (IsReduction) { + unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); + Src0 = TRI.getSubReg(Src0, SubRegIndex); + Src1 = TRI.getSubReg(Src1, SubRegIndex); + } else if (IsCube) { + static const int CubeSrcSwz[] = {2, 2, 0, 1}; + unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]); + unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]); + Src1 = TRI.getSubReg(Src0, SubRegIndex1); + Src0 = TRI.getSubReg(Src0, SubRegIndex0); + } + + // Determine the correct destination registers; + bool Mask = false; + bool NotLast = true; + if (IsCube) { + unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan); + DstReg = TRI.getSubReg(DstReg, SubRegIndex); + } else { + // Mask the write if the original instruction does not write to + // the current Channel. 
+ Mask = (Chan != TRI.getHWRegChan(DstReg)); + unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK; + DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan); + } + + // Set the IsLast bit + NotLast = (Chan != 3 ); + + // Add the new instruction + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + case AMDGPU::CUBE_r600_pseudo: + Opcode = AMDGPU::CUBE_r600_real; + break; + case AMDGPU::CUBE_eg_pseudo: + Opcode = AMDGPU::CUBE_eg_real; + break; + case AMDGPU::DOT4_r600_pseudo: + Opcode = AMDGPU::DOT4_r600_real; + break; + case AMDGPU::DOT4_eg_pseudo: + Opcode = AMDGPU::DOT4_eg_real; + break; + default: + break; + } + + MachineInstr *NewMI = + TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1); + + if (Chan != 0) + NewMI->bundleWithPred(); + if (Mask) { + TII->addFlag(NewMI, 0, MO_FLAG_MASK); + } + if (NotLast) { + TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST); + } + } + MI.eraseFromParent(); + } + } + return false; +} diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp new file mode 100644 index 0000000000..f0eece39ea --- /dev/null +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -0,0 +1,909 @@ +//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Custom DAG lowering for R600 +// +//===----------------------------------------------------------------------===// + +#include "R600ISelLowering.h" +#include "R600Defines.h" +#include "R600InstrInfo.h" +#include "R600MachineFunctionInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Function.h" + +using namespace llvm; + +R600TargetLowering::R600TargetLowering(TargetMachine &TM) : + AMDGPUTargetLowering(TM), + TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) { + setOperationAction(ISD::MUL, MVT::i64, Expand); + addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); + addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); + addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); + computeRegisterProperties(); + + setOperationAction(ISD::FADD, MVT::v4f32, Expand); + setOperationAction(ISD::FMUL, MVT::v4f32, Expand); + setOperationAction(ISD::FDIV, MVT::v4f32, Expand); + setOperationAction(ISD::FSUB, MVT::v4f32, Expand); + + setOperationAction(ISD::ADD, MVT::v4i32, Expand); + setOperationAction(ISD::AND, MVT::v4i32, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand); + setOperationAction(ISD::UDIV, MVT::v4i32, Expand); + setOperationAction(ISD::UREM, MVT::v4i32, Expand); + setOperationAction(ISD::SETCC, MVT::v4i32, Expand); + + setOperationAction(ISD::BR_CC, MVT::i32, Custom); + setOperationAction(ISD::BR_CC, MVT::f32, Custom); + + setOperationAction(ISD::FSUB, MVT::f32, Expand); + + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom); + 
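+  // FPOW is expanded in LowerFPOW below as exp2(y * log2(x)), using the FLOG2/FMUL/FEXP2 nodes.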
setOperationAction(ISD::FPOW, MVT::f32, Custom); + + setOperationAction(ISD::ROTL, MVT::i32, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + + setOperationAction(ISD::SETCC, MVT::i32, Custom); + setOperationAction(ISD::SETCC, MVT::f32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); + + setOperationAction(ISD::SELECT, MVT::i32, Custom); + setOperationAction(ISD::SELECT, MVT::f32, Custom); + + setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::v4i32, Custom); + + setTargetDAGCombine(ISD::FP_ROUND); + + setSchedulingPreference(Sched::VLIW); +} + +MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( + MachineInstr * MI, MachineBasicBlock * BB) const { + MachineFunction * MF = BB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineBasicBlock::iterator I = *MI; + + switch (MI->getOpcode()) { + default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case AMDGPU::SHADER_TYPE: break; + case AMDGPU::CLAMP_R600: { + MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, + AMDGPU::MOV, + MI->getOperand(0).getReg(), + MI->getOperand(1).getReg()); + TII->addFlag(NewMI, 0, MO_FLAG_CLAMP); + break; + } + + case AMDGPU::FABS_R600: { + MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, + AMDGPU::MOV, + MI->getOperand(0).getReg(), + MI->getOperand(1).getReg()); + TII->addFlag(NewMI, 0, MO_FLAG_ABS); + break; + } + + case AMDGPU::FNEG_R600: { + MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, + AMDGPU::MOV, + MI->getOperand(0).getReg(), + MI->getOperand(1).getReg()); + TII->addFlag(NewMI, 0, MO_FLAG_NEG); + break; + } + + case AMDGPU::R600_LOAD_CONST: { + int64_t RegIndex = MI->getOperand(1).getImm(); + unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY)) + .addOperand(MI->getOperand(0)) + .addReg(ConstantReg); + break; + } + + case AMDGPU::MASK_WRITE: { + unsigned maskedRegister = MI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); + MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); + TII->addFlag(defInstr, 0, MO_FLAG_MASK); + break; + } + + case AMDGPU::MOV_IMM_F32: + TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), + MI->getOperand(1).getFPImm()->getValueAPF() + .bitcastToAPInt().getZExtValue()); + break; + case AMDGPU::MOV_IMM_I32: + TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), + MI->getOperand(1).getImm()); + break; + + + case AMDGPU::RAT_WRITE_CACHELESS_32_eg: + case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { + unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 
1 : 0; + + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addImm(EOP); // Set End of program bit + break; + } + + case AMDGPU::RESERVE_REG: { + R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>(); + int64_t ReservedIndex = MI->getOperand(0).getImm(); + unsigned ReservedReg = + AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex); + MFI->ReservedRegs.push_back(ReservedReg); + unsigned SuperReg = + AMDGPU::R600_Reg128RegClass.getRegister(ReservedIndex / 4); + MFI->ReservedRegs.push_back(SuperReg); + break; + } + + case AMDGPU::TXD: { + unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); + unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); + + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0) + .addOperand(MI->getOperand(3)) + .addOperand(MI->getOperand(4)) + .addOperand(MI->getOperand(5)) + .addOperand(MI->getOperand(6)); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1) + .addOperand(MI->getOperand(2)) + .addOperand(MI->getOperand(4)) + .addOperand(MI->getOperand(5)) + .addOperand(MI->getOperand(6)); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addOperand(MI->getOperand(4)) + .addOperand(MI->getOperand(5)) + .addOperand(MI->getOperand(6)) + .addReg(T0, RegState::Implicit) + .addReg(T1, RegState::Implicit); + break; + } + + case AMDGPU::TXD_SHADOW: { + unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); + unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); + + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0) + .addOperand(MI->getOperand(3)) + .addOperand(MI->getOperand(4)) + .addOperand(MI->getOperand(5)) + .addOperand(MI->getOperand(6)); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1) + .addOperand(MI->getOperand(2)) + .addOperand(MI->getOperand(4)) + .addOperand(MI->getOperand(5)) + .addOperand(MI->getOperand(6)); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addOperand(MI->getOperand(4)) + .addOperand(MI->getOperand(5)) + .addOperand(MI->getOperand(6)) + .addReg(T0, RegState::Implicit) + .addReg(T1, RegState::Implicit); + break; + } + + case AMDGPU::BRANCH: + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) + .addOperand(MI->getOperand(0)) + .addReg(0); + break; + + case AMDGPU::BRANCH_COND_f32: { + MachineInstr *NewMI = + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), + AMDGPU::PREDICATE_BIT) + .addOperand(MI->getOperand(1)) + .addImm(OPCODE_IS_NOT_ZERO) + .addImm(0); // Flags + TII->addFlag(NewMI, 0, MO_FLAG_PUSH); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) + .addOperand(MI->getOperand(0)) + .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + break; + } + + case AMDGPU::BRANCH_COND_i32: { + MachineInstr *NewMI = + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), + AMDGPU::PREDICATE_BIT) + .addOperand(MI->getOperand(1)) + .addImm(OPCODE_IS_NOT_ZERO_INT) + .addImm(0); // Flags + TII->addFlag(NewMI, 0, MO_FLAG_PUSH); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) + .addOperand(MI->getOperand(0)) + .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + break; + } + + case AMDGPU::input_perspective: { + R600MachineFunctionInfo 
*MFI = MF->getInfo<R600MachineFunctionInfo>(); + + // XXX Be more fine about register reservation + for (unsigned i = 0; i < 4; i ++) { + unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i); + MFI->ReservedRegs.push_back(ReservedReg); + } + + switch (MI->getOperand(1).getImm()) { + case 0:// Perspective + MFI->HasPerspectiveInterpolation = true; + break; + case 1:// Linear + MFI->HasLinearInterpolation = true; + break; + default: + assert(0 && "Unknow ij index"); + } + + return BB; + } + + case AMDGPU::EG_ExportSwz: + case AMDGPU::R600_ExportSwz: { + bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0; + if (!EOP) + return BB; + unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40; + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addOperand(MI->getOperand(2)) + .addOperand(MI->getOperand(3)) + .addOperand(MI->getOperand(4)) + .addOperand(MI->getOperand(5)) + .addOperand(MI->getOperand(6)) + .addImm(CfInst) + .addImm(1); + break; + } + } + + MI->eraseFromParent(); + return BB; +} + +//===----------------------------------------------------------------------===// +// Custom DAG Lowering Operations +//===----------------------------------------------------------------------===// + +using namespace llvm::Intrinsic; +using namespace llvm::AMDGPUIntrinsic; + +static SDValue +InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap, + unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type, + SDValue Scalar, SDValue Chain) { + if (!ExportMap[Slot]) { + SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, + DL, MVT::v4f32, + DAG.getUNDEF(MVT::v4f32), + Scalar, + DAG.getConstant(Channel, MVT::i32)); + + unsigned Mask = 1 << Channel; + + const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32), + DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32), + DAG.getConstant(Mask, MVT::i32)}; + + SDValue Res = DAG.getNode( + AMDGPUISD::EXPORT, + DL, + MVT::Other, + Ops, 6); + ExportMap[Slot] = Res.getNode(); + return Res; + } + + SDNode *ExportInstruction = (SDNode *) ExportMap[Slot] ; + SDValue PreviousVector = ExportInstruction->getOperand(1); + SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, + DL, MVT::v4f32, + PreviousVector, + Scalar, + DAG.getConstant(Channel, MVT::i32)); + + unsigned Mask = dyn_cast<ConstantSDNode>(ExportInstruction->getOperand(5)) + ->getZExtValue(); + Mask |= (1 << Channel); + + const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector, + DAG.getConstant(Inst, MVT::i32), + DAG.getConstant(Type, MVT::i32), + DAG.getConstant(Slot, MVT::i32), + DAG.getConstant(Mask, MVT::i32)}; + + DAG.UpdateNodeOperands(ExportInstruction, + Ops, 6); + + return Chain; + +} + +SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); + case ISD::BR_CC: return LowerBR_CC(Op, DAG); + case ISD::ROTL: return LowerROTL(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::SELECT: return LowerSELECT(Op, DAG); + case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::FPOW: return LowerFPOW(Op, DAG); + case ISD::INTRINSIC_VOID: { + SDValue Chain = Op.getOperand(0); + unsigned IntrinsicID = + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + switch (IntrinsicID) { + case AMDGPUIntrinsic::AMDGPU_store_output: { + MachineFunction 
&MF = DAG.getMachineFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); + if (!MRI.isLiveOut(Reg)) { + MRI.addLiveOut(Reg); + } + return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2)); + } + case AMDGPUIntrinsic::R600_store_pixel_color: { + MachineFunction &MF = DAG.getMachineFunction(); + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); + int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + + SDNode **OutputsMap = MFI->Outputs; + return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, + RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2), + Chain); + + } + case AMDGPUIntrinsic::R600_store_stream_output : { + MachineFunction &MF = DAG.getMachineFunction(); + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); + int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); + int64_t BufIndex = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); + + SDNode **OutputsMap = MFI->StreamOutputs[BufIndex]; + unsigned Inst; + switch (cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue() ) { + // STREAM3 + case 3: + Inst = 4; + break; + // STREAM2 + case 2: + Inst = 3; + break; + // STREAM1 + case 1: + Inst = 2; + break; + // STREAM0 + case 0: + Inst = 1; + break; + default: + llvm_unreachable("Wrong buffer id for stream outputs !"); + } + + return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, + RegIndex / 4, RegIndex % 4, Inst, 0, Op.getOperand(2), + Chain); + } + // default for switch(IntrinsicID) + default: break; + } + // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode()) + break; + } + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntrinsicID = + cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + switch(IntrinsicID) { + default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); + case AMDGPUIntrinsic::R600_load_input: { + int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); + return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT); + } + case AMDGPUIntrinsic::R600_load_input_perspective: { + int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + if (slot < 0) + return DAG.getUNDEF(MVT::f32); + SDValue FullVector = DAG.getNode( + AMDGPUISD::INTERP, + DL, MVT::v4f32, + DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); + } + case AMDGPUIntrinsic::R600_load_input_linear: { + int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + if (slot < 0) + return DAG.getUNDEF(MVT::f32); + SDValue FullVector = DAG.getNode( + AMDGPUISD::INTERP, + DL, MVT::v4f32, + DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32)); + } + case AMDGPUIntrinsic::R600_load_input_constant: { + int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + if (slot < 0) + return DAG.getUNDEF(MVT::f32); + SDValue FullVector = DAG.getNode( + AMDGPUISD::INTERP_P0, + DL, MVT::v4f32, + DAG.getConstant(slot / 4 , MVT::i32)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + DL, VT, FullVector, 
DAG.getConstant(slot % 4, MVT::i32)); + } + + case r600_read_ngroups_x: + return LowerImplicitParameter(DAG, VT, DL, 0); + case r600_read_ngroups_y: + return LowerImplicitParameter(DAG, VT, DL, 1); + case r600_read_ngroups_z: + return LowerImplicitParameter(DAG, VT, DL, 2); + case r600_read_global_size_x: + return LowerImplicitParameter(DAG, VT, DL, 3); + case r600_read_global_size_y: + return LowerImplicitParameter(DAG, VT, DL, 4); + case r600_read_global_size_z: + return LowerImplicitParameter(DAG, VT, DL, 5); + case r600_read_local_size_x: + return LowerImplicitParameter(DAG, VT, DL, 6); + case r600_read_local_size_y: + return LowerImplicitParameter(DAG, VT, DL, 7); + case r600_read_local_size_z: + return LowerImplicitParameter(DAG, VT, DL, 8); + + case r600_read_tgid_x: + return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T1_X, VT); + case r600_read_tgid_y: + return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T1_Y, VT); + case r600_read_tgid_z: + return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T1_Z, VT); + case r600_read_tidig_x: + return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T0_X, VT); + case r600_read_tidig_y: + return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T0_Y, VT); + case r600_read_tidig_z: + return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, + AMDGPU::T0_Z, VT); + } + // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) + break; + } + } // end switch(Op.getOpcode()) + return SDValue(); +} + +void R600TargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const { + switch (N->getOpcode()) { + default: return; + case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); + } +} + +SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { + return DAG.getNode( + ISD::SETCC, + Op.getDebugLoc(), + MVT::i1, + Op, DAG.getConstantFP(0.0f, MVT::f32), + DAG.getCondCode(ISD::SETNE) + ); +} + +SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue CC = Op.getOperand(1); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue JumpT = Op.getOperand(4); + SDValue CmpValue; + SDValue Result; + + if (LHS.getValueType() == MVT::i32) { + CmpValue = DAG.getNode( + ISD::SELECT_CC, + Op.getDebugLoc(), + MVT::i32, + LHS, RHS, + DAG.getConstant(-1, MVT::i32), + DAG.getConstant(0, MVT::i32), + CC); + } else if (LHS.getValueType() == MVT::f32) { + CmpValue = DAG.getNode( + ISD::SELECT_CC, + Op.getDebugLoc(), + MVT::f32, + LHS, RHS, + DAG.getConstantFP(1.0f, MVT::f32), + DAG.getConstantFP(0.0f, MVT::f32), + CC); + } else { + assert(0 && "Not valid type for br_cc"); + } + Result = DAG.getNode( + AMDGPUISD::BRANCH_COND, + CmpValue.getDebugLoc(), + MVT::Other, Chain, + JumpT, CmpValue); + return Result; +} + +SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, + DebugLoc DL, + unsigned DwordOffset) const { + unsigned ByteOffset = DwordOffset * 4; + PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), + AMDGPUAS::PARAM_I_ADDRESS); + + // We shouldn't be using an offset wider than 16-bits for implicit parameters. 
+ assert(isInt<16>(ByteOffset)); + + return DAG.getLoad(VT, DL, DAG.getEntryNode(), + DAG.getConstant(ByteOffset, MVT::i32), // PTR + MachinePointerInfo(ConstantPointerNull::get(PtrType)), + false, false, false, 0); +} + +SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + + return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT, + Op.getOperand(0), + Op.getOperand(0), + DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(32, MVT::i32), + Op.getOperand(1))); +} + +bool R600TargetLowering::isZero(SDValue Op) const { + if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { + return Cst->isNullValue(); + } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){ + return CstFP->isZero(); + } else { + return false; + } +} + +SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue True = Op.getOperand(2); + SDValue False = Op.getOperand(3); + SDValue CC = Op.getOperand(4); + SDValue Temp; + + // LHS and RHS are guaranteed to be the same value type + EVT CompareVT = LHS.getValueType(); + + // Check if we can lower this to a native operation. + + // Try to lower to a CND* instruction: + // CND* instructions requires RHS to be zero. Some SELECT_CC nodes that + // can be lowered to CND* instructions can also be lowered to SET* + // instructions. CND* instructions are cheaper, because they dont't + // require additional instructions to convert their result to the correct + // value type, so this check should be first. + if (isZero(LHS) || isZero(RHS)) { + SDValue Cond = (isZero(LHS) ? RHS : LHS); + SDValue Zero = (isZero(LHS) ? LHS : RHS); + ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); + if (CompareVT != VT) { + // Bitcast True / False to the correct types. This will end up being + // a nop, but it allows us to define only a single pattern in the + // .TD files for each CND* instruction rather than having to have + // one pattern for integer True/False and one for fp True/False + True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True); + False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False); + } + if (isZero(LHS)) { + CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode); + } + + switch (CCOpcode) { + case ISD::SETONE: + case ISD::SETUNE: + case ISD::SETNE: + case ISD::SETULE: + case ISD::SETULT: + case ISD::SETOLE: + case ISD::SETOLT: + case ISD::SETLE: + case ISD::SETLT: + CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32); + Temp = True; + True = False; + False = Temp; + break; + default: + break; + } + SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, + Cond, Zero, + True, False, + DAG.getCondCode(CCOpcode)); + return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode); + } + + // Try to lower to a SET* instruction: + // We need all the operands of SELECT_CC to have the same value type, so if + // necessary we need to change True and False to be the same type as LHS and + // RHS, and then convert the result of the select_cc back to the correct type. + + // Move hardware True/False values to the correct operand. 
+ if (isHWTrueValue(False) && isHWFalseValue(True)) { + ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); + std::swap(False, True); + CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32)); + } + + if (isHWTrueValue(True) && isHWFalseValue(False)) { + if (CompareVT != VT) { + if (VT == MVT::f32 && CompareVT == MVT::i32) { + SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, + LHS, RHS, + DAG.getConstant(-1, MVT::i32), + DAG.getConstant(0, MVT::i32), + CC); + // Convert integer values of true (-1) and false (0) to fp values of + // true (1.0f) and false (0.0f). + SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB); + } else if (VT == MVT::i32 && CompareVT == MVT::f32) { + SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, + LHS, RHS, + DAG.getConstantFP(1.0f, MVT::f32), + DAG.getConstantFP(0.0f, MVT::f32), + CC); + // Convert fp values of true (1.0f) and false (0.0f) to integer values + // of true (-1) and false (0). + SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt); + return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg); + } else { + // I don't think there will be any other type pairings. + assert(!"Unhandled operand type pairings in SELECT_CC"); + } + } else { + // This SELECT_CC is already legal. + return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); + } + } + + // Possible Min/Max pattern + SDValue MinMax = LowerMinMax(Op, DAG); + if (MinMax.getNode()) { + return MinMax; + } + + // If we make it this far, it means we have no native instructions to handle + // this SELECT_CC, so we must lower it. + SDValue HWTrue, HWFalse; + + if (CompareVT == MVT::f32) { + HWTrue = DAG.getConstantFP(1.0f, CompareVT); + HWFalse = DAG.getConstantFP(0.0f, CompareVT); + } else if (CompareVT == MVT::i32) { + HWTrue = DAG.getConstant(-1, CompareVT); + HWFalse = DAG.getConstant(0, CompareVT); + } + else { + assert(!"Unhandled value type in LowerSELECT_CC"); + } + + // Lower this unsupported SELECT_CC into a combination of two supported + // SELECT_CC operations.
+ SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC); + + return DAG.getNode(ISD::SELECT_CC, DL, VT, + Cond, HWFalse, + True, False, + DAG.getCondCode(ISD::SETNE)); +} + +SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { + return DAG.getNode(ISD::SELECT_CC, + Op.getDebugLoc(), + Op.getValueType(), + Op.getOperand(0), + DAG.getConstant(0, MVT::i32), + Op.getOperand(1), + Op.getOperand(2), + DAG.getCondCode(ISD::SETNE)); +} + +SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + SDValue Cond; + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue CC = Op.getOperand(2); + DebugLoc DL = Op.getDebugLoc(); + assert(Op.getValueType() == MVT::i32); + if (LHS.getValueType() == MVT::i32) { + Cond = DAG.getNode( + ISD::SELECT_CC, + Op.getDebugLoc(), + MVT::i32, + LHS, RHS, + DAG.getConstant(-1, MVT::i32), + DAG.getConstant(0, MVT::i32), + CC); + } else if (LHS.getValueType() == MVT::f32) { + Cond = DAG.getNode( + ISD::SELECT_CC, + Op.getDebugLoc(), + MVT::f32, + LHS, RHS, + DAG.getConstantFP(1.0f, MVT::f32), + DAG.getConstantFP(0.0f, MVT::f32), + CC); + Cond = DAG.getNode( + ISD::FP_TO_SINT, + DL, + MVT::i32, + Cond); + } else { + assert(0 && "Not valid type for set_cc"); + } + Cond = DAG.getNode( + ISD::AND, + DL, + MVT::i32, + DAG.getConstant(1, MVT::i32), + Cond); + return Cond; +} + +SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + StoreSDNode *StoreNode = cast<StoreSDNode>(Op); + SDValue Chain = Op.getOperand(0); + SDValue Value = Op.getOperand(1); + SDValue Ptr = Op.getOperand(2); + + if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && + Ptr->getOpcode() != AMDGPUISD::DWORDADDR) { + // Convert pointer from byte address to dword address. + Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(), + DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), + Ptr, DAG.getConstant(2, MVT::i32))); + + if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) { + assert(!"Truncated and indexed stores not supported yet"); + } else { + Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); + } + return Chain; + } + return SDValue(); +} + + +SDValue R600TargetLowering::LowerFPOW(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0)); + SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase); + return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase); +} + +/// XXX Only kernel functions are supported, so we can assume for now that +/// every function is a kernel function, but in the future we should use +/// separate calling conventions for kernel and non-kernel functions. +SDValue R600TargetLowering::LowerFormalArguments( + SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + unsigned ParamOffsetBytes = 36; + Function::const_arg_iterator FuncArg = + DAG.getMachineFunction().getFunction()->arg_begin(); + for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) { + EVT VT = Ins[i].VT; + Type *ArgType = FuncArg->getType(); + unsigned ArgSizeInBits = ArgType->isPointerTy() ? 
+ 32 : ArgType->getPrimitiveSizeInBits(); + unsigned ArgBytes = ArgSizeInBits >> 3; + EVT ArgVT; + if (ArgSizeInBits < VT.getSizeInBits()) { + assert(!ArgType->isFloatTy() && + "Extending floating point arguments not supported yet"); + ArgVT = MVT::getIntegerVT(ArgSizeInBits); + } else { + ArgVT = VT; + } + PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), + AMDGPUAS::PARAM_I_ADDRESS); + SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(), + DAG.getConstant(ParamOffsetBytes, MVT::i32), + MachinePointerInfo(new Argument(PtrTy)), + ArgVT, false, false, ArgBytes); + InVals.push_back(Arg); + ParamOffsetBytes += ArgBytes; + } + return Chain; +} + +EVT R600TargetLowering::getSetCCResultType(EVT VT) const { + if (!VT.isVector()) return MVT::i32; + return VT.changeVectorElementTypeToInteger(); +} + +//===----------------------------------------------------------------------===// +// Custom DAG Optimizations +//===----------------------------------------------------------------------===// + +SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + switch (N->getOpcode()) { + // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a) + case ISD::FP_ROUND: { + SDValue Arg = N->getOperand(0); + if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) { + return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0), + Arg.getOperand(0)); + } + break; + } + } + return SDValue(); +} diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h new file mode 100644 index 0000000000..2b954dab55 --- /dev/null +++ b/lib/Target/R600/R600ISelLowering.h @@ -0,0 +1,72 @@ +//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief R600 DAG Lowering interface definition +// +//===----------------------------------------------------------------------===// + +#ifndef R600ISELLOWERING_H +#define R600ISELLOWERING_H + +#include "AMDGPUISelLowering.h" + +namespace llvm { + +class R600InstrInfo; + +class R600TargetLowering : public AMDGPUTargetLowering { +public: + R600TargetLowering(TargetMachine &TM); + virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock * BB) const; + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + void ReplaceNodeResults(SDNode * N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const; + virtual SDValue LowerFormalArguments( + SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + virtual EVT getSetCCResultType(EVT VT) const; +private: + const R600InstrInfo * TII; + + /// Each OpenCL kernel has nine implicit parameters that are stored in the + /// first nine dwords of a Vertex Buffer. These implicit parameters are + /// lowered to load instructions which retreive the values from the Vertex + /// Buffer. 
+ SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT, + DebugLoc DL, unsigned DwordOffset) const; + + void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, + MachineRegisterInfo & MRI, unsigned dword_offset) const; + + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + + /// \brief Lower ROTL opcode to BITALIGN + SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const; + + bool isZero(SDValue Op) const; +}; + +} // End namespace llvm; + +#endif // R600ISELLOWERING_H diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp new file mode 100644 index 0000000000..06b78d09cc --- /dev/null +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -0,0 +1,666 @@ +//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief R600 Implementation of TargetInstrInfo. +// +//===----------------------------------------------------------------------===// + +#include "R600InstrInfo.h" +#include "AMDGPUSubtarget.h" +#include "AMDGPUTargetMachine.h" +#include "R600Defines.h" +#include "R600RegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +#define GET_INSTRINFO_CTOR +#include "AMDGPUGenDFAPacketizer.inc" + +using namespace llvm; + +R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) + : AMDGPUInstrInfo(tm), + RI(tm, *this) + { } + +const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const { + return RI; +} + +bool R600InstrInfo::isTrig(const MachineInstr &MI) const { + return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG; +} + +bool R600InstrInfo::isVector(const MachineInstr &MI) const { + return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; +} + +void +R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (AMDGPU::R600_Reg128RegClass.contains(DestReg) + && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { + for (unsigned I = 0; I < 4; I++) { + unsigned SubRegIndex = RI.getSubRegFromChannel(I); + buildDefaultInstruction(MBB, MI, AMDGPU::MOV, + RI.getSubReg(DestReg, SubRegIndex), + RI.getSubReg(SrcReg, SubRegIndex)) + .addReg(DestReg, + RegState::Define | RegState::Implicit); + } + } else { + + // We can't copy vec4 registers + assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg) + && !AMDGPU::R600_Reg128RegClass.contains(SrcReg)); + + MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV, + DestReg, SrcReg); + NewMI->getOperand(getOperandIdx(*NewMI, R600Operands::SRC0)) + .setIsKill(KillSrc); + } +} + +MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF, + unsigned DstReg, int64_t Imm) const { + MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc()); + MachineInstrBuilder MIB(*MF, MI); + MIB.addReg(DstReg, RegState::Define); + MIB.addReg(AMDGPU::ALU_LITERAL_X); + MIB.addImm(Imm); + MIB.addReg(0); // PREDICATE_BIT + + return MI; +} + 
+unsigned R600InstrInfo::getIEQOpcode() const { + return AMDGPU::SETE_INT; +} + +bool R600InstrInfo::isMov(unsigned Opcode) const { + + + switch(Opcode) { + default: return false; + case AMDGPU::MOV: + case AMDGPU::MOV_IMM_F32: + case AMDGPU::MOV_IMM_I32: + return true; + } +} + +// Some instructions act as place holders to emulate operations that the GPU +// hardware does automatically. This function can be used to check if +// an opcode falls into this category. +bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const { + switch (Opcode) { + default: return false; + case AMDGPU::RETURN: + case AMDGPU::RESERVE_REG: + return true; + } +} + +bool R600InstrInfo::isReductionOp(unsigned Opcode) const { + switch(Opcode) { + default: return false; + case AMDGPU::DOT4_r600_pseudo: + case AMDGPU::DOT4_eg_pseudo: + return true; + } +} + +bool R600InstrInfo::isCubeOp(unsigned Opcode) const { + switch(Opcode) { + default: return false; + case AMDGPU::CUBE_r600_pseudo: + case AMDGPU::CUBE_r600_real: + case AMDGPU::CUBE_eg_pseudo: + case AMDGPU::CUBE_eg_real: + return true; + } +} + +bool R600InstrInfo::isALUInstr(unsigned Opcode) const { + unsigned TargetFlags = get(Opcode).TSFlags; + + return ((TargetFlags & R600_InstFlag::OP1) | + (TargetFlags & R600_InstFlag::OP2) | + (TargetFlags & R600_InstFlag::OP3)); +} + +DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, + const ScheduleDAG *DAG) const { + const InstrItineraryData *II = TM->getInstrItineraryData(); + return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II); +} + +static bool +isPredicateSetter(unsigned Opcode) { + switch (Opcode) { + case AMDGPU::PRED_X: + return true; + default: + return false; + } +} + +static MachineInstr * +findFirstPredicateSetterFrom(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + while (I != MBB.begin()) { + --I; + MachineInstr *MI = I; + if (isPredicateSetter(MI->getOpcode())) + return MI; + } + + return NULL; +} + +bool +R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + // Most of the following comes from the ARM implementation of AnalyzeBranch + + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return false; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) { + return false; + } + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. + unsigned LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || + static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) { + if (LastOpc == AMDGPU::JUMP) { + if(!isPredicated(LastInst)) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else { + MachineInstr *predSet = I; + while (!isPredicateSetter(predSet->getOpcode())) { + predSet = --I; + } + TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(predSet->getOperand(1)); + Cond.push_back(predSet->getOperand(2)); + Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); + return false; + } + } + return true; // Can't handle indirect branch. + } + + // Get the instruction before it if it is a terminator. 
+ MachineInstr *SecondLastInst = I; + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + + // If the block ends with a B and a Bcc, handle it. + if (SecondLastOpc == AMDGPU::JUMP && + isPredicated(SecondLastInst) && + LastOpc == AMDGPU::JUMP && + !isPredicated(LastInst)) { + MachineInstr *predSet = --I; + while (!isPredicateSetter(predSet->getOpcode())) { + predSet = --I; + } + TBB = SecondLastInst->getOperand(0).getMBB(); + FBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(predSet->getOperand(1)); + Cond.push_back(predSet->getOperand(2)); + Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); + return false; + } + + // Otherwise, can't handle this. + return true; +} + +int R600InstrInfo::getBranchInstr(const MachineOperand &op) const { + const MachineInstr *MI = op.getParent(); + + switch (MI->getDesc().OpInfo->RegClass) { + default: // FIXME: fallthrough?? + case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32; + case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32; + }; +} + +unsigned +R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + + if (FBB == 0) { + if (Cond.empty()) { + BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0); + return 1; + } else { + MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); + assert(PredSet && "No previous predicate !"); + addFlag(PredSet, 0, MO_FLAG_PUSH); + PredSet->getOperand(2).setImm(Cond[1].getImm()); + + BuildMI(&MBB, DL, get(AMDGPU::JUMP)) + .addMBB(TBB) + .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + return 1; + } + } else { + MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); + assert(PredSet && "No previous predicate !"); + addFlag(PredSet, 0, MO_FLAG_PUSH); + PredSet->getOperand(2).setImm(Cond[1].getImm()); + BuildMI(&MBB, DL, get(AMDGPU::JUMP)) + .addMBB(TBB) + .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); + BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0); + return 2; + } +} + +unsigned +R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + + // Note : we leave PRED* instructions there. + // They may be needed when predicating instructions. + + MachineBasicBlock::iterator I = MBB.end(); + + if (I == MBB.begin()) { + return 0; + } + --I; + switch (I->getOpcode()) { + default: + return 0; + case AMDGPU::JUMP: + if (isPredicated(I)) { + MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); + clearFlag(predSet, 0, MO_FLAG_PUSH); + } + I->eraseFromParent(); + break; + } + I = MBB.end(); + + if (I == MBB.begin()) { + return 1; + } + --I; + switch (I->getOpcode()) { + // FIXME: only one case?? 
+ default: + return 1; + case AMDGPU::JUMP: + if (isPredicated(I)) { + MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); + clearFlag(predSet, 0, MO_FLAG_PUSH); + } + I->eraseFromParent(); + break; + } + return 2; +} + +bool +R600InstrInfo::isPredicated(const MachineInstr *MI) const { + int idx = MI->findFirstPredOperandIdx(); + if (idx < 0) + return false; + + unsigned Reg = MI->getOperand(idx).getReg(); + switch (Reg) { + default: return false; + case AMDGPU::PRED_SEL_ONE: + case AMDGPU::PRED_SEL_ZERO: + case AMDGPU::PREDICATE_BIT: + return true; + } +} + +bool +R600InstrInfo::isPredicable(MachineInstr *MI) const { + // XXX: KILL* instructions can be predicated, but they must be the last + // instruction in a clause, so this means any instructions after them cannot + // be predicated. Until we have proper support for instruction clauses in the + // backend, we will mark KILL* instructions as unpredicable. + + if (MI->getOpcode() == AMDGPU::KILLGT) { + return false; + } else { + return AMDGPUInstrInfo::isPredicable(MI); + } +} + + +bool +R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCyles, + unsigned ExtraPredCycles, + const BranchProbability &Probability) const{ + return true; +} + +bool +R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, + unsigned ExtraTCycles, + MachineBasicBlock &FMBB, + unsigned NumFCycles, + unsigned ExtraFCycles, + const BranchProbability &Probability) const { + return true; +} + +bool +R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, + unsigned NumCyles, + const BranchProbability &Probability) + const { + return true; +} + +bool +R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const { + return false; +} + + +bool +R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + MachineOperand &MO = Cond[1]; + switch (MO.getImm()) { + case OPCODE_IS_ZERO_INT: + MO.setImm(OPCODE_IS_NOT_ZERO_INT); + break; + case OPCODE_IS_NOT_ZERO_INT: + MO.setImm(OPCODE_IS_ZERO_INT); + break; + case OPCODE_IS_ZERO: + MO.setImm(OPCODE_IS_NOT_ZERO); + break; + case OPCODE_IS_NOT_ZERO: + MO.setImm(OPCODE_IS_ZERO); + break; + default: + return true; + } + + MachineOperand &MO2 = Cond[2]; + switch (MO2.getReg()) { + case AMDGPU::PRED_SEL_ZERO: + MO2.setReg(AMDGPU::PRED_SEL_ONE); + break; + case AMDGPU::PRED_SEL_ONE: + MO2.setReg(AMDGPU::PRED_SEL_ZERO); + break; + default: + return true; + } + return false; +} + +bool +R600InstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + return isPredicateSetter(MI->getOpcode()); +} + + +bool +R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const { + return false; +} + + +bool +R600InstrInfo::PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const { + int PIdx = MI->findFirstPredOperandIdx(); + + if (PIdx != -1) { + MachineOperand &PMO = MI->getOperand(PIdx); + PMO.setReg(Pred[2].getReg()); + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); + MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); + return true; + } + + return false; +} + +unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost) const { + if (PredCost) + *PredCost = 2; + return 2; +} + +MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, 
+ unsigned Opcode, + unsigned DstReg, + unsigned Src0Reg, + unsigned Src1Reg) const { + MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode), + DstReg); // $dst + + if (Src1Reg) { + MIB.addImm(0) // $update_exec_mask + .addImm(0); // $update_predicate + } + MIB.addImm(1) // $write + .addImm(0) // $omod + .addImm(0) // $dst_rel + .addImm(0) // $dst_clamp + .addReg(Src0Reg) // $src0 + .addImm(0) // $src0_neg + .addImm(0) // $src0_rel + .addImm(0); // $src0_abs + + if (Src1Reg) { + MIB.addReg(Src1Reg) // $src1 + .addImm(0) // $src1_neg + .addImm(0) // $src1_rel + .addImm(0); // $src1_abs + } + + //XXX: The r600g finalizer expects this to be 1, once we've moved the + //scheduling to the backend, we can change the default to 0. + MIB.addImm(1) // $last + .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel + .addImm(0); // $literal + + return MIB; +} + +MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + unsigned DstReg, + uint64_t Imm) const { + MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg, + AMDGPU::ALU_LITERAL_X); + setImmOperand(MovImm, R600Operands::IMM, Imm); + return MovImm; +} + +int R600InstrInfo::getOperandIdx(const MachineInstr &MI, + R600Operands::Ops Op) const { + return getOperandIdx(MI.getOpcode(), Op); +} + +int R600InstrInfo::getOperandIdx(unsigned Opcode, + R600Operands::Ops Op) const { + const static int OpTable[3][R600Operands::COUNT] = { +// W C S S S S S S S S +// R O D L S R R R S R R R S R R L P +// D U I M R A R C C C C C C C R C C A R I +// S E U T O E M C 0 0 0 C 1 1 1 C 2 2 S E M +// T M P E D L P 0 N R A 1 N R A 2 N R T D M + {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11}, + {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17}, + {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14} + }; + unsigned TargetFlags = get(Opcode).TSFlags; + unsigned OpTableIdx; + + if (!HAS_NATIVE_OPERANDS(TargetFlags)) { + switch (Op) { + case R600Operands::DST: return 0; + case R600Operands::SRC0: return 1; + case R600Operands::SRC1: return 2; + case R600Operands::SRC2: return 3; + default: + assert(!"Unknown operand type for instruction"); + return -1; + } + } + + if (TargetFlags & R600_InstFlag::OP1) { + OpTableIdx = 0; + } else if (TargetFlags & R600_InstFlag::OP2) { + OpTableIdx = 1; + } else { + assert((TargetFlags & R600_InstFlag::OP3) && "OP1, OP2, or OP3 not defined " + "for this instruction"); + OpTableIdx = 2; + } + + return OpTable[OpTableIdx][Op]; +} + +void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op, + int64_t Imm) const { + int Idx = getOperandIdx(*MI, Op); + assert(Idx != -1 && "Operand not supported for this instruction."); + assert(MI->getOperand(Idx).isImm()); + MI->getOperand(Idx).setImm(Imm); +} + +//===----------------------------------------------------------------------===// +// Instruction flag getters/setters +//===----------------------------------------------------------------------===// + +bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const { + return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0; +} + +MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx, + unsigned Flag) const { + unsigned TargetFlags = get(MI->getOpcode()).TSFlags; + int FlagIndex = 0; + if (Flag != 0) { + // If we pass something other than the default value of Flag to this + // function, it means we are want to set a flag on an instruction + // that uses native encoding. 
+ assert(HAS_NATIVE_OPERANDS(TargetFlags)); + bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3; + switch (Flag) { + case MO_FLAG_CLAMP: + FlagIndex = getOperandIdx(*MI, R600Operands::CLAMP); + break; + case MO_FLAG_MASK: + FlagIndex = getOperandIdx(*MI, R600Operands::WRITE); + break; + case MO_FLAG_NOT_LAST: + case MO_FLAG_LAST: + FlagIndex = getOperandIdx(*MI, R600Operands::LAST); + break; + case MO_FLAG_NEG: + switch (SrcIdx) { + case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_NEG); break; + case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_NEG); break; + case 2: FlagIndex = getOperandIdx(*MI, R600Operands::SRC2_NEG); break; + } + break; + + case MO_FLAG_ABS: + assert(!IsOP3 && "Cannot set absolute value modifier for OP3 " + "instructions."); + (void)IsOP3; + switch (SrcIdx) { + case 0: FlagIndex = getOperandIdx(*MI, R600Operands::SRC0_ABS); break; + case 1: FlagIndex = getOperandIdx(*MI, R600Operands::SRC1_ABS); break; + } + break; + + default: + FlagIndex = -1; + break; + } + assert(FlagIndex != -1 && "Flag not supported for this instruction"); + } else { + FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags); + assert(FlagIndex != 0 && + "Instruction flags not supported for this instruction"); + } + + MachineOperand &FlagOp = MI->getOperand(FlagIndex); + assert(FlagOp.isImm()); + return FlagOp; +} + +void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand, + unsigned Flag) const { + unsigned TargetFlags = get(MI->getOpcode()).TSFlags; + if (Flag == 0) { + return; + } + if (HAS_NATIVE_OPERANDS(TargetFlags)) { + MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag); + if (Flag == MO_FLAG_NOT_LAST) { + clearFlag(MI, Operand, MO_FLAG_LAST); + } else if (Flag == MO_FLAG_MASK) { + clearFlag(MI, Operand, Flag); + } else { + FlagOp.setImm(1); + } + } else { + MachineOperand &FlagOp = getFlagOp(MI, Operand); + FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand))); + } +} + +void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand, + unsigned Flag) const { + unsigned TargetFlags = get(MI->getOpcode()).TSFlags; + if (HAS_NATIVE_OPERANDS(TargetFlags)) { + MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag); + FlagOp.setImm(0); + } else { + MachineOperand &FlagOp = getFlagOp(MI); + unsigned InstFlags = FlagOp.getImm(); + InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand)); + FlagOp.setImm(InstFlags); + } +} diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h new file mode 100644 index 0000000000..11685af5b0 --- /dev/null +++ b/lib/Target/R600/R600InstrInfo.h @@ -0,0 +1,168 @@ +//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Interface definition for R600InstrInfo +// +//===----------------------------------------------------------------------===// + +#ifndef R600INSTRUCTIONINFO_H_ +#define R600INSTRUCTIONINFO_H_ + +#include "AMDGPUInstrInfo.h" +#include "AMDIL.h" +#include "R600Defines.h" +#include "R600RegisterInfo.h" +#include <map> + +namespace llvm { + + class AMDGPUTargetMachine; + class DFAPacketizer; + class ScheduleDAG; + class MachineFunction; + class MachineInstr; + class MachineInstrBuilder; + + class R600InstrInfo : public AMDGPUInstrInfo { + private: + const R600RegisterInfo RI; + + int getBranchInstr(const MachineOperand &op) const; + + public: + explicit R600InstrInfo(AMDGPUTargetMachine &tm); + + const R600RegisterInfo &getRegisterInfo() const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + bool isTrig(const MachineInstr &MI) const; + bool isPlaceHolderOpcode(unsigned opcode) const; + bool isReductionOp(unsigned opcode) const; + bool isCubeOp(unsigned opcode) const; + + /// \returns true if this \p Opcode represents an ALU instruction. + bool isALUInstr(unsigned Opcode) const; + + /// \breif Vector instructions are instructions that must fill all + /// instruction slots within an instruction group. + bool isVector(const MachineInstr &MI) const; + + virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg, + int64_t Imm) const; + + virtual unsigned getIEQOpcode() const; + virtual bool isMov(unsigned Opcode) const; + + DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const; + + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; + + unsigned RemoveBranch(MachineBasicBlock &MBB) const; + + bool isPredicated(const MachineInstr *MI) const; + + bool isPredicable(MachineInstr *MI) const; + + bool + isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, + const BranchProbability &Probability) const; + + bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, + unsigned ExtraPredCycles, + const BranchProbability &Probability) const ; + + bool + isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, unsigned ExtraTCycles, + MachineBasicBlock &FMBB, + unsigned NumFCycles, unsigned ExtraFCycles, + const BranchProbability &Probability) const; + + bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const; + + bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const; + + bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const; + + bool PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const; + + unsigned int getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = 0) const; + + virtual int getInstrLatency(const InstrItineraryData *ItinData, + SDNode *Node) const { return 1;} + + /// You can use this function to avoid manually specifying each 
instruction + /// modifier operand when building a new instruction. + /// + /// \returns a MachineInstr with all the instruction modifiers initialized + /// to their default values. + MachineInstrBuilder buildDefaultInstruction(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned Opcode, + unsigned DstReg, + unsigned Src0Reg, + unsigned Src1Reg = 0) const; + + MachineInstr *buildMovImm(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + unsigned DstReg, + uint64_t Imm) const; + + /// \brief Get the index of Op in the MachineInstr. + /// + /// \returns -1 if the Instruction does not contain the specified \p Op. + int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const; + + /// \brief Get the index of \p Op for the given Opcode. + /// + /// \returns -1 if the Instruction does not contain the specified \p Op. + int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const; + + /// \brief Helper function for setting instruction flag values. + void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const; + + /// \returns true if this instruction has an operand for storing target flags. + bool hasFlagOperand(const MachineInstr &MI) const; + + ///\brief Add one of the MO_FLAG* flags to the specified \p Operand. + void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const; + + ///\brief Determine if the specified \p Flag is set on this \p Operand. + bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const; + + /// \param SrcIdx The register source to set the flag on (e.g src0, src1, src2) + /// \param Flag The flag being set. + /// + /// \returns the operand containing the flags for this instruction. + MachineOperand &getFlagOp(MachineInstr *MI, unsigned SrcIdx = 0, + unsigned Flag = 0) const; + + /// \brief Clear the specified flag on the instruction. + void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const; +}; + +} // End llvm namespace + +#endif // R600INSTRINFO_H_ diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td new file mode 100644 index 0000000000..64bab18fa6 --- /dev/null +++ b/lib/Target/R600/R600Instructions.td @@ -0,0 +1,1724 @@ +//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// R600 Tablegen instruction definitions +// +//===----------------------------------------------------------------------===// + +include "R600Intrinsics.td" + +class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern, + InstrItinClass itin> + : AMDGPUInst <outs, ins, asm, pattern> { + + field bits<64> Inst; + bit Trig = 0; + bit Op3 = 0; + bit isVector = 0; + bits<2> FlagOperandIdx = 0; + bit Op1 = 0; + bit Op2 = 0; + bit HasNativeOperands = 0; + + bits<11> op_code = inst; + //let Inst = inst; + let Namespace = "AMDGPU"; + let OutOperandList = outs; + let InOperandList = ins; + let AsmString = asm; + let Pattern = pattern; + let Itinerary = itin; + + let TSFlags{4} = Trig; + let TSFlags{5} = Op3; + + // Vector instructions are instructions that must fill all slots in an + // instruction group + let TSFlags{6} = isVector; + let TSFlags{8-7} = FlagOperandIdx; + let TSFlags{9} = HasNativeOperands; + let TSFlags{10} = Op1; + let TSFlags{11} = Op2; +} + +class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> : + AMDGPUInst <outs, ins, asm, pattern> { + field bits<64> Inst; + + let Namespace = "AMDGPU"; +} + +def MEMxi : Operand<iPTR> { + let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index); + let PrintMethod = "printMemOperand"; +} + +def MEMrr : Operand<iPTR> { + let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index); +} + +// Operands for non-registers + +class InstFlag<string PM = "printOperand", int Default = 0> + : OperandWithDefaultOps <i32, (ops (i32 Default))> { + let PrintMethod = PM; +} + +def LITERAL : InstFlag<"printLiteral">; + +def WRITE : InstFlag <"printWrite", 1>; +def OMOD : InstFlag <"printOMOD">; +def REL : InstFlag <"printRel">; +def CLAMP : InstFlag <"printClamp">; +def NEG : InstFlag <"printNeg">; +def ABS : InstFlag <"printAbs">; +def UEM : InstFlag <"printUpdateExecMask">; +def UP : InstFlag <"printUpdatePred">; + +// XXX: The r600g finalizer in Mesa expects last to be one in most cases. +// Once we start using the packetizer in this backend we should have this +// default to 0. 
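The TSFlags assignments in the InstR600 class above are what the C++ helpers in R600InstrInfo.cpp (isALUInstr, the HAS_NATIVE_OPERANDS check, the flag-operand accessors) read back at run time. A minimal sketch of that correspondence, assuming the R600_InstFlag masks defined in R600Defines.h simply mirror the bit positions written into TSFlags here:

#include "llvm/MC/MCInstrDesc.h"

// Assumed mask values; the real definitions live in R600Defines.h and must
// stay in sync with the TSFlags{...} assignments in InstR600 above.
namespace R600_InstFlag {
  enum {
    TRIG            = 1 << 4,   // TSFlags{4}  = Trig
    OP3             = 1 << 5,   // TSFlags{5}  = Op3
    VECTOR          = 1 << 6,   // TSFlags{6}  = isVector
    NATIVE_OPERANDS = 1 << 9,   // TSFlags{9}  = HasNativeOperands
    OP1             = 1 << 10,  // TSFlags{10} = Op1
    OP2             = 1 << 11   // TSFlags{11} = Op2
  };
}

// R600InstrInfo::isALUInstr reduces to a test of these three bits.
static bool isALU(const llvm::MCInstrDesc &Desc) {
  return Desc.TSFlags &
         (R600_InstFlag::OP1 | R600_InstFlag::OP2 | R600_InstFlag::OP3);
}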
+def LAST : InstFlag<"printLast", 1>; + +def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>; +def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>; +def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>; + +class R600ALU_Word0 { + field bits<32> Word0; + + bits<11> src0; + bits<1> src0_neg; + bits<1> src0_rel; + bits<11> src1; + bits<1> src1_rel; + bits<1> src1_neg; + bits<3> index_mode = 0; + bits<2> pred_sel; + bits<1> last; + + bits<9> src0_sel = src0{8-0}; + bits<2> src0_chan = src0{10-9}; + bits<9> src1_sel = src1{8-0}; + bits<2> src1_chan = src1{10-9}; + + let Word0{8-0} = src0_sel; + let Word0{9} = src0_rel; + let Word0{11-10} = src0_chan; + let Word0{12} = src0_neg; + let Word0{21-13} = src1_sel; + let Word0{22} = src1_rel; + let Word0{24-23} = src1_chan; + let Word0{25} = src1_neg; + let Word0{28-26} = index_mode; + let Word0{30-29} = pred_sel; + let Word0{31} = last; +} + +class R600ALU_Word1 { + field bits<32> Word1; + + bits<11> dst; + bits<3> bank_swizzle = 0; + bits<1> dst_rel; + bits<1> clamp; + + bits<7> dst_sel = dst{6-0}; + bits<2> dst_chan = dst{10-9}; + + let Word1{20-18} = bank_swizzle; + let Word1{27-21} = dst_sel; + let Word1{28} = dst_rel; + let Word1{30-29} = dst_chan; + let Word1{31} = clamp; +} + +class R600ALU_Word1_OP2 <bits<11> alu_inst> : R600ALU_Word1{ + + bits<1> src0_abs; + bits<1> src1_abs; + bits<1> update_exec_mask; + bits<1> update_pred; + bits<1> write; + bits<2> omod; + + let Word1{0} = src0_abs; + let Word1{1} = src1_abs; + let Word1{2} = update_exec_mask; + let Word1{3} = update_pred; + let Word1{4} = write; + let Word1{6-5} = omod; + let Word1{17-7} = alu_inst; +} + +class R600ALU_Word1_OP3 <bits<5> alu_inst> : R600ALU_Word1{ + + bits<11> src2; + bits<1> src2_rel; + bits<1> src2_neg; + + bits<9> src2_sel = src2{8-0}; + bits<2> src2_chan = src2{10-9}; + + let Word1{8-0} = src2_sel; + let Word1{9} = src2_rel; + let Word1{11-10} = src2_chan; + let Word1{12} = src2_neg; + let Word1{17-13} = alu_inst; +} + +/* +XXX: R600 subtarget uses a slightly different encoding than the other +subtargets. We currently handle this in R600MCCodeEmitter, but we may +want to use these instruction classes in the future. + +class R600ALU_Word1_OP2_r600 : R600ALU_Word1_OP2 { + + bits<1> fog_merge; + bits<10> alu_inst; + + let Inst{37} = fog_merge; + let Inst{39-38} = omod; + let Inst{49-40} = alu_inst; +} + +class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 { + + bits<11> alu_inst; + + let Inst{38-37} = omod; + let Inst{49-39} = alu_inst; +} +*/ + +def R600_Pred : PredicateOperand<i32, (ops R600_Predicate), + (ops PRED_SEL_OFF)>; + + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + +// Class for instructions with only one source register. +// If you add new ins to this instruction, make sure they are listed before +// $literal, because the backend currently assumes that the last operand is +// a literal. Also be sure to update the enum R600Op1OperandIndex::ROI in +// R600Defines.h, R600InstrInfo::buildDefaultInstruction(), +// and R600InstrInfo::getOperandIdx(). 
+class R600_1OP <bits<11> inst, string opName, list<dag> pattern, + InstrItinClass itin = AnyALU> : + InstR600 <0, + (outs R600_Reg32:$dst), + (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, + LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), + !strconcat(opName, + "$clamp $dst$write$dst_rel$omod, " + "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " + "$literal $pred_sel$last"), + pattern, + itin>, + R600ALU_Word0, + R600ALU_Word1_OP2 <inst> { + + let src1 = 0; + let src1_rel = 0; + let src1_neg = 0; + let src1_abs = 0; + let update_exec_mask = 0; + let update_pred = 0; + let HasNativeOperands = 1; + let Op1 = 1; + let DisableEncoding = "$literal"; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; +} + +class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node, + InstrItinClass itin = AnyALU> : + R600_1OP <inst, opName, + [(set R600_Reg32:$dst, (node R600_Reg32:$src0))] +>; + +// If you add our change the operands for R600_2OP instructions, you must +// also update the R600Op2OperandIndex::ROI enum in R600Defines.h, +// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx(). +class R600_2OP <bits<11> inst, string opName, list<dag> pattern, + InstrItinClass itin = AnyALU> : + InstR600 <inst, + (outs R600_Reg32:$dst), + (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write, + OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, + LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), + !strconcat(opName, + "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, " + "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " + "$src1_neg$src1_abs$src1$src1_abs$src1_rel, " + "$literal $pred_sel$last"), + pattern, + itin>, + R600ALU_Word0, + R600ALU_Word1_OP2 <inst> { + + let HasNativeOperands = 1; + let Op2 = 1; + let DisableEncoding = "$literal"; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; +} + +class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node, + InstrItinClass itim = AnyALU> : + R600_2OP <inst, opName, + [(set R600_Reg32:$dst, (node R600_Reg32:$src0, + R600_Reg32:$src1))] +>; + +// If you add our change the operands for R600_3OP instructions, you must +// also update the R600Op3OperandIndex::ROI enum in R600Defines.h, +// R600InstrInfo::buildDefaultInstruction(), and +// R600InstrInfo::getOperandIdx(). 
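The warnings attached to R600_1OP/R600_2OP/R600_3OP matter because client code is not meant to hard-code operand positions; it is expected to go through the lookup table in R600InstrInfo::getOperandIdx. A small, hypothetical illustration of that intended usage (setClamp and the choice of the $clamp modifier are made up for the example; getOperandIdx and R600Operands::CLAMP are the helpers declared in R600InstrInfo.h and used in R600InstrInfo.cpp):

#include "R600InstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Hypothetical pass helper: set the clamp modifier on an ALU MachineInstr
// without assuming where $clamp sits in the (ins ...) list of the R600_*OP
// classes, so the code survives operand-list changes in R600Instructions.td.
static void setClamp(const R600InstrInfo &TII, MachineInstr *MI) {
  int Idx = TII.getOperandIdx(*MI, R600Operands::CLAMP);
  if (Idx != -1)                      // -1 means this opcode has no such operand
    MI->getOperand(Idx).setImm(1);
}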
+class R600_3OP <bits<5> inst, string opName, list<dag> pattern, + InstrItinClass itin = AnyALU> : + InstR600 <0, + (outs R600_Reg32:$dst), + (ins REL:$dst_rel, CLAMP:$clamp, + R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, + R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, + R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, + LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), + !strconcat(opName, "$clamp $dst$dst_rel, " + "$src0_neg$src0$src0_rel, " + "$src1_neg$src1$src1_rel, " + "$src2_neg$src2$src2_rel, " + "$literal $pred_sel$last"), + pattern, + itin>, + R600ALU_Word0, + R600ALU_Word1_OP3<inst>{ + + let HasNativeOperands = 1; + let DisableEncoding = "$literal"; + let Op3 = 1; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; +} + +class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern, + InstrItinClass itin = VecALU> : + InstR600 <inst, + (outs R600_Reg32:$dst), + ins, + asm, + pattern, + itin>; + +class R600_TEX <bits<11> inst, string opName, list<dag> pattern, + InstrItinClass itin = AnyALU> : + InstR600 <inst, + (outs R600_Reg128:$dst), + (ins R600_Reg128:$src0, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), + !strconcat(opName, "$dst, $src0, $resourceId, $samplerId, $textureTarget"), + pattern, + itin>{ + let Inst {10-0} = inst; + } + +} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0 + +def TEX_SHADOW : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return (TType >= 6 && TType <= 8) || (TType >= 11 && TType <= 13); + }] +>; + +class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs, + dag ins, string asm, list<dag> pattern> : + InstR600ISA <outs, ins, asm, pattern> { + bits<7> RW_GPR; + bits<7> INDEX_GPR; + + bits<2> RIM; + bits<2> TYPE; + bits<1> RW_REL; + bits<2> ELEM_SIZE; + + bits<12> ARRAY_SIZE; + bits<4> COMP_MASK; + bits<4> BURST_COUNT; + bits<1> VPM; + bits<1> eop; + bits<1> MARK; + bits<1> BARRIER; + + // CF_ALLOC_EXPORT_WORD0_RAT + let Inst{3-0} = rat_id; + let Inst{9-4} = rat_inst; + let Inst{10} = 0; // Reserved + let Inst{12-11} = RIM; + let Inst{14-13} = TYPE; + let Inst{21-15} = RW_GPR; + let Inst{22} = RW_REL; + let Inst{29-23} = INDEX_GPR; + let Inst{31-30} = ELEM_SIZE; + + // CF_ALLOC_EXPORT_WORD1_BUF + let Inst{43-32} = ARRAY_SIZE; + let Inst{47-44} = COMP_MASK; + let Inst{51-48} = BURST_COUNT; + let Inst{52} = VPM; + let Inst{53} = eop; + let Inst{61-54} = cf_inst; + let Inst{62} = MARK; + let Inst{63} = BARRIER; +} + +class LoadParamFrag <PatFrag load_type> : PatFrag < + (ops node:$ptr), (load_type node:$ptr), + [{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }] +>; + +def load_param : LoadParamFrag<load>; +def load_param_zexti8 : LoadParamFrag<zextloadi8>; +def load_param_zexti16 : LoadParamFrag<zextloadi16>; + +def isR600 : Predicate<"Subtarget.device()" + "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">; +def isR700 : Predicate<"Subtarget.device()" + "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&" + "Subtarget.device()->getDeviceFlag()" + ">= OCL_DEVICE_RV710">; +def isEG : Predicate< + "Subtarget.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && " + "Subtarget.device()->getGeneration() < AMDGPUDeviceInfo::HD7XXX && " + "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">; + +def isCayman : Predicate<"Subtarget.device()" + "->getDeviceFlag() == OCL_DEVICE_CAYMAN">; +def isEGorCayman : Predicate<"Subtarget.device()" + "->getGeneration() == AMDGPUDeviceInfo::HD5XXX" + "|| Subtarget.device()->getGeneration() ==" + "AMDGPUDeviceInfo::HD6XXX">; + +def 
isR600toCayman : Predicate< + "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">; + +//===----------------------------------------------------------------------===// +// Interpolation Instructions +//===----------------------------------------------------------------------===// + +def INTERP: SDNode<"AMDGPUISD::INTERP", + SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]> + >; + +def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0", + SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]> + >; + +let usesCustomInserter = 1 in { +def input_perspective : AMDGPUShaderInst < + (outs R600_Reg128:$dst), + (ins i32imm:$src0, i32imm:$src1), + "input_perspective $src0 $src1 : dst", + [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>; +} // End usesCustomInserter = 1 + +def input_constant : AMDGPUShaderInst < + (outs R600_Reg128:$dst), + (ins i32imm:$src), + "input_perspective $src : dst", + [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>; + + + +def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { + let bank_swizzle = 5; +} + +def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> { + let bank_swizzle = 5; +} + +def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>; + +//===----------------------------------------------------------------------===// +// Export Instructions +//===----------------------------------------------------------------------===// + +def ExportType : SDTypeProfile<0, 5, [SDTCisFP<0>, SDTCisInt<1>]>; + +def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType, + [SDNPHasChain, SDNPSideEffect]>; + +class ExportWord0 { + field bits<32> Word0; + + bits<13> arraybase; + bits<2> type; + bits<7> gpr; + bits<2> elem_size; + + let Word0{12-0} = arraybase; + let Word0{14-13} = type; + let Word0{21-15} = gpr; + let Word0{22} = 0; // RW_REL + let Word0{29-23} = 0; // INDEX_GPR + let Word0{31-30} = elem_size; +} + +class ExportSwzWord1 { + field bits<32> Word1; + + bits<3> sw_x; + bits<3> sw_y; + bits<3> sw_z; + bits<3> sw_w; + bits<1> eop; + bits<8> inst; + + let Word1{2-0} = sw_x; + let Word1{5-3} = sw_y; + let Word1{8-6} = sw_z; + let Word1{11-9} = sw_w; +} + +class ExportBufWord1 { + field bits<32> Word1; + + bits<12> arraySize; + bits<4> compMask; + bits<1> eop; + bits<8> inst; + + let Word1{11-0} = arraySize; + let Word1{15-12} = compMask; +} + +multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { + def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg), + (ExportInst + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x), + 0, 61, 0, 7, 7, 7, cf_inst, 0) + >; + + def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg), + (ExportInst + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x), + 0, 61, 7, 0, 7, 7, cf_inst, 0) + >; + + def : Pat<(int_R600_store_pixel_dummy), + (ExportInst + (v4f32 (IMPLICIT_DEF)), 0, 0, 7, 7, 7, 7, cf_inst, 0) + >; + + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 0), + (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + 0, 1, 2, 3, cf_inst, 0) + >; +} + +multiclass SteamOutputExportPattern<Instruction ExportInst, + bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> { +// Stream0 + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1), + (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + 4095, imm:$mask, buf0inst, 0)>; +// Stream1 + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 2), + (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), + 
(ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + 4095, imm:$mask, buf1inst, 0)>; +// Stream2 + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 3), + (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + 4095, imm:$mask, buf2inst, 0)>; +// Stream3 + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 4), + (i32 imm:$type), (i32 imm:$arraybase), (i32 imm:$mask)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, + 4095, imm:$mask, buf3inst, 0)>; +} + +let isTerminator = 1, usesCustomInserter = 1 in { + +class ExportSwzInst : InstR600ISA<( + outs), + (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase, + i32imm:$sw_x, i32imm:$sw_y, i32imm:$sw_z, i32imm:$sw_w, i32imm:$inst, + i32imm:$eop), + !strconcat("EXPORT", " $gpr"), + []>, ExportWord0, ExportSwzWord1 { + let elem_size = 3; + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; +} + +} // End isTerminator = 1, usesCustomInserter = 1 + +class ExportBufInst : InstR600ISA<( + outs), + (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase, + i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop), + !strconcat("EXPORT", " $gpr"), + []>, ExportWord0, ExportBufWord1 { + let elem_size = 0; + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; +} + +let Predicates = [isR600toCayman] in { + +//===----------------------------------------------------------------------===// +// Common Instructions R600, R700, Evergreen, Cayman +//===----------------------------------------------------------------------===// + +def ADD : R600_2OP_Helper <0x0, "ADD", fadd>; +// Non-IEEE MUL: 0 * anything = 0 +def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>; +def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>; +def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>; +def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>; + +// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td, +// so some of the instruction names don't match the asm string. +// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics. 
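The selectcc patterns that follow encode the two comparison-result conventions used throughout this file: the floating-point SET* instructions produce FP_ONE / FP_ZERO, while the *_INT variants produce an all-ones integer (-1) or 0. A purely illustrative scalar model, not backend code:

// Scalar equivalents of the patterns below (illustrative only).
static float sete(float a, float b) { return a == b ? 1.0f : 0.0f; } // SETE     -> FP_ONE / FP_ZERO
static int   sete_int(int a, int b) { return a == b ? -1   : 0;    } // SETE_INT -> all ones / zero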
+def SETE : R600_2OP < + 0x08, "SETE", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, + COND_EQ))] +>; + +def SGT : R600_2OP < + 0x09, "SETGT", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, + COND_GT))] +>; + +def SGE : R600_2OP < + 0xA, "SETGE", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, + COND_GE))] +>; + +def SNE : R600_2OP < + 0xB, "SETNE", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, + COND_NE))] +>; + +def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; +def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>; +def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>; +def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>; +def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>; + +def MOV : R600_1OP <0x19, "MOV", []>; + +let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { + +class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst < + (outs R600_Reg32:$dst), + (ins immType:$imm), + "", + [] +>; + +} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 + +def MOV_IMM_I32 : MOV_IMM<i32, i32imm>; +def : Pat < + (imm:$val), + (MOV_IMM_I32 imm:$val) +>; + +def MOV_IMM_F32 : MOV_IMM<f32, f32imm>; +def : Pat < + (fpimm:$val), + (MOV_IMM_F32 fpimm:$val) +>; + +def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>; +def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>; +def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>; +def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>; + +let hasSideEffects = 1 in { + +def KILLGT : R600_2OP <0x2D, "KILLGT", []>; + +} // end hasSideEffects + +def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>; +def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>; +def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>; +def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>; +def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>; +def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>; +def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", AMDGPUsmax>; +def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", AMDGPUsmin>; +def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", AMDGPUumax>; +def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>; + +def SETE_INT : R600_2OP < + 0x3A, "SETE_INT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))] +>; + +def SETGT_INT : R600_2OP < + 0x3B, "SGT_INT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))] +>; + +def SETGE_INT : R600_2OP < + 0x3C, "SETGE_INT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))] +>; + +def SETNE_INT : R600_2OP < + 0x3D, "SETNE_INT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))] +>; + +def SETGT_UINT : R600_2OP < + 0x3E, "SETGT_UINT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))] +>; + +def SETGE_UINT : R600_2OP < + 0x3F, "SETGE_UINT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))] +>; + +def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>; +def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGE_INT", []>; +def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>; +def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>; + +def CNDE_INT : R600_3OP < + 0x1C, "CNDE_INT", + [(set (i32 
R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), 0, + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), + COND_EQ))] +>; + +def CNDGE_INT : R600_3OP < + 0x1E, "CNDGE_INT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), 0, + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), + COND_GE))] +>; + +def CNDGT_INT : R600_3OP < + 0x1D, "CNDGT_INT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), 0, + (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2), + COND_GT))] +>; + +//===----------------------------------------------------------------------===// +// Texture instructions +//===----------------------------------------------------------------------===// + +def TEX_LD : R600_TEX < + 0x03, "TEX_LD", + [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] +> { +let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $resourceId, $samplerId, $textureTarget"; +let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget); +} + +def TEX_GET_TEXTURE_RESINFO : R600_TEX < + 0x04, "TEX_GET_TEXTURE_RESINFO", + [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] +>; + +def TEX_GET_GRADIENTS_H : R600_TEX < + 0x07, "TEX_GET_GRADIENTS_H", + [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] +>; + +def TEX_GET_GRADIENTS_V : R600_TEX < + 0x08, "TEX_GET_GRADIENTS_V", + [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] +>; + +def TEX_SET_GRADIENTS_H : R600_TEX < + 0x0B, "TEX_SET_GRADIENTS_H", + [] +>; + +def TEX_SET_GRADIENTS_V : R600_TEX < + 0x0C, "TEX_SET_GRADIENTS_V", + [] +>; + +def TEX_SAMPLE : R600_TEX < + 0x10, "TEX_SAMPLE", + [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] +>; + +def TEX_SAMPLE_C : R600_TEX < + 0x18, "TEX_SAMPLE_C", + [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] +>; + +def TEX_SAMPLE_L : R600_TEX < + 0x11, "TEX_SAMPLE_L", + [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] +>; + +def TEX_SAMPLE_C_L : R600_TEX < + 0x19, "TEX_SAMPLE_C_L", + [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] +>; + +def TEX_SAMPLE_LB : R600_TEX < + 0x12, "TEX_SAMPLE_LB", + [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0,imm:$resourceId, imm:$samplerId, imm:$textureTarget))] +>; + +def TEX_SAMPLE_C_LB : R600_TEX < + 0x1A, "TEX_SAMPLE_C_LB", + [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] +>; + +def TEX_SAMPLE_G : R600_TEX < + 0x14, "TEX_SAMPLE_G", + [] +>; + +def TEX_SAMPLE_C_G : R600_TEX < + 0x1C, "TEX_SAMPLE_C_G", + [] +>; + +//===----------------------------------------------------------------------===// +// Helper classes for common instructions +//===----------------------------------------------------------------------===// + +class MUL_LIT_Common <bits<5> inst> : R600_3OP < + inst, "MUL_LIT", + [] +>; + +class MULADD_Common <bits<5> inst> : R600_3OP < + inst, "MULADD", + [(set (f32 R600_Reg32:$dst), + (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, 
R600_Reg32:$src2))] +>; + +class CNDE_Common <bits<5> inst> : R600_3OP < + inst, "CNDE", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), FP_ZERO, + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), + COND_EQ))] +>; + +class CNDGT_Common <bits<5> inst> : R600_3OP < + inst, "CNDGT", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), FP_ZERO, + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), + COND_GT))] +>; + +class CNDGE_Common <bits<5> inst> : R600_3OP < + inst, "CNDGE", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), FP_ZERO, + (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2), + COND_GE))] +>; + +multiclass DOT4_Common <bits<11> inst> { + + def _pseudo : R600_REDUCTION <inst, + (ins R600_Reg128:$src0, R600_Reg128:$src1), + "DOT4 $dst $src0, $src1", + [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))] + >; + + def _real : R600_2OP <inst, "DOT4", []>; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { +multiclass CUBE_Common <bits<11> inst> { + + def _pseudo : InstR600 < + inst, + (outs R600_Reg128:$dst), + (ins R600_Reg128:$src), + "CUBE $dst $src", + [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], + VecALU + > { + let isPseudo = 1; + } + + def _real : R600_2OP <inst, "CUBE", []>; +} +} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0 + +class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper < + inst, "EXP_IEEE", fexp2 +>; + +class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper < + inst, "FLT_TO_INT", fp_to_sint +>; + +class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper < + inst, "INT_TO_FLT", sint_to_fp +>; + +class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper < + inst, "FLT_TO_UINT", fp_to_uint +>; + +class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper < + inst, "UINT_TO_FLT", uint_to_fp +>; + +class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP < + inst, "LOG_CLAMPED", [] +>; + +class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper < + inst, "LOG_IEEE", flog2 +>; + +class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>; +class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>; +class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>; +class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper < + inst, "MULHI_INT", mulhs +>; +class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper < + inst, "MULHI", mulhu +>; +class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper < + inst, "MULLO_INT", mul +>; +class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []>; + +class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP < + inst, "RECIP_CLAMPED", [] +>; + +class RECIP_IEEE_Common <bits<11> inst> : R600_1OP < + inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))] +>; + +class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper < + inst, "RECIP_UINT", AMDGPUurecip +>; + +class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper < + inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq +>; + +class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP < + inst, "RECIPSQRT_IEEE", [] +>; + +class SIN_Common <bits<11> inst> : R600_1OP < + inst, "SIN", []>{ + let Trig = 1; +} + +class COS_Common <bits<11> inst> : R600_1OP < + inst, "COS", []> { + let Trig = 1; +} + +//===----------------------------------------------------------------------===// +// Helper patterns for complex intrinsics +//===----------------------------------------------------------------------===// + +multiclass DIV_Common <InstR600 
recip_ieee> { +def : Pat< + (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1), + (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) +>; + +def : Pat< + (fdiv R600_Reg32:$src0, R600_Reg32:$src1), + (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) +>; +} + +class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat < + (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w), + (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x)) +>; + +//===----------------------------------------------------------------------===// +// R600 / R700 Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [isR600] in { + + def MUL_LIT_r600 : MUL_LIT_Common<0x0C>; + def MULADD_r600 : MULADD_Common<0x10>; + def CNDE_r600 : CNDE_Common<0x18>; + def CNDGT_r600 : CNDGT_Common<0x19>; + def CNDGE_r600 : CNDGE_Common<0x1A>; + defm DOT4_r600 : DOT4_Common<0x50>; + defm CUBE_r600 : CUBE_Common<0x52>; + def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; + def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; + def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>; + def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>; + def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>; + def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>; + def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>; + def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>; + def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>; + def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>; + def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>; + def SIN_r600 : SIN_Common<0x6E>; + def COS_r600 : COS_Common<0x6F>; + def ASHR_r600 : ASHR_Common<0x70>; + def LSHR_r600 : LSHR_Common<0x71>; + def LSHL_r600 : LSHL_Common<0x72>; + def MULLO_INT_r600 : MULLO_INT_Common<0x73>; + def MULHI_INT_r600 : MULHI_INT_Common<0x74>; + def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>; + def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>; + def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>; + + defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>; + def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; + + def : Pat<(fsqrt R600_Reg32:$src), + (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>; + + def R600_ExportSwz : ExportSwzInst { + let Word1{20-17} = 1; // BURST_COUNT + let Word1{21} = eop; + let Word1{22} = 1; // VALID_PIXEL_MODE + let Word1{30-23} = inst; + let Word1{31} = 1; // BARRIER + } + defm : ExportPattern<R600_ExportSwz, 39>; + + def R600_ExportBuf : ExportBufInst { + let Word1{20-17} = 1; // BURST_COUNT + let Word1{21} = eop; + let Word1{22} = 1; // VALID_PIXEL_MODE + let Word1{30-23} = inst; + let Word1{31} = 1; // BARRIER + } + defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>; +} + +// Helper pattern for normalizing inputs to triginomic instructions for R700+ +// cards. 
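The SIN_PAT/COS_PAT classes that follow rewrite fsin/fcos by first multiplying the operand by CONST.TWO_PI_INV, i.e. they appear to rescale a radian argument into the 1/(2*pi)-normalized form the hardware SIN/COS unit consumes on these targets. Roughly, in scalar terms (illustrative only; hw_sin is a stand-in for the SIN_* instruction and the constant is the assumed value of CONST.TWO_PI_INV):

#include <cmath>

// Stand-in for the hardware SIN_* opcode, which (per the pattern below) takes
// its argument already scaled by 1/(2*pi).
static float hw_sin(float x_over_2pi) {
  return std::sin(x_over_2pi * 2.0f * 3.14159265f);
}

// What the lowering amounts to for an fsin of a radian argument; mirrors
// (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), $src)).
static float lowered_fsin(float radians) {
  const float TWO_PI_INV = 0.15915494f; // assumed value: 1 / (2 * pi)
  return hw_sin(radians * TWO_PI_INV);
}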
+class COS_PAT <InstR600 trig> : Pat< + (fcos R600_Reg32:$src), + (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src)) +>; + +class SIN_PAT <InstR600 trig> : Pat< + (fsin R600_Reg32:$src), + (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src)) +>; + +//===----------------------------------------------------------------------===// +// R700 Only instructions +//===----------------------------------------------------------------------===// + +let Predicates = [isR700] in { + def SIN_r700 : SIN_Common<0x6E>; + def COS_r700 : COS_Common<0x6F>; + + // R700 normalizes inputs to SIN/COS the same as EG + def : SIN_PAT <SIN_r700>; + def : COS_PAT <COS_r700>; +} + +//===----------------------------------------------------------------------===// +// Evergreen Only instructions +//===----------------------------------------------------------------------===// + +let Predicates = [isEG] in { + +def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; +defm DIV_eg : DIV_Common<RECIP_IEEE_eg>; + +def MULLO_INT_eg : MULLO_INT_Common<0x8F>; +def MULHI_INT_eg : MULHI_INT_Common<0x90>; +def MULLO_UINT_eg : MULLO_UINT_Common<0x91>; +def MULHI_UINT_eg : MULHI_UINT_Common<0x92>; +def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; +def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>; +def EXP_IEEE_eg : EXP_IEEE_Common<0x81>; +def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; +def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; +def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; +def SIN_eg : SIN_Common<0x8D>; +def COS_eg : COS_Common<0x8E>; + +def : SIN_PAT <SIN_eg>; +def : COS_PAT <COS_eg>; +def : Pat<(fsqrt R600_Reg32:$src), + (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>; +} // End Predicates = [isEG] + +//===----------------------------------------------------------------------===// +// Evergreen / Cayman Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [isEGorCayman] in { + + // BFE_UINT - bit_extract, an optimization for mask and shift + // Src0 = Input + // Src1 = Offset + // Src2 = Width + // + // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width) + // + // Example Usage: + // (Offset, Width) + // + // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0 + // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8 + // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 + // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 + def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", + [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0, + R600_Reg32:$src1, + R600_Reg32:$src2))], + VecALU + >; + + def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", + [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1, + R600_Reg32:$src2))], + VecALU + >; + + def MULADD_eg : MULADD_Common<0x14>; + def ASHR_eg : ASHR_Common<0x15>; + def LSHR_eg : LSHR_Common<0x16>; + def LSHL_eg : LSHL_Common<0x17>; + def CNDE_eg : CNDE_Common<0x19>; + def CNDGT_eg : CNDGT_Common<0x1A>; + def CNDGE_eg : CNDGE_Common<0x1B>; + def MUL_LIT_eg : MUL_LIT_Common<0x1F>; + def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>; + defm DOT4_eg : DOT4_Common<0xBE>; + defm CUBE_eg : CUBE_Common<0xC0>; + + def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>; + + def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { + let Pattern = []; + } + + def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; + + def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> { + let Pattern = []; + } + + def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; + + // 
TRUNC is used for the FLT_TO_INT instructions to work around a + // perceived problem where the rounding modes are applied differently + // depending on the instruction and the slot they are in. + // See: + // https://bugs.freedesktop.org/show_bug.cgi?id=50232 + // Mesa commit: a1a0974401c467cb86ef818f22df67c21774a38c + // + // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes, + // which do not need to be truncated since the fp values are 0.0f or 1.0f. + // We should look into handling these cases separately. + def : Pat<(fp_to_sint R600_Reg32:$src0), + (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>; + + def : Pat<(fp_to_uint R600_Reg32:$src0), + (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>; + + def EG_ExportSwz : ExportSwzInst { + let Word1{19-16} = 1; // BURST_COUNT + let Word1{20} = 1; // VALID_PIXEL_MODE + let Word1{21} = eop; + let Word1{29-22} = inst; + let Word1{30} = 0; // MARK + let Word1{31} = 1; // BARRIER + } + defm : ExportPattern<EG_ExportSwz, 83>; + + def EG_ExportBuf : ExportBufInst { + let Word1{19-16} = 1; // BURST_COUNT + let Word1{20} = 1; // VALID_PIXEL_MODE + let Word1{21} = eop; + let Word1{29-22} = inst; + let Word1{30} = 0; // MARK + let Word1{31} = 1; // BARRIER + } + defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>; + +//===----------------------------------------------------------------------===// +// Memory read/write instructions +//===----------------------------------------------------------------------===// +let usesCustomInserter = 1 in { + +class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name, + list<dag> pattern> + : EG_CF_RAT <0x57, 0x2, 0, (outs), ins, + !strconcat(name, " $rw_gpr, $index_gpr, $eop"), pattern> { + let RIM = 0; + // XXX: Have a separate instruction for non-indexed writes. + let TYPE = 1; + let RW_REL = 0; + let ELEM_SIZE = 0; + + let ARRAY_SIZE = 0; + let COMP_MASK = comp_mask; + let BURST_COUNT = 0; + let VPM = 0; + let MARK = 0; + let BARRIER = 1; +} + +} // End usesCustomInserter = 1 + +// 32-bit store +def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg < + (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), + 0x1, "RAT_WRITE_CACHELESS_32_eg", + [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)] +>; + +//128-bit store +def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < + (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop), + 0xf, "RAT_WRITE_CACHELESS_128", + [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)] +>; + +class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern> + : InstR600ISA <outs, (ins MEMxi:$ptr), name#" $dst, $ptr", pattern> { + + // Operands + bits<7> DST_GPR; + bits<7> SRC_GPR; + + // Static fields + bits<5> VC_INST = 0; + bits<2> FETCH_TYPE = 2; + bits<1> FETCH_WHOLE_QUAD = 0; + bits<8> BUFFER_ID = buffer_id; + bits<1> SRC_REL = 0; + // XXX: We can infer this field based on the SRC_GPR. This would allow us + // to store vertex addresses in any channel, not just X. + bits<2> SRC_SEL_X = 0; + bits<6> MEGA_FETCH_COUNT; + bits<1> DST_REL = 0; + bits<3> DST_SEL_X; + bits<3> DST_SEL_Y; + bits<3> DST_SEL_Z; + bits<3> DST_SEL_W; + // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, + // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, + // however, based on my testing if USE_CONST_FIELDS is set, then all + // these fields need to be set to 0. 
+ bits<1> USE_CONST_FIELDS = 0; + bits<6> DATA_FORMAT; + bits<2> NUM_FORMAT_ALL = 1; + bits<1> FORMAT_COMP_ALL = 0; + bits<1> SRF_MODE_ALL = 0; + + // LLVM can only encode 64-bit instructions, so these fields are manually + // encoded in R600CodeEmitter + // + // bits<16> OFFSET; + // bits<2> ENDIAN_SWAP = 0; + // bits<1> CONST_BUF_NO_STRIDE = 0; + // bits<1> MEGA_FETCH = 0; + // bits<1> ALT_CONST = 0; + // bits<2> BUFFER_INDEX_MODE = 0; + + // VTX_WORD0 + let Inst{4-0} = VC_INST; + let Inst{6-5} = FETCH_TYPE; + let Inst{7} = FETCH_WHOLE_QUAD; + let Inst{15-8} = BUFFER_ID; + let Inst{22-16} = SRC_GPR; + let Inst{23} = SRC_REL; + let Inst{25-24} = SRC_SEL_X; + let Inst{31-26} = MEGA_FETCH_COUNT; + + // VTX_WORD1_GPR + let Inst{38-32} = DST_GPR; + let Inst{39} = DST_REL; + let Inst{40} = 0; // Reserved + let Inst{43-41} = DST_SEL_X; + let Inst{46-44} = DST_SEL_Y; + let Inst{49-47} = DST_SEL_Z; + let Inst{52-50} = DST_SEL_W; + let Inst{53} = USE_CONST_FIELDS; + let Inst{59-54} = DATA_FORMAT; + let Inst{61-60} = NUM_FORMAT_ALL; + let Inst{62} = FORMAT_COMP_ALL; + let Inst{63} = SRF_MODE_ALL; + + // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding + // is done in R600CodeEmitter + // + // Inst{79-64} = OFFSET; + // Inst{81-80} = ENDIAN_SWAP; + // Inst{82} = CONST_BUF_NO_STRIDE; + // Inst{83} = MEGA_FETCH; + // Inst{84} = ALT_CONST; + // Inst{86-85} = BUFFER_INDEX_MODE; + // Inst{95-86} = 0; Reserved + + // VTX_WORD3 (Padding) + // + // Inst{127-96} = 0; +} + +class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_eg <"VTX_READ_8", buffer_id, (outs R600_TReg32_X:$dst), + pattern> { + + let MEGA_FETCH_COUNT = 1; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 1; // FMT_8 +} + +class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_eg <"VTX_READ_16", buffer_id, (outs R600_TReg32_X:$dst), + pattern> { + let MEGA_FETCH_COUNT = 2; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 5; // FMT_16 + +} + +class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_eg <"VTX_READ_32", buffer_id, (outs R600_TReg32_X:$dst), + pattern> { + + let MEGA_FETCH_COUNT = 4; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 0xD; // COLOR_32 + + // This is not really necessary, but there were some GPU hangs that appeared + // to be caused by ALU instructions in the next instruction group that wrote + // to the $ptr registers of the VTX_READ. + // e.g. + // %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24 + // %T2_X<def> = MOV %ZERO + //Adding this constraint prevents this from happening. + let Constraints = "$ptr.ptr = $dst"; +} + +class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> + : VTX_READ_eg <"VTX_READ_128", buffer_id, (outs R600_Reg128:$dst), + pattern> { + + let MEGA_FETCH_COUNT = 16; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 2; + let DST_SEL_W = 3; + let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 + + // XXX: Need to force VTX_READ_128 instructions to write to the same register + // that holds its buffer address to avoid potential hangs. We can't use + // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst + // registers are different sizes. 
+} + +//===----------------------------------------------------------------------===// +// VTX Read from parameter memory space +//===----------------------------------------------------------------------===// + +def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, + [(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))] +>; + +def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, + [(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))] +>; + +def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, + [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] +>; + +//===----------------------------------------------------------------------===// +// VTX Read from global memory space +//===----------------------------------------------------------------------===// + +// 8-bit reads +def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, + [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))] +>; + +// 32-bit reads +def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, + [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))] +>; + +// 128-bit reads +def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, + [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))] +>; + +//===----------------------------------------------------------------------===// +// Constant Loads +// XXX: We are currently storing all constants in the global address space. +//===----------------------------------------------------------------------===// + +def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, + [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))] +>; + +} + +let Predicates = [isCayman] in { + +let isVector = 1 in { + +def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; + +def MULLO_INT_cm : MULLO_INT_Common<0x8F>; +def MULHI_INT_cm : MULHI_INT_Common<0x90>; +def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; +def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; +def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>; +def EXP_IEEE_cm : EXP_IEEE_Common<0x81>; +def LOG_IEEE_ : LOG_IEEE_Common<0x83>; +def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>; +def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>; +def SIN_cm : SIN_Common<0x8D>; +def COS_cm : COS_Common<0x8E>; +} // End isVector = 1 + +def : SIN_PAT <SIN_cm>; +def : COS_PAT <COS_cm>; + +defm DIV_cm : DIV_Common<RECIP_IEEE_cm>; + +// RECIP_UINT emulation for Cayman +def : Pat < + (AMDGPUurecip R600_Reg32:$src0), + (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)), + (MOV_IMM_I32 0x4f800000))) +>; + + +def : Pat<(fsqrt R600_Reg32:$src), + (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>; + +} // End isCayman + +//===----------------------------------------------------------------------===// +// Branch Instructions +//===----------------------------------------------------------------------===// + + +def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src), + "IF_PREDICATE_SET $src", []>; + +def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src), + "PREDICATED_BREAK $src", []>; + +//===----------------------------------------------------------------------===// +// Pseudo instructions +//===----------------------------------------------------------------------===// + +let isPseudo = 1 in { + +def PRED_X : InstR600 < + 0, (outs R600_Predicate_Bit:$dst), + (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), + "", [], NullALU> { + let FlagOperandIdx = 3; +} + +let isTerminator = 1, isBranch = 1, isBarrier = 1 in { + +def JUMP : InstR600 <0x10, + (outs), + (ins brtarget:$target, R600_Pred:$p), + "JUMP 
$target ($p)", + [], AnyALU + >; + +} // End isTerminator = 1, isBranch = 1, isBarrier = 1 + +let usesCustomInserter = 1 in { + +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in { + +def MASK_WRITE : AMDGPUShaderInst < + (outs), + (ins R600_Reg32:$src), + "MASK_WRITE $src", + [] +>; + +} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1 + +def R600_LOAD_CONST : AMDGPUShaderInst < + (outs R600_Reg32:$dst), + (ins i32imm:$src0), + "R600_LOAD_CONST $dst, $src0", + [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))] +>; + +def RESERVE_REG : AMDGPUShaderInst < + (outs), + (ins i32imm:$src), + "RESERVE_REG $src", + [(int_AMDGPU_reserve_reg imm:$src)] +>; + +def TXD: AMDGPUShaderInst < + (outs R600_Reg128:$dst), + (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), + "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", + [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] +>; + +def TXD_SHADOW: AMDGPUShaderInst < + (outs R600_Reg128:$dst), + (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), + "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", + [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] +>; + +} // End isPseudo = 1 +} // End usesCustomInserter = 1 + +def CLAMP_R600 : CLAMP <R600_Reg32>; +def FABS_R600 : FABS<R600_Reg32>; +def FNEG_R600 : FNEG<R600_Reg32>; + +//===---------------------------------------------------------------------===// +// Return instruction +//===---------------------------------------------------------------------===// +let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in { + def RETURN : ILFormat<(outs), (ins variable_ops), + "RETURN", [(IL_retflag)]>; +} + +//===--------------------------------------------------------------------===// +// Instructions support +//===--------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// +// Custom Inserter for Branches and returns, this eventually will be a +// seperate pass +//===---------------------------------------------------------------------===// +let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { + def BRANCH : ILFormat<(outs), (ins brtarget:$target), + "; Pseudo unconditional branch instruction", + [(br bb:$target)]>; + defm BRANCH_COND : BranchConditional<IL_brcond>; +} + +//===---------------------------------------------------------------------===// +// Flow and Program control Instructions +//===---------------------------------------------------------------------===// +let isTerminator=1 in { + def SWITCH : ILFormat< (outs), (ins GPRI32:$src), + !strconcat("SWITCH", " $src"), []>; + def CASE : ILFormat< (outs), (ins GPRI32:$src), + !strconcat("CASE", " $src"), []>; + def BREAK : ILFormat< (outs), (ins), + "BREAK", []>; + def CONTINUE : ILFormat< (outs), (ins), + "CONTINUE", []>; + def DEFAULT : ILFormat< (outs), (ins), + "DEFAULT", []>; + def ELSE : ILFormat< (outs), (ins), + "ELSE", []>; + def ENDSWITCH : ILFormat< (outs), (ins), + "ENDSWITCH", []>; + def ENDMAIN : ILFormat< (outs), (ins), + "ENDMAIN", []>; + def END : ILFormat< (outs), (ins), + "END", []>; + def 
ENDFUNC : ILFormat< (outs), (ins), + "ENDFUNC", []>; + def ENDIF : ILFormat< (outs), (ins), + "ENDIF", []>; + def WHILELOOP : ILFormat< (outs), (ins), + "WHILE", []>; + def ENDLOOP : ILFormat< (outs), (ins), + "ENDLOOP", []>; + def FUNC : ILFormat< (outs), (ins), + "FUNC", []>; + def RETDYN : ILFormat< (outs), (ins), + "RET_DYN", []>; + // This opcode has custom swizzle pattern encoded in Swizzle Encoder + defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">; + // This opcode has custom swizzle pattern encoded in Swizzle Encoder + defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">; + // This opcode has custom swizzle pattern encoded in Swizzle Encoder + defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">; + // This opcode has custom swizzle pattern encoded in Swizzle Encoder + defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">; + // This opcode has custom swizzle pattern encoded in Swizzle Encoder + defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">; + // This opcode has custom swizzle pattern encoded in Swizzle Encoder + defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">; + defm IFC : BranchInstr2<"IFC">; + defm BREAKC : BranchInstr2<"BREAKC">; + defm CONTINUEC : BranchInstr2<"CONTINUEC">; +} + +//===----------------------------------------------------------------------===// +// ISel Patterns +//===----------------------------------------------------------------------===// + +//CNDGE_INT extra pattern +def : Pat < + (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1), + (i32 R600_Reg32:$src2), COND_GT), + (CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2) +>; + +// KIL Patterns +def KILP : Pat < + (int_AMDGPU_kilp), + (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO))) +>; + +def KIL : Pat < + (int_AMDGPU_kill R600_Reg32:$src0), + (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0))) +>; + +// SGT Reverse args +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT), + (SGT R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SGE Reverse args +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE), + (SGE R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGT_INT reverse args +def : Pat < + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT), + (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGE_INT reverse args +def : Pat < + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE), + (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGT_UINT reverse args +def : Pat < + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT), + (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGE_UINT reverse args +def : Pat < + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE), + (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// The next two patterns are special cases for handling 'true if ordered' and +// 'true if unordered' conditionals. 
The assumption here is that the behavior of +// SETE and SNE conforms to the Direct3D 10 rules for floating point values +// described here: +// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit +// We assume that SETE returns false when one of the operands is NAN and +// SNE returns true when on of the operands is NAN + +//SETE - 'true if ordered' +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO), + (SETE R600_Reg32:$src0, R600_Reg32:$src1) +>; + +//SNE - 'true if unordered' +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO), + (SNE R600_Reg32:$src0, R600_Reg32:$src1) +>; + +def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>; +def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>; +def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>; +def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>; + +def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>; +def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>; +def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>; +def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>; + +def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>; +def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>; +def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>; +def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>; + +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>; + +def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>; +def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>; + +// bitconvert patterns + +def : BitConvert <i32, f32, R600_Reg32>; +def : BitConvert <f32, i32, R600_Reg32>; +def : BitConvert <v4f32, v4i32, R600_Reg128>; +def : BitConvert <v4i32, v4f32, R600_Reg128>; + +// DWORDADDR pattern +def : DwordAddrPat <i32, R600_Reg32>; + +} // End isR600toCayman Predicate diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td new file mode 100644 index 0000000000..3825bc4d3b --- /dev/null +++ b/lib/Target/R600/R600Intrinsics.td @@ -0,0 +1,32 @@ +//===-- R600Intrinsics.td - R600 Instrinsic defs -------*- tablegen -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// R600 Intrinsic Definitions +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "R600", isTarget = 1 in { + def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_R600_load_input_perspective : + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; + def int_R600_load_input_constant : + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; + def int_R600_load_input_linear : + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>; + def int_R600_store_stream_output : + Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_R600_store_pixel_color : + Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; + def int_R600_store_pixel_depth : + Intrinsic<[], [llvm_float_ty], []>; + def int_R600_store_pixel_stencil : + Intrinsic<[], [llvm_float_ty], []>; + def int_R600_store_pixel_dummy : + Intrinsic<[], [], []>; +} diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp new file mode 100644 index 0000000000..4eb5efa19f --- /dev/null +++ b/lib/Target/R600/R600MachineFunctionInfo.cpp @@ -0,0 +1,34 @@ +//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//===----------------------------------------------------------------------===// + +#include "R600MachineFunctionInfo.h" + +using namespace llvm; + +R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) + : MachineFunctionInfo(), + HasLinearInterpolation(false), + HasPerspectiveInterpolation(false) { + memset(Outputs, 0, sizeof(Outputs)); + memset(StreamOutputs, 0, sizeof(StreamOutputs)); + } + +unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const { + assert(HasPerspectiveInterpolation); + return 0; +} + +unsigned R600MachineFunctionInfo::GetIJLinearIndex() const { + assert(HasLinearInterpolation); + if (HasPerspectiveInterpolation) + return 1; + else + return 0; +} diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h new file mode 100644 index 0000000000..e97fb5be62 --- /dev/null +++ b/lib/Target/R600/R600MachineFunctionInfo.h @@ -0,0 +1,39 @@ +//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +//===----------------------------------------------------------------------===// + +#ifndef R600MACHINEFUNCTIONINFO_H +#define R600MACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include <vector> + +namespace llvm { + +class R600MachineFunctionInfo : public MachineFunctionInfo { + +public: + R600MachineFunctionInfo(const MachineFunction &MF); + std::vector<unsigned> ReservedRegs; + SDNode *Outputs[16]; + SDNode *StreamOutputs[64][4]; + bool HasLinearInterpolation; + bool HasPerspectiveInterpolation; + + unsigned GetIJLinearIndex() const; + unsigned GetIJPerspectiveIndex() const; + +}; + +} // End llvm namespace + +#endif //R600MACHINEFUNCTIONINFO_H diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp new file mode 100644 index 0000000000..a39f83dbac --- /dev/null +++ b/lib/Target/R600/R600RegisterInfo.cpp @@ -0,0 +1,89 @@ +//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief R600 implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "R600RegisterInfo.h" +#include "AMDGPUTargetMachine.h" +#include "R600Defines.h" +#include "R600MachineFunctionInfo.h" + +using namespace llvm; + +R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm, + const TargetInstrInfo &tii) +: AMDGPURegisterInfo(tm, tii), + TM(tm), + TII(tii) + { } + +BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>(); + + Reserved.set(AMDGPU::ZERO); + Reserved.set(AMDGPU::HALF); + Reserved.set(AMDGPU::ONE); + Reserved.set(AMDGPU::ONE_INT); + Reserved.set(AMDGPU::NEG_HALF); + Reserved.set(AMDGPU::NEG_ONE); + Reserved.set(AMDGPU::PV_X); + Reserved.set(AMDGPU::ALU_LITERAL_X); + Reserved.set(AMDGPU::PREDICATE_BIT); + Reserved.set(AMDGPU::PRED_SEL_OFF); + Reserved.set(AMDGPU::PRED_SEL_ZERO); + Reserved.set(AMDGPU::PRED_SEL_ONE); + + for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(), + E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) { + Reserved.set(*I); + } + + for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(), + E = MFI->ReservedRegs.end(); I != E; ++I) { + Reserved.set(*I); + } + + return Reserved; +} + +const TargetRegisterClass * +R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const { + switch (rc->getID()) { + case AMDGPU::GPRF32RegClassID: + case AMDGPU::GPRI32RegClassID: + return &AMDGPU::R600_Reg32RegClass; + default: return rc; + } +} + +unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const { + return this->getEncodingValue(reg) >> HW_CHAN_SHIFT; +} + +const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass( + MVT VT) const { + switch(VT.SimpleTy) { + default: + case MVT::i32: return &AMDGPU::R600_TReg32RegClass; + } +} + +unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const { + switch (Channel) { + default: assert(!"Invalid channel index"); return 0; + case 0: return AMDGPU::sel_x; + case 1: 
return AMDGPU::sel_y; + case 2: return AMDGPU::sel_z; + case 3: return AMDGPU::sel_w; + } +} diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h new file mode 100644 index 0000000000..f9ca918f24 --- /dev/null +++ b/lib/Target/R600/R600RegisterInfo.h @@ -0,0 +1,55 @@ +//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Interface definition for R600RegisterInfo +// +//===----------------------------------------------------------------------===// + +#ifndef R600REGISTERINFO_H_ +#define R600REGISTERINFO_H_ + +#include "AMDGPURegisterInfo.h" +#include "AMDGPUTargetMachine.h" + +namespace llvm { + +class R600TargetMachine; +class TargetInstrInfo; + +struct R600RegisterInfo : public AMDGPURegisterInfo { + AMDGPUTargetMachine &TM; + const TargetInstrInfo &TII; + + R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii); + + virtual BitVector getReservedRegs(const MachineFunction &MF) const; + + /// \param RC is an AMDIL reg class. + /// + /// \returns the R600 reg class that is equivalent to \p RC. + virtual const TargetRegisterClass *getISARegClass( + const TargetRegisterClass *RC) const; + + /// \brief get the HW encoding for a register's channel. + unsigned getHWRegChan(unsigned reg) const; + + /// \brief get the register class of the specified type to use in the + /// CFGStructurizer + virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; + + /// \returns the sub reg enum value for the given \p Channel + /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x) + unsigned getSubRegFromChannel(unsigned Channel) const; + +}; + +} // End namespace llvm + +#endif // AMDIDSAREGISTERINFO_H_ diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td new file mode 100644 index 0000000000..d3d6d25d29 --- /dev/null +++ b/lib/Target/R600/R600RegisterInfo.td @@ -0,0 +1,107 @@ + +class R600Reg <string name, bits<16> encoding> : Register<name> { + let Namespace = "AMDGPU"; + let HWEncoding = encoding; +} + +class R600RegWithChan <string name, bits<9> sel, string chan> : + Register <name> { + + field bits<2> chan_encoding = !if(!eq(chan, "X"), 0, + !if(!eq(chan, "Y"), 1, + !if(!eq(chan, "Z"), 2, + !if(!eq(chan, "W"), 3, 0)))); + let HWEncoding{8-0} = sel; + let HWEncoding{10-9} = chan_encoding; + let Namespace = "AMDGPU"; +} + +class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> : + RegisterWithSubRegs<n, subregs> { + let Namespace = "AMDGPU"; + let SubRegIndices = [sel_x, sel_y, sel_z, sel_w]; + let HWEncoding = encoding; +} + +foreach Index = 0-127 in { + foreach Chan = [ "X", "Y", "Z", "W" ] in { + // 32-bit Temporary Registers + def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>; + + // 32-bit Constant Registers (There are more than 128, this the number + // that is currently supported. 
+ def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>; + } + // 128-bit Temporary Registers + def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW", + [!cast<Register>("T"#Index#"_X"), + !cast<Register>("T"#Index#"_Y"), + !cast<Register>("T"#Index#"_Z"), + !cast<Register>("T"#Index#"_W")], + Index>; +} + +// Array Base Register holding input in FS +foreach Index = 448-464 in { + def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>; +} + + +// Special Registers + +def ZERO : R600Reg<"0.0", 248>; +def ONE : R600Reg<"1.0", 249>; +def NEG_ONE : R600Reg<"-1.0", 249>; +def ONE_INT : R600Reg<"1", 250>; +def HALF : R600Reg<"0.5", 252>; +def NEG_HALF : R600Reg<"-0.5", 252>; +def ALU_LITERAL_X : R600Reg<"literal.x", 253>; +def PV_X : R600Reg<"pv.x", 254>; +def PREDICATE_BIT : R600Reg<"PredicateBit", 0>; +def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>; +def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>; +def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>; + +def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "ArrayBase%u", 448, 464))>; + +def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (interleave + (interleave (sequence "C%u_X", 0, 127), + (sequence "C%u_Z", 0, 127)), + (interleave (sequence "C%u_Y", 0, 127), + (sequence "C%u_W", 0, 127))))>; + +def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "T%u_X", 0, 127))>; + +def R600_TReg32_Y : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "T%u_Y", 0, 127))>; + +def R600_TReg32_Z : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "T%u_Z", 0, 127))>; + +def R600_TReg32_W : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "T%u_W", 0, 127))>; + +def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (interleave + (interleave R600_TReg32_X, R600_TReg32_Z), + (interleave R600_TReg32_Y, R600_TReg32_W)))>; + +def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add + R600_TReg32, + R600_CReg32, + R600_ArrayBase, + ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>; + +def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add + PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>; + +def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add + PREDICATE_BIT)>; + +def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, + (add (sequence "T%u_XYZW", 0, 127))> { + let CopyCost = -1; +} diff --git a/lib/Target/R600/R600Schedule.td b/lib/Target/R600/R600Schedule.td new file mode 100644 index 0000000000..7ede181c51 --- /dev/null +++ b/lib/Target/R600/R600Schedule.td @@ -0,0 +1,36 @@ +//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction +// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS +// slot has been removed. 
+// +//===----------------------------------------------------------------------===// + + +def ALU_X : FuncUnit; +def ALU_Y : FuncUnit; +def ALU_Z : FuncUnit; +def ALU_W : FuncUnit; +def TRANS : FuncUnit; + +def AnyALU : InstrItinClass; +def VecALU : InstrItinClass; +def TransALU : InstrItinClass; + +def R600_EG_Itin : ProcessorItineraries < + [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL], + [], + [ + InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>, + InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_X, ALU_W]>]>, + InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>, + InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]> + ] +>; diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp new file mode 100644 index 0000000000..f580377418 --- /dev/null +++ b/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -0,0 +1,329 @@ +//===-- SIAnnotateControlFlow.cpp - ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Annotates the control flow with hardware specific intrinsics. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" + +using namespace llvm; + +namespace { + +// Complex types used in this pass +typedef std::pair<BasicBlock *, Value *> StackEntry; +typedef SmallVector<StackEntry, 16> StackVector; + +// Intrinsic names the control flow is annotated with +static const char *IfIntrinsic = "llvm.SI.if"; +static const char *ElseIntrinsic = "llvm.SI.else"; +static const char *BreakIntrinsic = "llvm.SI.break"; +static const char *IfBreakIntrinsic = "llvm.SI.if.break"; +static const char *ElseBreakIntrinsic = "llvm.SI.else.break"; +static const char *LoopIntrinsic = "llvm.SI.loop"; +static const char *EndCfIntrinsic = "llvm.SI.end.cf"; + +class SIAnnotateControlFlow : public FunctionPass { + + static char ID; + + Type *Boolean; + Type *Void; + Type *Int64; + Type *ReturnStruct; + + ConstantInt *BoolTrue; + ConstantInt *BoolFalse; + UndefValue *BoolUndef; + Constant *Int64Zero; + + Constant *If; + Constant *Else; + Constant *Break; + Constant *IfBreak; + Constant *ElseBreak; + Constant *Loop; + Constant *EndCf; + + DominatorTree *DT; + StackVector Stack; + SSAUpdater PhiInserter; + + bool isTopOfStack(BasicBlock *BB); + + Value *popSaved(); + + void push(BasicBlock *BB, Value *Saved); + + bool isElse(PHINode *Phi); + + void eraseIfUnused(PHINode *Phi); + + void openIf(BranchInst *Term); + + void insertElse(BranchInst *Term); + + void handleLoopCondition(Value *Cond); + + void handleLoop(BranchInst *Term); + + void closeControlFlow(BasicBlock *BB); + +public: + SIAnnotateControlFlow(): + FunctionPass(ID) { } + + virtual bool doInitialization(Module &M); + + virtual bool runOnFunction(Function &F); + + virtual const char *getPassName() const { + return "SI annotate control flow"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTree>(); + AU.addPreserved<DominatorTree>(); + FunctionPass::getAnalysisUsage(AU); + } + +}; + +} // end anonymous namespace + +char 
SIAnnotateControlFlow::ID = 0; + +/// \brief Initialize all the types and constants used in the pass +bool SIAnnotateControlFlow::doInitialization(Module &M) { + LLVMContext &Context = M.getContext(); + + Void = Type::getVoidTy(Context); + Boolean = Type::getInt1Ty(Context); + Int64 = Type::getInt64Ty(Context); + ReturnStruct = StructType::get(Boolean, Int64, (Type *)0); + + BoolTrue = ConstantInt::getTrue(Context); + BoolFalse = ConstantInt::getFalse(Context); + BoolUndef = UndefValue::get(Boolean); + Int64Zero = ConstantInt::get(Int64, 0); + + If = M.getOrInsertFunction( + IfIntrinsic, ReturnStruct, Boolean, (Type *)0); + + Else = M.getOrInsertFunction( + ElseIntrinsic, ReturnStruct, Int64, (Type *)0); + + Break = M.getOrInsertFunction( + BreakIntrinsic, Int64, Int64, (Type *)0); + + IfBreak = M.getOrInsertFunction( + IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)0); + + ElseBreak = M.getOrInsertFunction( + ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)0); + + Loop = M.getOrInsertFunction( + LoopIntrinsic, Boolean, Int64, (Type *)0); + + EndCf = M.getOrInsertFunction( + EndCfIntrinsic, Void, Int64, (Type *)0); + + return false; +} + +/// \brief Is BB the last block saved on the stack ? +bool SIAnnotateControlFlow::isTopOfStack(BasicBlock *BB) { + return Stack.back().first == BB; +} + +/// \brief Pop the last saved value from the control flow stack +Value *SIAnnotateControlFlow::popSaved() { + return Stack.pop_back_val().second; +} + +/// \brief Push a BB and saved value to the control flow stack +void SIAnnotateControlFlow::push(BasicBlock *BB, Value *Saved) { + Stack.push_back(std::make_pair(BB, Saved)); +} + +/// \brief Can the condition represented by this PHI node treated like +/// an "Else" block? +bool SIAnnotateControlFlow::isElse(PHINode *Phi) { + BasicBlock *IDom = DT->getNode(Phi->getParent())->getIDom()->getBlock(); + for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) { + if (Phi->getIncomingBlock(i) == IDom) { + + if (Phi->getIncomingValue(i) != BoolTrue) + return false; + + } else { + if (Phi->getIncomingValue(i) != BoolFalse) + return false; + + } + } + return true; +} + +// \brief Erase "Phi" if it is not used any more +void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) { + if (!Phi->hasNUsesOrMore(1)) + Phi->eraseFromParent(); +} + +/// \brief Open a new "If" block +void SIAnnotateControlFlow::openIf(BranchInst *Term) { + Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term); + Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term)); + push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term)); +} + +/// \brief Close the last "If" block and open a new "Else" block +void SIAnnotateControlFlow::insertElse(BranchInst *Term) { + Value *Ret = CallInst::Create(Else, popSaved(), "", Term); + Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term)); + push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term)); +} + +/// \brief Recursively handle the condition leading to a loop +void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) { + if (PHINode *Phi = dyn_cast<PHINode>(Cond)) { + + // Handle all non constant incoming values first + for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = Phi->getIncomingValue(i); + if (isa<ConstantInt>(Incoming)) + continue; + + Phi->setIncomingValue(i, BoolFalse); + handleLoopCondition(Incoming); + } + + BasicBlock *Parent = Phi->getParent(); + BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock(); + + for (unsigned i = 
0, e = Phi->getNumIncomingValues(); i != e; ++i) { + + Value *Incoming = Phi->getIncomingValue(i); + if (Incoming != BoolTrue) + continue; + + BasicBlock *From = Phi->getIncomingBlock(i); + if (From == IDom) { + CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt()); + if (OldEnd && OldEnd->getCalledFunction() == EndCf) { + Value *Args[] = { + OldEnd->getArgOperand(0), + PhiInserter.GetValueAtEndOfBlock(Parent) + }; + Value *Ret = CallInst::Create(ElseBreak, Args, "", OldEnd); + PhiInserter.AddAvailableValue(Parent, Ret); + continue; + } + } + + TerminatorInst *Insert = From->getTerminator(); + Value *Arg = PhiInserter.GetValueAtEndOfBlock(From); + Value *Ret = CallInst::Create(Break, Arg, "", Insert); + PhiInserter.AddAvailableValue(From, Ret); + } + eraseIfUnused(Phi); + + } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) { + BasicBlock *Parent = Inst->getParent(); + TerminatorInst *Insert = Parent->getTerminator(); + Value *Args[] = { Cond, PhiInserter.GetValueAtEndOfBlock(Parent) }; + Value *Ret = CallInst::Create(IfBreak, Args, "", Insert); + PhiInserter.AddAvailableValue(Parent, Ret); + + } else { + assert(0 && "Unhandled loop condition!"); + } +} + +/// \brief Handle a back edge (loop) +void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { + BasicBlock *Target = Term->getSuccessor(1); + PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front()); + + PhiInserter.Initialize(Int64, ""); + PhiInserter.AddAvailableValue(Target, Broken); + + Value *Cond = Term->getCondition(); + Term->setCondition(BoolTrue); + handleLoopCondition(Cond); + + BasicBlock *BB = Term->getParent(); + Value *Arg = PhiInserter.GetValueAtEndOfBlock(BB); + for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target); + PI != PE; ++PI) { + + Broken->addIncoming(*PI == BB ? Arg : Int64Zero, *PI); + } + + Term->setCondition(CallInst::Create(Loop, Arg, "", Term)); + push(Term->getSuccessor(0), Arg); +} + +/// \brief Close the last opened control flow +void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) { + CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt()); +} + +/// \brief Annotate the control flow with intrinsics so the backend can +/// recognize if/then/else and loops. 
+bool SIAnnotateControlFlow::runOnFunction(Function &F) { + DT = &getAnalysis<DominatorTree>(); + + for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()), + E = df_end(&F.getEntryBlock()); I != E; ++I) { + + BranchInst *Term = dyn_cast<BranchInst>((*I)->getTerminator()); + + if (!Term || Term->isUnconditional()) { + if (isTopOfStack(*I)) + closeControlFlow(*I); + continue; + } + + if (I.nodeVisited(Term->getSuccessor(1))) { + if (isTopOfStack(*I)) + closeControlFlow(*I); + handleLoop(Term); + continue; + } + + if (isTopOfStack(*I)) { + PHINode *Phi = dyn_cast<PHINode>(Term->getCondition()); + if (Phi && Phi->getParent() == *I && isElse(Phi)) { + insertElse(Term); + eraseIfUnused(Phi); + continue; + } + closeControlFlow(*I); + } + openIf(Term); + } + + assert(Stack.empty()); + return true; +} + +/// \brief Create the annotation pass +FunctionPass *llvm::createSIAnnotateControlFlowPass() { + return new SIAnnotateControlFlow(); +} diff --git a/lib/Target/R600/SIAssignInterpRegs.cpp b/lib/Target/R600/SIAssignInterpRegs.cpp new file mode 100644 index 0000000000..832e44d766 --- /dev/null +++ b/lib/Target/R600/SIAssignInterpRegs.cpp @@ -0,0 +1,152 @@ +//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief This pass maps the pseudo interpolation registers to the correct physical +/// registers. +// +/// Prior to executing a fragment shader, the GPU loads interpolation +/// parameters into physical registers. The specific physical register that each +/// interpolation parameter ends up in depends on the type of the interpolation +/// parameter as well as how many interpolation parameters are used by the +/// shader. 
+// +//===----------------------------------------------------------------------===// + + + +#include "AMDGPU.h" +#include "AMDIL.h" +#include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +namespace { + +class SIAssignInterpRegsPass : public MachineFunctionPass { + +private: + static char ID; + TargetMachine &TM; + + void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI, + unsigned physReg, unsigned virtReg); + +public: + SIAssignInterpRegsPass(TargetMachine &tm) : + MachineFunctionPass(ID), TM(tm) { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "SI Assign intrpolation registers"; } +}; + +} // End anonymous namespace + +char SIAssignInterpRegsPass::ID = 0; + +#define INTERP_VALUES 16 +#define REQUIRED_VALUE_MAX_INDEX 7 + +struct InterpInfo { + bool Enabled; + unsigned Regs[3]; + unsigned RegCount; +}; + + +FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) { + return new SIAssignInterpRegsPass(tm); +} + +bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF) { + + struct InterpInfo InterpUse[INTERP_VALUES] = { + {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2}, + {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2}, + {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2}, + {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3}, + {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2}, + {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2}, + {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2}, + {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1}, + {false, {AMDGPU::POS_X_FLOAT}, 1}, + {false, {AMDGPU::POS_Y_FLOAT}, 1}, + {false, {AMDGPU::POS_Z_FLOAT}, 1}, + {false, {AMDGPU::POS_W_FLOAT}, 1}, + {false, {AMDGPU::FRONT_FACE}, 1}, + {false, {AMDGPU::ANCILLARY}, 1}, + {false, {AMDGPU::SAMPLE_COVERAGE}, 1}, + {false, {AMDGPU::POS_FIXED_PT}, 1} + }; + + SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>(); + // This pass is only needed for pixel shaders. + if (MFI->ShaderType != ShaderType::PIXEL) { + return false; + } + MachineRegisterInfo &MRI = MF.getRegInfo(); + bool ForceEnable = true; + + // First pass, mark the interpolation values that are used. + for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) { + for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount; + RegIdx++) { + InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled || + !MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]); + if (InterpUse[InterpIdx].Enabled && + InterpIdx <= REQUIRED_VALUE_MAX_INDEX) { + ForceEnable = false; + } + } + } + + // At least one interpolation mode must be enabled or else the GPU will hang. + if (ForceEnable) { + InterpUse[0].Enabled = true; + } + + unsigned UsedVgprs = 0; + + // Second pass, replace with VGPRs. 
+ for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) { + if (!InterpUse[InterpIdx].Enabled) { + continue; + } + MFI->SPIPSInputAddr |= (1 << InterpIdx); + + for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount; + RegIdx++, UsedVgprs++) { + unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs); + unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); + MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg); + addLiveIn(&MF, MRI, NewReg, VirtReg); + } + } + + return false; +} + +void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF, + MachineRegisterInfo & MRI, + unsigned physReg, unsigned virtReg) { + const TargetInstrInfo * TII = TM.getInstrInfo(); + if (!MRI.isLiveIn(physReg)) { + MRI.addLiveIn(physReg, virtReg); + MF->front().addLiveIn(physReg); + BuildMI(MF->front(), MF->front().begin(), DebugLoc(), + TII->get(TargetOpcode::COPY), virtReg) + .addReg(physReg); + } else { + MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg)); + } +} diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp new file mode 100644 index 0000000000..4c672ca7c1 --- /dev/null +++ b/lib/Target/R600/SIISelLowering.cpp @@ -0,0 +1,511 @@ +//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Custom DAG lowering for SI +// +//===----------------------------------------------------------------------===// + +#include "SIISelLowering.h" +#include "AMDIL.h" +#include "AMDILIntrinsicInfo.h" +#include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" + +using namespace llvm; + +SITargetLowering::SITargetLowering(TargetMachine &TM) : + AMDGPUTargetLowering(TM), + TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())) { + addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); + addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass); + addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass); + addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); + addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass); + addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass); + + addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); + addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass); + + computeRegisterProperties(); + + setOperationAction(ISD::AND, MVT::i1, Custom); + + setOperationAction(ISD::ADD, MVT::i64, Legal); + setOperationAction(ISD::ADD, MVT::i32, Legal); + + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + // We need to custom lower loads from the USER_SGPR address space, so we can + // add the SGPRs as livein registers. 
+ setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::i64, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setTargetDAGCombine(ISD::SELECT_CC); + + setTargetDAGCombine(ISD::SETCC); +} + +MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( + MachineInstr * MI, MachineBasicBlock * BB) const { + const TargetInstrInfo * TII = getTargetMachine().getInstrInfo(); + MachineRegisterInfo & MRI = BB->getParent()->getRegInfo(); + MachineBasicBlock::iterator I = MI; + + if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) { + AppendS_WAITCNT(MI, *BB, llvm::next(I)); + return BB; + } + + switch (MI->getOpcode()) { + default: + return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case AMDGPU::BRANCH: return BB; + case AMDGPU::CLAMP_SI: + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + // VSRC1-2 are unused, but we still need to fill all the + // operand slots, so we just reuse the VSRC0 operand + .addOperand(MI->getOperand(1)) + .addOperand(MI->getOperand(1)) + .addImm(0) // ABS + .addImm(1) // CLAMP + .addImm(0) // OMOD + .addImm(0); // NEG + MI->eraseFromParent(); + break; + + case AMDGPU::FABS_SI: + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + // VSRC1-2 are unused, but we still need to fill all the + // operand slots, so we just reuse the VSRC0 operand + .addOperand(MI->getOperand(1)) + .addOperand(MI->getOperand(1)) + .addImm(1) // ABS + .addImm(0) // CLAMP + .addImm(0) // OMOD + .addImm(0); // NEG + MI->eraseFromParent(); + break; + + case AMDGPU::FNEG_SI: + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + // VSRC1-2 are unused, but we still need to fill all the + // operand slots, so we just reuse the VSRC0 operand + .addOperand(MI->getOperand(1)) + .addOperand(MI->getOperand(1)) + .addImm(0) // ABS + .addImm(0) // CLAMP + .addImm(0) // OMOD + .addImm(1); // NEG + MI->eraseFromParent(); + break; + case AMDGPU::SHADER_TYPE: + BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType = + MI->getOperand(0).getImm(); + MI->eraseFromParent(); + break; + + case AMDGPU::SI_INTERP: + LowerSI_INTERP(MI, *BB, I, MRI); + break; + case AMDGPU::SI_INTERP_CONST: + LowerSI_INTERP_CONST(MI, *BB, I, MRI); + break; + case AMDGPU::SI_KIL: + LowerSI_KIL(MI, *BB, I, MRI); + break; + case AMDGPU::SI_WQM: + LowerSI_WQM(MI, *BB, I, MRI); + break; + case AMDGPU::SI_V_CNDLT: + LowerSI_V_CNDLT(MI, *BB, I, MRI); + break; + } + return BB; +} + +void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I) const { + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT)) + .addImm(0); +} + + +void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC); + + MI->eraseFromParent(); +} + +void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { + unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); + 
unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass); + MachineOperand dst = MI->getOperand(0); + MachineOperand iReg = MI->getOperand(1); + MachineOperand jReg = MI->getOperand(2); + MachineOperand attr_chan = MI->getOperand(3); + MachineOperand attr = MI->getOperand(4); + MachineOperand params = MI->getOperand(5); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0) + .addOperand(params); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp) + .addOperand(iReg) + .addOperand(attr_chan) + .addOperand(attr) + .addReg(M0); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32)) + .addOperand(dst) + .addReg(tmp) + .addOperand(jReg) + .addOperand(attr_chan) + .addOperand(attr) + .addReg(M0); + + MI->eraseFromParent(); +} + +void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI, + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineRegisterInfo &MRI) const { + MachineOperand dst = MI->getOperand(0); + MachineOperand attr_chan = MI->getOperand(1); + MachineOperand attr = MI->getOperand(2); + MachineOperand params = MI->getOperand(3); + unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0) + .addOperand(params); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32)) + .addOperand(dst) + .addOperand(attr_chan) + .addOperand(attr) + .addReg(M0); + + MI->eraseFromParent(); +} + +void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { + // Clear this pixel from the exec mask if the operand is negative + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32), + AMDGPU::VCC) + .addReg(AMDGPU::SREG_LIT_0) + .addOperand(MI->getOperand(0)); + + MI->eraseFromParent(); +} + +void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { + unsigned VCC = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + + BuildMI(BB, I, BB.findDebugLoc(I), + TII->get(AMDGPU::V_CMP_GT_F32_e32), + VCC) + .addReg(AMDGPU::SREG_LIT_0) + .addOperand(MI->getOperand(1)); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32_e32)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(3)) + .addOperand(MI->getOperand(2)) + .addReg(VCC); + + MI->eraseFromParent(); +} + +EVT SITargetLowering::getSetCCResultType(EVT VT) const { + return MVT::i1; +} + +//===----------------------------------------------------------------------===// +// Custom DAG Lowering Operations +//===----------------------------------------------------------------------===// + +SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); + case ISD::BRCOND: return LowerBRCOND(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND); + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntrinsicID = + cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + EVT VT = Op.getValueType(); + switch (IntrinsicID) { + case AMDGPUIntrinsic::SI_vs_load_buffer_index: + return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass, + AMDGPU::VGPR0, VT); + default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); + } + break; + } + } + return SDValue(); +} 
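[Editorial aside, not part of the patch] The ISD::LOAD case in LowerOperation above hands USER_SGPR loads to LowerLOAD (further down in this file), which in effect treats the constant base pointer as a dword index into the user SGPRs and divides it by the loaded type's width in dwords to pick the scalar register. The short, self-contained C++ sketch below mirrors only that index arithmetic under the same alignment assumption; the function name, the sample values, and main() are illustrative and do not appear anywhere in the backend.

#include <cassert>
#include <cstdint>

// Sketch of the USER_SGPR register selection: PtrDwordIndex stands in for the
// constant pointer value carried by the load, ValueSizeInBits is 32 for an i32
// load or 64 for an i64 load.
static unsigned sgprIndexForUserSgprLoad(uint64_t PtrDwordIndex,
                                         unsigned ValueSizeInBits) {
  unsigned TypeDwordWidth = ValueSizeInBits / 32;   // 1 for i32, 2 for i64
  assert(PtrDwordIndex % TypeDwordWidth == 0 &&
         "USER_SGPR not properly aligned");
  return static_cast<unsigned>(PtrDwordIndex / TypeDwordWidth);
}

int main() {
  // An i64 user-SGPR value starting at dword 4 selects SReg_64 register #2;
  // an i32 value at dword 3 selects SReg_32 register #3.
  return (sgprIndexForUserSgprLoad(4, 64) == 2 &&
          sgprIndexForUserSgprLoad(3, 32) == 3) ? 0 : 1;
}

The same dword-granularity bookkeeping is what the "USER_SGPR not properly aligned" assertion in LowerLOAD below enforces; a misaligned 64-bit value would straddle two SReg_32 registers and could not be mapped to a single SReg_64.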
+ +/// \brief The function is for lowering i1 operations on the +/// VCC register. +/// +/// In the VALU context, VCC is a one bit register, but in the +/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only +/// the SALU can perform operations on the VCC register, we need to promote +/// the operand types from i1 to i64 in order for tablegen to be able to match +/// this operation to the correct SALU instruction. We do this promotion by +/// wrapping the operands in a CopyToReg node. +/// +SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op, + SelectionDAG &DAG, + unsigned VCCNode) const { + DebugLoc DL = Op.getDebugLoc(); + + SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64, + DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, + Op.getOperand(0)), + DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, + Op.getOperand(1))); + + return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode); +} + +/// \brief Helper function for LowerBRCOND +static SDNode *findUser(SDValue Value, unsigned Opcode) { + + SDNode *Parent = Value.getNode(); + for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end(); + I != E; ++I) { + + if (I.getUse().get() != Value) + continue; + + if (I->getOpcode() == Opcode) + return *I; + } + return 0; +} + +/// This transforms the control flow intrinsics to get the branch destination as +/// last parameter, also switches branch target with BR if the need arise +SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, + SelectionDAG &DAG) const { + + DebugLoc DL = BRCOND.getDebugLoc(); + + SDNode *Intr = BRCOND.getOperand(1).getNode(); + SDValue Target = BRCOND.getOperand(2); + SDNode *BR = 0; + + if (Intr->getOpcode() == ISD::SETCC) { + // As long as we negate the condition everything is fine + SDNode *SetCC = Intr; + assert(SetCC->getConstantOperandVal(1) == 1); + assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() == + ISD::SETNE); + Intr = SetCC->getOperand(0).getNode(); + + } else { + // Get the target from BR if we don't negate the condition + BR = findUser(BRCOND, ISD::BR); + Target = BR->getOperand(1); + } + + assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN); + + // Build the result and + SmallVector<EVT, 4> Res; + for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i) + Res.push_back(Intr->getValueType(i)); + + // operands of the new intrinsic call + SmallVector<SDValue, 4> Ops; + Ops.push_back(BRCOND.getOperand(0)); + for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i) + Ops.push_back(Intr->getOperand(i)); + Ops.push_back(Target); + + // build the new intrinsic call + SDNode *Result = DAG.getNode( + Res.size() > 1 ? 
ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL, + DAG.getVTList(Res.data(), Res.size()), Ops.data(), Ops.size()).getNode(); + + if (BR) { + // Give the branch instruction our target + SDValue Ops[] = { + BR->getOperand(0), + BRCOND.getOperand(2) + }; + DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2); + } + + SDValue Chain = SDValue(Result, Result->getNumValues() - 1); + + // Copy the intrinsic results to registers + for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) { + SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg); + if (!CopyToReg) + continue; + + Chain = DAG.getCopyToReg( + Chain, DL, + CopyToReg->getOperand(1), + SDValue(Result, i - 1), + SDValue()); + + DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0)); + } + + // Remove the old intrinsic from the chain + DAG.ReplaceAllUsesOfValueWith( + SDValue(Intr, Intr->getNumValues() - 1), + Intr->getOperand(0)); + + return Chain; +} + +SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op); + + assert(Ptr); + + unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace(); + + // We only need to lower USER_SGPR address space loads + if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) { + return SDValue(); + } + + // Loads from the USER_SGPR address space can only have constant value + // pointers. + ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr()); + assert(BasePtr); + + unsigned TypeDwordWidth = VT.getSizeInBits() / 32; + const TargetRegisterClass * dstClass; + switch (TypeDwordWidth) { + default: + assert(!"USER_SGPR value size not implemented"); + return SDValue(); + case 1: + dstClass = &AMDGPU::SReg_32RegClass; + break; + case 2: + dstClass = &AMDGPU::SReg_64RegClass; + break; + } + uint64_t Index = BasePtr->getZExtValue(); + assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned"); + unsigned SGPRIndex = Index / TypeDwordWidth; + unsigned Reg = dstClass->getRegister(SGPRIndex); + + DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg, + VT)); + return SDValue(); +} + +SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue True = Op.getOperand(2); + SDValue False = Op.getOperand(3); + SDValue CC = Op.getOperand(4); + EVT VT = Op.getValueType(); + DebugLoc DL = Op.getDebugLoc(); + + // Possible Min/Max pattern + SDValue MinMax = LowerMinMax(Op, DAG); + if (MinMax.getNode()) { + return MinMax; + } + + SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC); + return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); +} + +//===----------------------------------------------------------------------===// +// Custom DAG optimizations +//===----------------------------------------------------------------------===// + +SDValue SITargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + switch (N->getOpcode()) { + default: break; + case ISD::SELECT_CC: { + N->dump(); + ConstantSDNode *True, *False; + // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc) + if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2))) + && (False = dyn_cast<ConstantSDNode>(N->getOperand(3))) + && True->isAllOnesValue() + && False->isNullValue() + && VT == MVT::i1) { + return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0), + N->getOperand(1), 
N->getOperand(4)); + + } + break; + } + case ISD::SETCC: { + SDValue Arg0 = N->getOperand(0); + SDValue Arg1 = N->getOperand(1); + SDValue CC = N->getOperand(2); + ConstantSDNode * C = NULL; + ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get(); + + // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne) + if (VT == MVT::i1 + && Arg0.getOpcode() == ISD::SIGN_EXTEND + && Arg0.getOperand(0).getValueType() == MVT::i1 + && (C = dyn_cast<ConstantSDNode>(Arg1)) + && C->isNullValue() + && CCOp == ISD::SETNE) { + return SimplifySetCC(VT, Arg0.getOperand(0), + DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL); + } + break; + } + } + return SDValue(); +} + +#define NODE_NAME_CASE(node) case SIISD::node: return #node; + +const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return AMDGPUTargetLowering::getTargetNodeName(Opcode); + NODE_NAME_CASE(VCC_AND) + NODE_NAME_CASE(VCC_BITCAST) + } +} diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h new file mode 100644 index 0000000000..c088112652 --- /dev/null +++ b/lib/Target/R600/SIISelLowering.h @@ -0,0 +1,62 @@ +//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief SI DAG Lowering interface definition +// +//===----------------------------------------------------------------------===// + +#ifndef SIISELLOWERING_H +#define SIISELLOWERING_H + +#include "AMDGPUISelLowering.h" +#include "SIInstrInfo.h" + +namespace llvm { + +class SITargetLowering : public AMDGPUTargetLowering { + const SIInstrInfo * TII; + + /// Memory reads and writes are syncronized using the S_WAITCNT instruction. + /// This function takes the most conservative approach and inserts an + /// S_WAITCNT instruction after every read and write. 
+ void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I) const; + void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, unsigned Opcode) const; + void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; + void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const; + void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; + void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; + void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, + MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; + + SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG, + unsigned VCCNode) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + +public: + SITargetLowering(TargetMachine &tm); + virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, + MachineBasicBlock * BB) const; + virtual EVT getSetCCResultType(EVT VT) const; + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + virtual const char* getTargetNodeName(unsigned Opcode) const; +}; + +} // End namespace llvm + +#endif //SIISELLOWERING_H diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td new file mode 100644 index 0000000000..aea3b5a888 --- /dev/null +++ b/lib/Target/R600/SIInstrFormats.td @@ -0,0 +1,146 @@ +//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// SI Instruction format definitions. +// +// Instructions with _32 take 32-bit operands. +// Instructions with _64 take 64-bit operands. +// +// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit +// encoding is the standard encoding, but instructions that make use of +// any of the instruction modifiers must use the 64-bit encoding. +// +// Instructions with _e32 use the 32-bit encoding. +// Instructions with _e64 use the 64-bit encoding. 
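To make the _e32/_e64 relationship concrete: in the multiclasses below, the 64-bit (VOP3) variant reuses the 32-bit opcode and prepends a fixed bit pattern that moves it into the VOP3 opcode range. A rough C++ illustration of that mapping, with names of my own choosing, is:

// Sketch of how the _e64 (VOP3) opcode is derived from the _e32 opcode,
// mirroring the bit-list prefixes used by VOP1_32/VOP2_32/VOPC_32 below.
enum class VOPKind { VOP1, VOP2, VOPC };

unsigned vop3OpcodeFor(VOPKind Kind, unsigned E32Op) {
  switch (Kind) {
  case VOPKind::VOP1: return 0x180 | (E32Op & 0x7f); // {1, 1, op{6-0}}
  case VOPKind::VOP2: return 0x100 | (E32Op & 0x3f); // {1, 0, 0, op{5-0}}
  case VOPKind::VOPC: return E32Op & 0x1ff;          // 9-bit op passed through
  }
  return 0;
}

Under this scheme a VOP1 instruction such as V_MOV_B32 (_e32 opcode 0x01) would take VOP3 opcode 0x181 in its _e64 form.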
+// +//===----------------------------------------------------------------------===// + +class VOP3b_2IN <bits<9> op, string opName, RegisterClass dstClass, + RegisterClass src0Class, RegisterClass src1Class, + list<dag> pattern> + : VOP3b <op, (outs dstClass:$vdst), + (ins src0Class:$src0, src1Class:$src1, InstFlag:$src2, InstFlag:$sdst, + InstFlag:$omod, InstFlag:$neg), + opName, pattern +>; + + +class VOP3_1_32 <bits<9> op, string opName, list<dag> pattern> + : VOP3b_2IN <op, opName, SReg_1, AllReg_32, VReg_32, pattern>; + +class VOP3_32 <bits<9> op, string opName, list<dag> pattern> + : VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>; + +class VOP3_64 <bits<9> op, string opName, list<dag> pattern> + : VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, VReg_64:$src1, VReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>; + + +class SOP1_32 <bits<8> op, string opName, list<dag> pattern> + : SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>; + +class SOP1_64 <bits<8> op, string opName, list<dag> pattern> + : SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>; + +class SOP2_32 <bits<7> op, string opName, list<dag> pattern> + : SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>; + +class SOP2_64 <bits<7> op, string opName, list<dag> pattern> + : SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>; + +class SOP2_VCC <bits<7> op, string opName, list<dag> pattern> + : SOP2 <op, (outs SReg_1:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>; + +class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc, + string opName, list<dag> pattern> : + VOP1 < + op, (outs vrc:$dst), (ins arc:$src0), opName, pattern + >; + +multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> { + def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>; + def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + opName, [] + >; +} + +multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> { + + def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>; + + def _e64 : VOP3_64 < + {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + opName, [] + >; +} + +class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc, + string opName, list<dag> pattern> : + VOP2 < + op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern + >; + +multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> { + + def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>; + + def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + opName, [] + >; +} + +multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> { + def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>; + + def _e64 : VOP3_64 < + {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + opName, [] + >; +} + +class SOPK_32 <bits<5> op, string opName, list<dag> pattern> + : SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>; + +class SOPK_64 <bits<5> op, string opName, list<dag> pattern> + : SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>; + +class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc, + string opName, list<dag> pattern> : + VOPC < + op, (ins arc:$src0, vrc:$src1), opName, pattern + >; + +multiclass VOPC_32 <bits<9> op, string opName, 
list<dag> pattern> { + + def _e32 : VOPC_Helper < + {op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + VReg_32, AllReg_32, opName, pattern + >; + + def _e64 : VOP3_1_32 < + op, + opName, pattern + >; +} + +multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> { + + def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>; + + def _e64 : VOP3_64 < + {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, + opName, [] + >; +} + +class SOPC_32 <bits<7> op, string opName, list<dag> pattern> + : SOPC <op, (outs SCCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>; + +class SOPC_64 <bits<7> op, string opName, list<dag> pattern> + : SOPC <op, (outs SCCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>; + diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp new file mode 100644 index 0000000000..c6ad4d531d --- /dev/null +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -0,0 +1,89 @@ +//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief SI Implementation of TargetInstrInfo. +// +//===----------------------------------------------------------------------===// + + +#include "SIInstrInfo.h" +#include "AMDGPUTargetMachine.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include <stdio.h> + +using namespace llvm; + +SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm) + : AMDGPUInstrInfo(tm), + RI(tm, *this) + { } + +const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const { + return RI; +} + +void +SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + // If we are trying to copy to or from SCC, there is a bug somewhere else in + // the backend. While it may be theoretically possible to do this, it should + // never be necessary. 
+ assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); + + if (AMDGPU::SReg_64RegClass.contains(DestReg)) { + assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) { + assert(AMDGPU::VReg_32RegClass.contains(SrcReg) || + AMDGPU::SReg_32RegClass.contains(SrcReg)); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else { + assert(AMDGPU::SReg_32RegClass.contains(DestReg)); + assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } +} + +MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg, + int64_t Imm) const { + MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc()); + MachineInstrBuilder MIB(*MF, MI); + MIB.addReg(DstReg, RegState::Define); + MIB.addImm(Imm); + + return MI; + +} + +bool SIInstrInfo::isMov(unsigned Opcode) const { + switch(Opcode) { + default: return false; + case AMDGPU::S_MOV_B32: + case AMDGPU::S_MOV_B64: + case AMDGPU::V_MOV_B32_e32: + case AMDGPU::V_MOV_B32_e64: + case AMDGPU::V_MOV_IMM_F32: + case AMDGPU::V_MOV_IMM_I32: + case AMDGPU::S_MOV_IMM_I32: + return true; + } +} + +bool +SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { + return RC != &AMDGPU::EXECRegRegClass; +} diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h new file mode 100644 index 0000000000..631f6c00cc --- /dev/null +++ b/lib/Target/R600/SIInstrInfo.h @@ -0,0 +1,62 @@ +//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Interface definition for SIInstrInfo. +// +//===----------------------------------------------------------------------===// + + +#ifndef SIINSTRINFO_H +#define SIINSTRINFO_H + +#include "AMDGPUInstrInfo.h" +#include "SIRegisterInfo.h" + +namespace llvm { + +class SIInstrInfo : public AMDGPUInstrInfo { +private: + const SIRegisterInfo RI; + +public: + explicit SIInstrInfo(AMDGPUTargetMachine &tm); + + const SIRegisterInfo &getRegisterInfo() const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + /// \returns the encoding type of this instruction. + unsigned getEncodingType(const MachineInstr &MI) const; + + /// \returns the size of this instruction's encoding in number of bytes. 
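getEncodingType and getEncodingBytes are declared next; the bits they consult are packed into TSFlags by the InstSI class further down (TSFlags{3-0} = EncodingType, TSFlags{4} = NeedWait, mirrored by SIInstrFlags::NEED_WAIT). A minimal sketch of such queries, assuming that layout rather than reproducing the committed implementation:

// Sketch only: read the per-instruction flags that InstSI packs into TSFlags.
unsigned encodingTypeOf(const MachineInstr &MI) {
  return MI.getDesc().TSFlags & 0xf;                      // TSFlags{3-0}
}

bool needsWaitCnt(const MachineInstr &MI) {
  return MI.getDesc().TSFlags & SIInstrFlags::NEED_WAIT;  // TSFlags{4}
}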
+ unsigned getEncodingBytes(const MachineInstr &MI) const; + + virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg, + int64_t Imm) const; + + virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;} + virtual bool isMov(unsigned Opcode) const; + + virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; + }; + +} // End namespace llvm + +namespace SIInstrFlags { + enum Flags { + // First 4 bits are the instruction encoding + NEED_WAIT = 1 << 4 + }; +} + +#endif //SIINSTRINFO_H diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td new file mode 100644 index 0000000000..873a451e99 --- /dev/null +++ b/lib/Target/R600/SIInstrInfo.td @@ -0,0 +1,589 @@ +//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SI DAG Profiles +//===----------------------------------------------------------------------===// +def SDTVCCBinaryOp : SDTypeProfile<1, 2, [ + SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2> +]>; + +//===----------------------------------------------------------------------===// +// SI DAG Nodes +//===----------------------------------------------------------------------===// + +// and operation on 64-bit wide vcc +def SIsreg1_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp, + [SDNPCommutative, SDNPAssociative] +>; + +// Special bitcast node for sharing VCC register between VALU and SALU +def SIsreg1_bitcast : SDNode<"SIISD::VCC_BITCAST", + SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]> +>; + +// and operation on 64-bit wide vcc +def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp, + [SDNPCommutative, SDNPAssociative] +>; + +// Special bitcast node for sharing VCC register between VALU and SALU +def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST", + SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]> +>; + +class InstSI <dag outs, dag ins, string asm, list<dag> pattern> : + AMDGPUInst<outs, ins, asm, pattern> { + + field bits<4> EncodingType = 0; + field bits<1> NeedWait = 0; + + let TSFlags{3-0} = EncodingType; + let TSFlags{4} = NeedWait; + +} + +class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> : + InstSI <outs, ins, asm, pattern> { + + field bits<32> Inst; +} + +class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> : + InstSI <outs, ins, asm, pattern> { + + field bits<64> Inst; +} + +class SIOperand <ValueType vt, dag opInfo>: Operand <vt> { + let EncoderMethod = "encodeOperand"; + let MIOperandInfo = opInfo; +} + +def IMM16bit : ImmLeaf < + i16, + [{return isInt<16>(Imm);}] +>; + +def IMM8bit : ImmLeaf < + i32, + [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}] +>; + +def IMM12bit : ImmLeaf < + i16, + [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}] +>; + +def IMM32bitIn64bit : ImmLeaf < + i64, + [{return isInt<32>(Imm);}] +>; + +class GPR4Align <RegisterClass rc> : Operand <vAny> { + let EncoderMethod = "GPR4AlignEncode"; + let MIOperandInfo = (ops rc:$reg); +} + +class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> { + let EncoderMethod = "GPR2AlignEncode"; + let MIOperandInfo = (ops rc:$reg); +} + +def SMRDmemrr : Operand<iPTR> { + let MIOperandInfo = (ops SReg_64, SReg_32); + let EncoderMethod = 
"GPR2AlignEncode"; +} + +def SMRDmemri : Operand<iPTR> { + let MIOperandInfo = (ops SReg_64, i32imm); + let EncoderMethod = "SMRDmemriEncode"; +} + +def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>; +def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>; + +let Uses = [EXEC] in { + +def EXP : Enc64< + (outs), + (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm, + VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3), + "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3", + [] > { + + bits<4> EN; + bits<6> TGT; + bits<1> COMPR; + bits<1> DONE; + bits<1> VM; + bits<8> VSRC0; + bits<8> VSRC1; + bits<8> VSRC2; + bits<8> VSRC3; + + let Inst{3-0} = EN; + let Inst{9-4} = TGT; + let Inst{10} = COMPR; + let Inst{11} = DONE; + let Inst{12} = VM; + let Inst{31-26} = 0x3e; + let Inst{39-32} = VSRC0; + let Inst{47-40} = VSRC1; + let Inst{55-48} = VSRC2; + let Inst{63-56} = VSRC3; + let EncodingType = 0; //SIInstrEncodingType::EXP + + let NeedWait = 1; + let usesCustomInserter = 1; +} + +class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc64 <outs, ins, asm, pattern> { + + bits<8> VDATA; + bits<4> DMASK; + bits<1> UNORM; + bits<1> GLC; + bits<1> DA; + bits<1> R128; + bits<1> TFE; + bits<1> LWE; + bits<1> SLC; + bits<8> VADDR; + bits<5> SRSRC; + bits<5> SSAMP; + + let Inst{11-8} = DMASK; + let Inst{12} = UNORM; + let Inst{13} = GLC; + let Inst{14} = DA; + let Inst{15} = R128; + let Inst{16} = TFE; + let Inst{17} = LWE; + let Inst{24-18} = op; + let Inst{25} = SLC; + let Inst{31-26} = 0x3c; + let Inst{39-32} = VADDR; + let Inst{47-40} = VDATA; + let Inst{52-48} = SRSRC; + let Inst{57-53} = SSAMP; + + let EncodingType = 2; //SIInstrEncodingType::MIMG + + let NeedWait = 1; + let usesCustomInserter = 1; +} + +class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc64<outs, ins, asm, pattern> { + + bits<8> VDATA; + bits<12> OFFSET; + bits<1> OFFEN; + bits<1> IDXEN; + bits<1> GLC; + bits<1> ADDR64; + bits<4> DFMT; + bits<3> NFMT; + bits<8> VADDR; + bits<5> SRSRC; + bits<1> SLC; + bits<1> TFE; + bits<8> SOFFSET; + + let Inst{11-0} = OFFSET; + let Inst{12} = OFFEN; + let Inst{13} = IDXEN; + let Inst{14} = GLC; + let Inst{15} = ADDR64; + let Inst{18-16} = op; + let Inst{22-19} = DFMT; + let Inst{25-23} = NFMT; + let Inst{31-26} = 0x3a; //encoding + let Inst{39-32} = VADDR; + let Inst{47-40} = VDATA; + let Inst{52-48} = SRSRC; + let Inst{54} = SLC; + let Inst{55} = TFE; + let Inst{63-56} = SOFFSET; + let EncodingType = 3; //SIInstrEncodingType::MTBUF + + let NeedWait = 1; + let usesCustomInserter = 1; + let neverHasSideEffects = 1; +} + +class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc64<outs, ins, asm, pattern> { + + bits<8> VDATA; + bits<12> OFFSET; + bits<1> OFFEN; + bits<1> IDXEN; + bits<1> GLC; + bits<1> ADDR64; + bits<1> LDS; + bits<8> VADDR; + bits<5> SRSRC; + bits<1> SLC; + bits<1> TFE; + bits<8> SOFFSET; + + let Inst{11-0} = OFFSET; + let Inst{12} = OFFEN; + let Inst{13} = IDXEN; + let Inst{14} = GLC; + let Inst{15} = ADDR64; + let Inst{16} = LDS; + let Inst{24-18} = op; + let Inst{31-26} = 0x38; //encoding + let Inst{39-32} = VADDR; + let Inst{47-40} = VDATA; + let Inst{52-48} = SRSRC; + let Inst{54} = SLC; + let Inst{55} = TFE; + let Inst{63-56} = SOFFSET; + let EncodingType = 4; //SIInstrEncodingType::MUBUF + + let NeedWait = 1; + let usesCustomInserter = 1; + let neverHasSideEffects = 1; +} + +} // End Uses = [EXEC] + +class SMRD <bits<5> op, 
dag outs, dag ins, string asm, list<dag> pattern> : + Enc32<outs, ins, asm, pattern> { + + bits<7> SDST; + bits<15> PTR; + bits<8> OFFSET = PTR{7-0}; + bits<1> IMM = PTR{8}; + bits<6> SBASE = PTR{14-9}; + + let Inst{7-0} = OFFSET; + let Inst{8} = IMM; + let Inst{14-9} = SBASE; + let Inst{21-15} = SDST; + let Inst{26-22} = op; + let Inst{31-27} = 0x18; //encoding + let EncodingType = 5; //SIInstrEncodingType::SMRD + + let NeedWait = 1; + let usesCustomInserter = 1; +} + +class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc32<outs, ins, asm, pattern> { + + bits<7> SDST; + bits<8> SSRC0; + + let Inst{7-0} = SSRC0; + let Inst{15-8} = op; + let Inst{22-16} = SDST; + let Inst{31-23} = 0x17d; //encoding; + let EncodingType = 6; //SIInstrEncodingType::SOP1 + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc32 <outs, ins, asm, pattern> { + + bits<7> SDST; + bits<8> SSRC0; + bits<8> SSRC1; + + let Inst{7-0} = SSRC0; + let Inst{15-8} = SSRC1; + let Inst{22-16} = SDST; + let Inst{29-23} = op; + let Inst{31-30} = 0x2; // encoding + let EncodingType = 7; // SIInstrEncodingType::SOP2 + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc32<outs, ins, asm, pattern> { + + bits<8> SSRC0; + bits<8> SSRC1; + + let Inst{7-0} = SSRC0; + let Inst{15-8} = SSRC1; + let Inst{22-16} = op; + let Inst{31-23} = 0x17e; + let EncodingType = 8; // SIInstrEncodingType::SOPC + + let DisableEncoding = "$dst"; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc32 <outs, ins , asm, pattern> { + + bits <7> SDST; + bits <16> SIMM16; + + let Inst{15-0} = SIMM16; + let Inst{22-16} = SDST; + let Inst{27-23} = op; + let Inst{31-28} = 0xb; //encoding + let EncodingType = 9; // SIInstrEncodingType::SOPK + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 < + (outs), + ins, + asm, + pattern > { + + bits <16> SIMM16; + + let Inst{15-0} = SIMM16; + let Inst{22-16} = op; + let Inst{31-23} = 0x17f; // encoding + let EncodingType = 10; // SIInstrEncodingType::SOPP + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +let Uses = [EXEC] in { + +class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc32 <outs, ins, asm, pattern> { + + bits<8> VDST; + bits<8> VSRC; + bits<2> ATTRCHAN; + bits<6> ATTR; + + let Inst{7-0} = VSRC; + let Inst{9-8} = ATTRCHAN; + let Inst{15-10} = ATTR; + let Inst{17-16} = op; + let Inst{25-18} = VDST; + let Inst{31-26} = 0x32; // encoding + let EncodingType = 11; // SIInstrEncodingType::VINTRP + + let neverHasSideEffects = 1; + let mayLoad = 1; + let mayStore = 0; +} + +class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc32 <outs, ins, asm, pattern> { + + bits<8> VDST; + bits<9> SRC0; + + let Inst{8-0} = SRC0; + let Inst{16-9} = op; + let Inst{24-17} = VDST; + let Inst{31-25} = 0x3f; //encoding + + let EncodingType = 12; // SIInstrEncodingType::VOP1 + let PostEncoderMethod = "VOPPostEncode"; + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc32 <outs, ins, asm, pattern> { + + bits<8> VDST; + bits<9> SRC0; + bits<8> VSRC1; + 
+ let Inst{8-0} = SRC0; + let Inst{16-9} = VSRC1; + let Inst{24-17} = VDST; + let Inst{30-25} = op; + let Inst{31} = 0x0; //encoding + + let EncodingType = 13; // SIInstrEncodingType::VOP2 + let PostEncoderMethod = "VOPPostEncode"; + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc64 <outs, ins, asm, pattern> { + + bits<8> VDST; + bits<9> SRC0; + bits<9> SRC1; + bits<9> SRC2; + bits<3> ABS; + bits<1> CLAMP; + bits<2> OMOD; + bits<3> NEG; + + let Inst{7-0} = VDST; + let Inst{10-8} = ABS; + let Inst{11} = CLAMP; + let Inst{25-17} = op; + let Inst{31-26} = 0x34; //encoding + let Inst{40-32} = SRC0; + let Inst{49-41} = SRC1; + let Inst{58-50} = SRC2; + let Inst{60-59} = OMOD; + let Inst{63-61} = NEG; + + let EncodingType = 14; // SIInstrEncodingType::VOP3 + let PostEncoderMethod = "VOPPostEncode"; + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> : + Enc64 <outs, ins, asm, pattern> { + + bits<8> VDST; + bits<9> SRC0; + bits<9> SRC1; + bits<9> SRC2; + bits<7> SDST; + bits<2> OMOD; + bits<3> NEG; + + let Inst{7-0} = VDST; + let Inst{14-8} = SDST; + let Inst{25-17} = op; + let Inst{31-26} = 0x34; //encoding + let Inst{40-32} = SRC0; + let Inst{49-41} = SRC1; + let Inst{58-50} = SRC2; + let Inst{60-59} = OMOD; + let Inst{63-61} = NEG; + + let EncodingType = 14; // SIInstrEncodingType::VOP3 + let PostEncoderMethod = "VOPPostEncode"; + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> : + Enc32 <(outs VCCReg:$dst), ins, asm, pattern> { + + bits<9> SRC0; + bits<8> VSRC1; + + let Inst{8-0} = SRC0; + let Inst{16-9} = VSRC1; + let Inst{24-17} = op; + let Inst{31-25} = 0x3e; + + let EncodingType = 15; //SIInstrEncodingType::VOPC + let PostEncoderMethod = "VOPPostEncode"; + let DisableEncoding = "$dst"; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +} // End Uses = [EXEC] + +class MIMG_Load_Helper <bits<7> op, string asm> : MIMG < + op, + (outs VReg_128:$vdata), + (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr, + GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp), + asm, + []> { + let mayLoad = 1; + let mayStore = 0; +} + +class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF < + op, + (outs regClass:$dst), + (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, + i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc, + i1imm:$tfe, SReg_32:$soffset), + asm, + []> { + let mayLoad = 1; + let mayStore = 0; +} + +class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF < + op, + (outs regClass:$dst), + (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, + i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, + i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), + asm, + []> { + let mayLoad = 1; + let mayStore = 0; +} + +class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF < + op, + (outs), + (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, + i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, + GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset), + asm, + []> { + let mayStore = 1; + let mayLoad = 0; +} + +multiclass SMRD_Helper 
<bits<5> op, string asm, RegisterClass dstClass, + ValueType vt> { + def _IMM : SMRD < + op, + (outs dstClass:$dst), + (ins SMRDmemri:$src0), + asm, + [(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))] + >; + + def _SGPR : SMRD < + op, + (outs dstClass:$dst), + (ins SMRDmemrr:$src0), + asm, + [(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))] + >; +} + +multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> { + defm _F32 : SMRD_Helper <op, asm, dstClass, f32>; + defm _I32 : SMRD_Helper <op, asm, dstClass, i32>; +} + +include "SIInstrFormats.td" +include "SIInstructions.td" diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td new file mode 100644 index 0000000000..005be96645 --- /dev/null +++ b/lib/Target/R600/SIInstructions.td @@ -0,0 +1,1351 @@ +//===-- SIInstructions.td - SI Instruction Defintions ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This file was originally auto-generated from a GPU register header file and +// all the instruction definitions were originally commented out. Instructions +// that are not yet supported remain commented out. +//===----------------------------------------------------------------------===// + +def isSI : Predicate<"Subtarget.device()" + "->getGeneration() == AMDGPUDeviceInfo::HD7XXX">; + +let Predicates = [isSI] in { + +let neverHasSideEffects = 1 in { +def S_MOV_B32 : SOP1_32 <0x00000003, "S_MOV_B32", []>; +def S_MOV_B64 : SOP1_64 <0x00000004, "S_MOV_B64", []>; +def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>; +def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>; +def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>; +def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>; +def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>; +def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>; +def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>; +def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>; +} // End neverHasSideEffects = 1 +////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>; +////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>; +////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>; +////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>; +////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>; +////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>; +////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>; +////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>; +//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>; +//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>; +def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>; +//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>; +//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>; +//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>; +////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>; +////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>; +////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>; +////def S_BITSET1_B64 : SOP1_BITSET1 <0x0000001e, "S_BITSET1_B64", []>; +def S_GETPC_B64 : SOP1_64 <0x0000001f, "S_GETPC_B64", []>; +def 
S_SETPC_B64 : SOP1_64 <0x00000020, "S_SETPC_B64", []>; +def S_SWAPPC_B64 : SOP1_64 <0x00000021, "S_SWAPPC_B64", []>; +def S_RFE_B64 : SOP1_64 <0x00000022, "S_RFE_B64", []>; + +let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC] in { + +def S_AND_SAVEEXEC_B64 : SOP1_64 <0x00000024, "S_AND_SAVEEXEC_B64", []>; +def S_OR_SAVEEXEC_B64 : SOP1_64 <0x00000025, "S_OR_SAVEEXEC_B64", []>; +def S_XOR_SAVEEXEC_B64 : SOP1_64 <0x00000026, "S_XOR_SAVEEXEC_B64", []>; +def S_ANDN2_SAVEEXEC_B64 : SOP1_64 <0x00000027, "S_ANDN2_SAVEEXEC_B64", []>; +def S_ORN2_SAVEEXEC_B64 : SOP1_64 <0x00000028, "S_ORN2_SAVEEXEC_B64", []>; +def S_NAND_SAVEEXEC_B64 : SOP1_64 <0x00000029, "S_NAND_SAVEEXEC_B64", []>; +def S_NOR_SAVEEXEC_B64 : SOP1_64 <0x0000002a, "S_NOR_SAVEEXEC_B64", []>; +def S_XNOR_SAVEEXEC_B64 : SOP1_64 <0x0000002b, "S_XNOR_SAVEEXEC_B64", []>; + +} // End hasSideEffects = 1 + +def S_QUADMASK_B32 : SOP1_32 <0x0000002c, "S_QUADMASK_B32", []>; +def S_QUADMASK_B64 : SOP1_64 <0x0000002d, "S_QUADMASK_B64", []>; +def S_MOVRELS_B32 : SOP1_32 <0x0000002e, "S_MOVRELS_B32", []>; +def S_MOVRELS_B64 : SOP1_64 <0x0000002f, "S_MOVRELS_B64", []>; +def S_MOVRELD_B32 : SOP1_32 <0x00000030, "S_MOVRELD_B32", []>; +def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>; +//def S_CBRANCH_JOIN : SOP1_ <0x00000032, "S_CBRANCH_JOIN", []>; +def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>; +def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>; +def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>; +def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>; +def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>; + +/* +This instruction is disabled for now until we can figure out how to teach +the instruction selector to correctly use the S_CMP* vs V_CMP* +instructions. 
+ +When this instruction is enabled the code generator sometimes produces this +invalid sequence: + +SCC = S_CMPK_EQ_I32 SGPR0, imm +VCC = COPY SCC +VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1 + +def S_CMPK_EQ_I32 : SOPK < + 0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1), + "S_CMPK_EQ_I32", + [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))] +>; +*/ + +def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>; +def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>; +def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>; +def S_CMPK_LT_I32 : SOPK_32 <0x00000007, "S_CMPK_LT_I32", []>; +def S_CMPK_LE_I32 : SOPK_32 <0x00000008, "S_CMPK_LE_I32", []>; +def S_CMPK_EQ_U32 : SOPK_32 <0x00000009, "S_CMPK_EQ_U32", []>; +def S_CMPK_LG_U32 : SOPK_32 <0x0000000a, "S_CMPK_LG_U32", []>; +def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>; +def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>; +def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>; +def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>; +def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>; +def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>; +//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>; +def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>; +def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>; +def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>; +//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>; +//def EXP : EXP_ <0x00000000, "EXP", []>; + +defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32", []>; +defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", []>; +def : Pat < + (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LT)), + (V_CMP_LT_F32_e64 AllReg_32:$src0, VReg_32:$src1) +>; +defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", []>; +def : Pat < + (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)), + (V_CMP_EQ_F32_e64 AllReg_32:$src0, VReg_32:$src1) +>; +defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", []>; +def : Pat < + (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_LE)), + (V_CMP_LE_F32_e64 AllReg_32:$src0, VReg_32:$src1) +>; +defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", []>; +def : Pat < + (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GT)), + (V_CMP_GT_F32_e64 AllReg_32:$src0, VReg_32:$src1) +>; +defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", []>; +def : Pat < + (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)), + (V_CMP_LG_F32_e64 AllReg_32:$src0, VReg_32:$src1) +>; +defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", []>; +def : Pat < + (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_GE)), + (V_CMP_GE_F32_e64 AllReg_32:$src0, VReg_32:$src1) +>; +defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", []>; +defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", []>; +defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32", []>; +defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32", []>; +defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32", []>; +defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32", []>; +defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", []>; +def : Pat < + (i1 (setcc (f32 AllReg_32:$src0), VReg_32:$src1, COND_NE)), + (V_CMP_NEQ_F32_e64 AllReg_32:$src0, VReg_32:$src1) +>; +defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32", []>; +defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32", []>; + +//Side effect is writing to EXEC 
+let hasSideEffects = 1 in { + +defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32", []>; +defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32", []>; +defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32", []>; +defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32", []>; +defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32", []>; +defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32", []>; +defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32", []>; +defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32", []>; +defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32", []>; +defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32", []>; +defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32", []>; +defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32", []>; +defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32", []>; +defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32", []>; +defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32", []>; +defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32", []>; + +} // End hasSideEffects = 1 + +defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64", []>; +defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", []>; +defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", []>; +defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", []>; +defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", []>; +defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64", []>; +defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", []>; +defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", []>; +defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", []>; +defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64", []>; +defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64", []>; +defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64", []>; +defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64", []>; +defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", []>; +defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64", []>; +defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64", []>; + +//Side effect is writing to EXEC +let hasSideEffects = 1 in { + +defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64", []>; +defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64", []>; +defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64", []>; +defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64", []>; +defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64", []>; +defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64", []>; +defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64", []>; +defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64", []>; +defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64", []>; +defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64", []>; +defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64", []>; +defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64", []>; +defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64", []>; +defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64", []>; +defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64", []>; +defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64", []>; + +} // End hasSideEffects = 1 + +defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32", []>; +defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32", []>; +defm V_CMPS_EQ_F32 : VOPC_32 <0x00000042, "V_CMPS_EQ_F32", []>; +defm V_CMPS_LE_F32 : VOPC_32 <0x00000043, "V_CMPS_LE_F32", []>; +defm V_CMPS_GT_F32 
: VOPC_32 <0x00000044, "V_CMPS_GT_F32", []>; +defm V_CMPS_LG_F32 : VOPC_32 <0x00000045, "V_CMPS_LG_F32", []>; +defm V_CMPS_GE_F32 : VOPC_32 <0x00000046, "V_CMPS_GE_F32", []>; +defm V_CMPS_O_F32 : VOPC_32 <0x00000047, "V_CMPS_O_F32", []>; +defm V_CMPS_U_F32 : VOPC_32 <0x00000048, "V_CMPS_U_F32", []>; +defm V_CMPS_NGE_F32 : VOPC_32 <0x00000049, "V_CMPS_NGE_F32", []>; +defm V_CMPS_NLG_F32 : VOPC_32 <0x0000004a, "V_CMPS_NLG_F32", []>; +defm V_CMPS_NGT_F32 : VOPC_32 <0x0000004b, "V_CMPS_NGT_F32", []>; +defm V_CMPS_NLE_F32 : VOPC_32 <0x0000004c, "V_CMPS_NLE_F32", []>; +defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32", []>; +defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32", []>; +defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32", []>; +defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32", []>; +defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32", []>; +defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32", []>; +defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32", []>; +defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32", []>; +defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32", []>; +defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32", []>; +defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32", []>; +defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32", []>; +defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32", []>; +defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32", []>; +defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32", []>; +defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32", []>; +defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32", []>; +defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32", []>; +defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32", []>; +defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64", []>; +defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64", []>; +defm V_CMPS_EQ_F64 : VOPC_64 <0x00000062, "V_CMPS_EQ_F64", []>; +defm V_CMPS_LE_F64 : VOPC_64 <0x00000063, "V_CMPS_LE_F64", []>; +defm V_CMPS_GT_F64 : VOPC_64 <0x00000064, "V_CMPS_GT_F64", []>; +defm V_CMPS_LG_F64 : VOPC_64 <0x00000065, "V_CMPS_LG_F64", []>; +defm V_CMPS_GE_F64 : VOPC_64 <0x00000066, "V_CMPS_GE_F64", []>; +defm V_CMPS_O_F64 : VOPC_64 <0x00000067, "V_CMPS_O_F64", []>; +defm V_CMPS_U_F64 : VOPC_64 <0x00000068, "V_CMPS_U_F64", []>; +defm V_CMPS_NGE_F64 : VOPC_64 <0x00000069, "V_CMPS_NGE_F64", []>; +defm V_CMPS_NLG_F64 : VOPC_64 <0x0000006a, "V_CMPS_NLG_F64", []>; +defm V_CMPS_NGT_F64 : VOPC_64 <0x0000006b, "V_CMPS_NGT_F64", []>; +defm V_CMPS_NLE_F64 : VOPC_64 <0x0000006c, "V_CMPS_NLE_F64", []>; +defm V_CMPS_NEQ_F64 : VOPC_64 <0x0000006d, "V_CMPS_NEQ_F64", []>; +defm V_CMPS_NLT_F64 : VOPC_64 <0x0000006e, "V_CMPS_NLT_F64", []>; +defm V_CMPS_TRU_F64 : VOPC_64 <0x0000006f, "V_CMPS_TRU_F64", []>; +defm V_CMPSX_F_F64 : VOPC_64 <0x00000070, "V_CMPSX_F_F64", []>; +defm V_CMPSX_LT_F64 : VOPC_64 <0x00000071, "V_CMPSX_LT_F64", []>; +defm V_CMPSX_EQ_F64 : VOPC_64 <0x00000072, "V_CMPSX_EQ_F64", []>; +defm V_CMPSX_LE_F64 : VOPC_64 <0x00000073, "V_CMPSX_LE_F64", []>; +defm V_CMPSX_GT_F64 : VOPC_64 <0x00000074, "V_CMPSX_GT_F64", []>; +defm V_CMPSX_LG_F64 : VOPC_64 <0x00000075, "V_CMPSX_LG_F64", []>; +defm V_CMPSX_GE_F64 : VOPC_64 <0x00000076, "V_CMPSX_GE_F64", []>; +defm V_CMPSX_O_F64 : VOPC_64 <0x00000077, "V_CMPSX_O_F64", []>; +defm V_CMPSX_U_F64 : VOPC_64 <0x00000078, "V_CMPSX_U_F64", []>; +defm V_CMPSX_NGE_F64 : VOPC_64 <0x00000079, 
"V_CMPSX_NGE_F64", []>; +defm V_CMPSX_NLG_F64 : VOPC_64 <0x0000007a, "V_CMPSX_NLG_F64", []>; +defm V_CMPSX_NGT_F64 : VOPC_64 <0x0000007b, "V_CMPSX_NGT_F64", []>; +defm V_CMPSX_NLE_F64 : VOPC_64 <0x0000007c, "V_CMPSX_NLE_F64", []>; +defm V_CMPSX_NEQ_F64 : VOPC_64 <0x0000007d, "V_CMPSX_NEQ_F64", []>; +defm V_CMPSX_NLT_F64 : VOPC_64 <0x0000007e, "V_CMPSX_NLT_F64", []>; +defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64", []>; +defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32", []>; +defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", []>; +def : Pat < + (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LT)), + (V_CMP_LT_I32_e64 AllReg_32:$src0, VReg_32:$src1) +>; +defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", []>; +def : Pat < + (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_EQ)), + (V_CMP_EQ_I32_e64 AllReg_32:$src0, VReg_32:$src1) +>; +defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", []>; +def : Pat < + (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_LE)), + (V_CMP_LE_I32_e64 AllReg_32:$src0, VReg_32:$src1) +>; +defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", []>; +def : Pat < + (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GT)), + (V_CMP_GT_I32_e64 AllReg_32:$src0, VReg_32:$src1) +>; +defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", []>; +def : Pat < + (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_NE)), + (V_CMP_NE_I32_e64 AllReg_32:$src0, VReg_32:$src1) +>; +defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", []>; +def : Pat < + (i1 (setcc (i32 AllReg_32:$src0), VReg_32:$src1, COND_GE)), + (V_CMP_GE_I32_e64 AllReg_32:$src0, VReg_32:$src1) +>; +defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32", []>; + +let hasSideEffects = 1 in { + +defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32", []>; +defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32", []>; +defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32", []>; +defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32", []>; +defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32", []>; +defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32", []>; +defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32", []>; +defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32", []>; + +} // End hasSideEffects + +defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64", []>; +defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", []>; +defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", []>; +defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", []>; +defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", []>; +defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", []>; +defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", []>; +defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64", []>; + +let hasSideEffects = 1 in { + +defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64", []>; +defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64", []>; +defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64", []>; +defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64", []>; +defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64", []>; +defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64", []>; +defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64", []>; +defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64", []>; + +} // End hasSideEffects + +defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32", []>; +defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", []>; +defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, 
"V_CMP_EQ_U32", []>; +defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", []>; +defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", []>; +defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", []>; +defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", []>; +defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32", []>; + +let hasSideEffects = 1 in { + +defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32", []>; +defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32", []>; +defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32", []>; +defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32", []>; +defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32", []>; +defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32", []>; +defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32", []>; +defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32", []>; + +} // End hasSideEffects + +defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64", []>; +defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", []>; +defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", []>; +defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", []>; +defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", []>; +defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", []>; +defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", []>; +defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64", []>; +defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64", []>; +defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64", []>; +defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64", []>; +defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64", []>; +defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64", []>; +defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64", []>; +defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64", []>; +defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64", []>; +defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32", []>; +defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32", []>; +defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64", []>; +defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64", []>; +//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; +//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>; +//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "BUFFER_LOAD_FORMAT_XYZ", []>; +def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT_XYZW", VReg_128>; +//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "BUFFER_STORE_FORMAT_X", []>; +//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>; +//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>; +//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>; +//def BUFFER_LOAD_UBYTE : MUBUF_ <0x00000008, "BUFFER_LOAD_UBYTE", []>; +//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>; +//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>; +//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>; +//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>; +//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>; +//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>; +//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>; +//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, 
"BUFFER_STORE_SHORT", []>; +//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>; +//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>; +//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>; +//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>; +//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>; +//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>; +//def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>; +//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>; +//def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>; +//def BUFFER_ATOMIC_UMIN : MUBUF_ <0x00000036, "BUFFER_ATOMIC_UMIN", []>; +//def BUFFER_ATOMIC_SMAX : MUBUF_ <0x00000037, "BUFFER_ATOMIC_SMAX", []>; +//def BUFFER_ATOMIC_UMAX : MUBUF_ <0x00000038, "BUFFER_ATOMIC_UMAX", []>; +//def BUFFER_ATOMIC_AND : MUBUF_ <0x00000039, "BUFFER_ATOMIC_AND", []>; +//def BUFFER_ATOMIC_OR : MUBUF_ <0x0000003a, "BUFFER_ATOMIC_OR", []>; +//def BUFFER_ATOMIC_XOR : MUBUF_ <0x0000003b, "BUFFER_ATOMIC_XOR", []>; +//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "BUFFER_ATOMIC_INC", []>; +//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "BUFFER_ATOMIC_DEC", []>; +//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "BUFFER_ATOMIC_FCMPSWAP", []>; +//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "BUFFER_ATOMIC_FMIN", []>; +//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "BUFFER_ATOMIC_FMAX", []>; +//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "BUFFER_ATOMIC_SWAP_X2", []>; +//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "BUFFER_ATOMIC_CMPSWAP_X2", []>; +//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "BUFFER_ATOMIC_ADD_X2", []>; +//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "BUFFER_ATOMIC_SUB_X2", []>; +//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "BUFFER_ATOMIC_RSUB_X2", []>; +//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "BUFFER_ATOMIC_SMIN_X2", []>; +//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "BUFFER_ATOMIC_UMIN_X2", []>; +//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "BUFFER_ATOMIC_SMAX_X2", []>; +//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "BUFFER_ATOMIC_UMAX_X2", []>; +//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "BUFFER_ATOMIC_AND_X2", []>; +//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "BUFFER_ATOMIC_OR_X2", []>; +//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "BUFFER_ATOMIC_XOR_X2", []>; +//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "BUFFER_ATOMIC_INC_X2", []>; +//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "BUFFER_ATOMIC_DEC_X2", []>; +//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "BUFFER_ATOMIC_FCMPSWAP_X2", []>; +//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "BUFFER_ATOMIC_FMIN_X2", []>; +//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>; +//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>; +//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>; +//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>; +//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>; +//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>; +def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>; +//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>; +//def 
TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>; +//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>; +//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>; + +defm S_LOAD_DWORD : SMRD_32 <0x00000000, "S_LOAD_DWORD", SReg_32>; + +//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>; +defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128, v4i32>; +defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256, v8i32>; +//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>; +//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>; +//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>; +//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>; +//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>; +//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>; + +//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; +//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; +//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>; +//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>; +//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>; +//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>; +//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>; +//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK_SGN", 0x00000005>; +//def IMAGE_STORE : MIMG_NoPattern_ <"IMAGE_STORE", 0x00000008>; +//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>; +//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>; +//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>; +//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>; +//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>; +//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>; +//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>; +//def IMAGE_ATOMIC_SUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_SUB", 0x00000012>; +//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"IMAGE_ATOMIC_RSUB", 0x00000013>; +//def IMAGE_ATOMIC_SMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMIN", 0x00000014>; +//def IMAGE_ATOMIC_UMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMIN", 0x00000015>; +//def IMAGE_ATOMIC_SMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_SMAX", 0x00000016>; +//def IMAGE_ATOMIC_UMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_UMAX", 0x00000017>; +//def IMAGE_ATOMIC_AND : MIMG_NoPattern_ <"IMAGE_ATOMIC_AND", 0x00000018>; +//def IMAGE_ATOMIC_OR : MIMG_NoPattern_ <"IMAGE_ATOMIC_OR", 0x00000019>; +//def IMAGE_ATOMIC_XOR : MIMG_NoPattern_ <"IMAGE_ATOMIC_XOR", 0x0000001a>; +//def IMAGE_ATOMIC_INC : MIMG_NoPattern_ <"IMAGE_ATOMIC_INC", 0x0000001b>; +//def IMAGE_ATOMIC_DEC : MIMG_NoPattern_ <"IMAGE_ATOMIC_DEC", 0x0000001c>; +//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>; +//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>; +//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>; +def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">; +//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>; +def IMAGE_SAMPLE_D : MIMG_Load_Helper <0x00000022, 
"IMAGE_SAMPLE_D">; +//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>; +def IMAGE_SAMPLE_L : MIMG_Load_Helper <0x00000024, "IMAGE_SAMPLE_L">; +def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">; +//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>; +//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>; +//def IMAGE_SAMPLE_C : MIMG_NoPattern_ <"IMAGE_SAMPLE_C", 0x00000028>; +//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>; +//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>; +//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>; +//def IMAGE_SAMPLE_C_L : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L", 0x0000002c>; +//def IMAGE_SAMPLE_C_B : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B", 0x0000002d>; +//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>; +//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>; +//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>; +//def IMAGE_SAMPLE_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL_O", 0x00000031>; +//def IMAGE_SAMPLE_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_O", 0x00000032>; +//def IMAGE_SAMPLE_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL_O", 0x00000033>; +//def IMAGE_SAMPLE_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_L_O", 0x00000034>; +//def IMAGE_SAMPLE_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_O", 0x00000035>; +//def IMAGE_SAMPLE_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL_O", 0x00000036>; +//def IMAGE_SAMPLE_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ_O", 0x00000037>; +//def IMAGE_SAMPLE_C_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_O", 0x00000038>; +//def IMAGE_SAMPLE_C_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL_O", 0x00000039>; +//def IMAGE_SAMPLE_C_D_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_O", 0x0000003a>; +//def IMAGE_SAMPLE_C_D_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL_O", 0x0000003b>; +//def IMAGE_SAMPLE_C_L_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_L_O", 0x0000003c>; +//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>; +//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>; +//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>; +//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>; +//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>; +//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>; +//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>; +//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>; +//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>; +//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>; +//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>; +//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>; +//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>; +//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>; +//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>; +//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>; +//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>; +//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>; +//def 
IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>; +//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>; +//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>; +//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>; +//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>; +//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>; +//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>; +//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>; +//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>; +//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>; +//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>; +//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>; +//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>; +//def IMAGE_SAMPLE_C_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL", 0x0000006b>; +//def IMAGE_SAMPLE_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_O", 0x0000006c>; +//def IMAGE_SAMPLE_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL_O", 0x0000006d>; +//def IMAGE_SAMPLE_C_CD_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_O", 0x0000006e>; +//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>; +//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>; +//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>; +//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>; + +let neverHasSideEffects = 1 in { +defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>; +} // End neverHasSideEffects +defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>; +//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>; +//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>; +defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32", + [(set VReg_32:$dst, (sint_to_fp AllReg_32:$src0))] +>; +//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>; +//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>; +defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", + [(set VReg_32:$dst, (fp_to_sint AllReg_32:$src0))] +>; +defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; +////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>; +//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>; +//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>; +//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>; +//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>; +//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>; +//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>; +//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>; +//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>; +//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>; +//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>; +//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>; +//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>; +defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", + [(set VReg_32:$dst, (AMDGPUfract AllReg_32:$src0))] +>; +defm V_TRUNC_F32 : VOP1_32 <0x00000021, 
"V_TRUNC_F32", []>; +defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", []>; +defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32", + [(set VReg_32:$dst, (frint AllReg_32:$src0))] +>; +defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32", + [(set VReg_32:$dst, (ffloor AllReg_32:$src0))] +>; +defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32", + [(set VReg_32:$dst, (fexp2 AllReg_32:$src0))] +>; +defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>; +defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", []>; +defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>; +defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>; +defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", + [(set VReg_32:$dst, (fdiv FP_ONE, AllReg_32:$src0))] +>; +defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>; +defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>; +defm V_RSQ_LEGACY_F32 : VOP1_32 < + 0x0000002d, "V_RSQ_LEGACY_F32", + [(set VReg_32:$dst, (int_AMDGPU_rsq AllReg_32:$src0))] +>; +defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>; +defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>; +defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; +defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>; +defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>; +defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", []>; +defm V_SQRT_F64 : VOP1_64 <0x00000034, "V_SQRT_F64", []>; +defm V_SIN_F32 : VOP1_32 <0x00000035, "V_SIN_F32", []>; +defm V_COS_F32 : VOP1_32 <0x00000036, "V_COS_F32", []>; +defm V_NOT_B32 : VOP1_32 <0x00000037, "V_NOT_B32", []>; +defm V_BFREV_B32 : VOP1_32 <0x00000038, "V_BFREV_B32", []>; +defm V_FFBH_U32 : VOP1_32 <0x00000039, "V_FFBH_U32", []>; +defm V_FFBL_B32 : VOP1_32 <0x0000003a, "V_FFBL_B32", []>; +defm V_FFBH_I32 : VOP1_32 <0x0000003b, "V_FFBH_I32", []>; +//defm V_FREXP_EXP_I32_F64 : VOP1_32 <0x0000003c, "V_FREXP_EXP_I32_F64", []>; +defm V_FREXP_MANT_F64 : VOP1_64 <0x0000003d, "V_FREXP_MANT_F64", []>; +defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>; +//defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>; +defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>; +//def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>; +defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>; +defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>; +defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>; + +def V_INTERP_P1_F32 : VINTRP < + 0x00000000, + (outs VReg_32:$dst), + (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), + "V_INTERP_P1_F32", + []> { + let DisableEncoding = "$m0"; +} + +def V_INTERP_P2_F32 : VINTRP < + 0x00000001, + (outs VReg_32:$dst), + (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), + "V_INTERP_P2_F32", + []> { + + let Constraints = "$src0 = $dst"; + let DisableEncoding = "$src0,$m0"; + +} + +def V_INTERP_MOV_F32 : VINTRP < + 0x00000002, + (outs VReg_32:$dst), + (ins i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), + "V_INTERP_MOV_F32", + []> { + let VSRC = 0; + let DisableEncoding = "$m0"; +} + +//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>; + +let isTerminator = 1 in { + +def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM", + [(IL_retflag)]> { + let SIMM16 = 0; + let isBarrier = 1; + let hasCtrlDep = 1; +} + +let isBranch = 1 in { +def S_BRANCH : SOPP < + 0x00000002, (ins brtarget:$target), "S_BRANCH", + [(br bb:$target)]> { + let isBarrier = 1; +} + +let DisableEncoding = "$scc" in 
{ +def S_CBRANCH_SCC0 : SOPP < + 0x00000004, (ins brtarget:$target, SCCReg:$scc), + "S_CBRANCH_SCC0", [] +>; +def S_CBRANCH_SCC1 : SOPP < + 0x00000005, (ins brtarget:$target, SCCReg:$scc), + "S_CBRANCH_SCC1", + [] +>; +} // End DisableEncoding = "$scc" + +def S_CBRANCH_VCCZ : SOPP < + 0x00000006, (ins brtarget:$target, VCCReg:$vcc), + "S_CBRANCH_VCCZ", + [] +>; +def S_CBRANCH_VCCNZ : SOPP < + 0x00000007, (ins brtarget:$target, VCCReg:$vcc), + "S_CBRANCH_VCCNZ", + [] +>; + +let DisableEncoding = "$exec" in { +def S_CBRANCH_EXECZ : SOPP < + 0x00000008, (ins brtarget:$target, EXECReg:$exec), + "S_CBRANCH_EXECZ", + [] +>; +def S_CBRANCH_EXECNZ : SOPP < + 0x00000009, (ins brtarget:$target, EXECReg:$exec), + "S_CBRANCH_EXECNZ", + [] +>; +} // End DisableEncoding = "$exec" + + +} // End isBranch = 1 +} // End isTerminator = 1 + +//def S_BARRIER : SOPP_ <0x0000000a, "S_BARRIER", []>; +let hasSideEffects = 1 in { +def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16", + [] +>; +} // End hasSideEffects +//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>; +//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>; +//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>; +//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>; +//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>; +//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>; +//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>; +//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>; +//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>; +//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>; + +def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst), + (ins AllReg_32:$src0, VReg_32:$src1, VCCReg:$vcc), "V_CNDMASK_B32_e32", + [] +>{ + let DisableEncoding = "$vcc"; +} + +def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst), + (ins VReg_32:$src0, VReg_32:$src1, SReg_1:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), + "V_CNDMASK_B32_e64", + [(set (i32 VReg_32:$dst), (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0))] +>; + +//f32 pattern for V_CNDMASK_B32_e64 +def : Pat < + (f32 (select SReg_1:$src2, VReg_32:$src1, VReg_32:$src0)), + (V_CNDMASK_B32_e64 VReg_32:$src0, VReg_32:$src1, SReg_1:$src2) +>; + +defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>; +defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>; + +defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", []>; +def : Pat < + (f32 (fadd AllReg_32:$src0, VReg_32:$src1)), + (V_ADD_F32_e32 AllReg_32:$src0, VReg_32:$src1) +>; + +defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", []>; +def : Pat < + (f32 (fsub AllReg_32:$src0, VReg_32:$src1)), + (V_SUB_F32_e32 AllReg_32:$src0, VReg_32:$src1) +>; +defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>; +defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>; +defm V_MUL_LEGACY_F32 : VOP2_32 < + 0x00000007, "V_MUL_LEGACY_F32", + [(set VReg_32:$dst, (int_AMDGPU_mul AllReg_32:$src0, VReg_32:$src1))] +>; + +defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", + [(set VReg_32:$dst, (fmul AllReg_32:$src0, VReg_32:$src1))] +>; +//defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", []>; +//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>; +//defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", []>; +//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>; +defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32", + [(set VReg_32:$dst, (AMDGPUfmin AllReg_32:$src0, 
VReg_32:$src1))] +>; + +defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32", + [(set VReg_32:$dst, (AMDGPUfmax AllReg_32:$src0, VReg_32:$src1))] +>; +defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>; +defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>; +defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>; +defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; +defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; +defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; +defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>; +defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>; +defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>; +defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; +defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; +defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; +defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", + [(set VReg_32:$dst, (and AllReg_32:$src0, VReg_32:$src1))] +>; +defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", + [(set VReg_32:$dst, (or AllReg_32:$src0, VReg_32:$src1))] +>; +defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", + [(set VReg_32:$dst, (xor AllReg_32:$src0, VReg_32:$src1))] +>; +defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", []>; +defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>; +defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>; +defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; +//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; +//defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; +//defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; +let Defs = [VCC] in { // Carry-out goes to VCC +defm V_ADD_I32 : VOP2_32 <0x00000025, "V_ADD_I32", + [(set VReg_32:$dst, (add (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))] +>; +defm V_SUB_I32 : VOP2_32 <0x00000026, "V_SUB_I32", + [(set VReg_32:$dst, (sub (i32 AllReg_32:$src0), (i32 VReg_32:$src1)))] +>; +} // End Defs = [VCC] +defm V_SUBREV_I32 : VOP2_32 <0x00000027, "V_SUBREV_I32", []>; +defm V_ADDC_U32 : VOP2_32 <0x00000028, "V_ADDC_U32", []>; +defm V_SUBB_U32 : VOP2_32 <0x00000029, "V_SUBB_U32", []>; +defm V_SUBBREV_U32 : VOP2_32 <0x0000002a, "V_SUBBREV_U32", []>; +defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; +////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>; +////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>; +////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>; +defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32", + [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))] +>; +////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>; +////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>; +def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>; +def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>; +def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>; +def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>; +def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>; +def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>; +def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>; +def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>; +def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>; +def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>; +def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>; +def 
S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>; +////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>; +////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>; +////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>; +////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>; +//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>; + +let neverHasSideEffects = 1 in { + +def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>; +def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>; +//def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", []>; +//def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", []>; + +} // End neverHasSideEffects +def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>; +def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>; +def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>; +def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>; +def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>; +def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>; +def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>; +def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>; +def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>; +//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>; +def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>; +def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>; +def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>; +////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>; +////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>; +////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>; +////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>; +////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>; +////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>; +////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>; +////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>; +////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>; +//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>; +//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>; +//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>; +def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>; +////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>; +def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>; +def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>; +def V_LSHL_B64 : VOP3_64 <0x00000161, "V_LSHL_B64", []>; +def V_LSHR_B64 : VOP3_64 <0x00000162, "V_LSHR_B64", []>; +def V_ASHR_I64 : VOP3_64 <0x00000163, "V_ASHR_I64", []>; +def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>; +def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>; +def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>; +def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>; +def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; +def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>; +def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; +def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; +def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; +def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; +def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; +def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>; +def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>; +//def V_MSAD_U8 : VOP3_U8 <0x00000171, 
"V_MSAD_U8", []>; +//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>; +//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>; +def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>; +def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>; +def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>; +def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>; +def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>; +def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>; +def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>; +def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>; +def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>; +def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>; +def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>; + +def S_CSELECT_B32 : SOP2 < + 0x0000000a, (outs SReg_32:$dst), + (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32", + [(set (i32 SReg_32:$dst), (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1))] +>; + +def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>; + +// f32 pattern for S_CSELECT_B32 +def : Pat < + (f32 (select SCCReg:$scc, SReg_32:$src0, SReg_32:$src1)), + (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc) +>; + +def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>; + +def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", + [(set SReg_64:$dst, (and SReg_64:$src0, SReg_64:$src1))] +>; +def S_AND_VCC : SOP2_VCC <0x0000000f, "S_AND_B64", + [(set SReg_1:$vcc, (SIvcc_and SReg_64:$src0, SReg_64:$src1))] +>; +def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>; +def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>; +def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>; +def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>; +def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>; +def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>; +def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>; +def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>; +def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>; +def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>; +def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>; +def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>; +def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>; +def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>; +def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>; +def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>; +def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>; +def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>; +def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>; +def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>; +def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>; +def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>; +def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>; +def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>; +def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>; +def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>; +def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>; +//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>; +def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>; + +class V_MOV_IMM <Operand immType, SDNode immNode> : InstSI < + (outs VReg_32:$dst), + (ins immType:$src0), + "V_MOV_IMM", + [(set VReg_32:$dst, (immNode:$src0))] +>; + +let isCodeGenOnly = 1, isPseudo = 1 in { + +def V_MOV_IMM_I32 : V_MOV_IMM<i32imm, imm>; +def V_MOV_IMM_F32 : V_MOV_IMM<f32imm, fpimm>; + +def S_MOV_IMM_I32 : InstSI < + (outs 
SReg_32:$dst), + (ins i32imm:$src0), + "S_MOV_IMM_I32", + [(set SReg_32:$dst, (imm:$src0))] +>; + +// i64 immediates aren't really supported in hardware, but LLVM will use the i64 +// type for indices on load and store instructions. The pattern for +// S_MOV_IMM_I64 will only match i64 immediates that can fit into 32-bits, +// which the hardware can handle. +def S_MOV_IMM_I64 : InstSI < + (outs SReg_64:$dst), + (ins i64imm:$src0), + "S_MOV_IMM_I64 $dst, $src0", + [(set SReg_64:$dst, (IMM32bitIn64bit:$src0))] +>; + +} // End isCodeGenOnly, isPseudo = 1 + +class SI_LOAD_LITERAL<Operand ImmType> : + Enc32 <(outs), (ins ImmType:$imm), "LOAD_LITERAL $imm", []> { + + bits<32> imm; + let Inst{31-0} = imm; +} + +def SI_LOAD_LITERAL_I32 : SI_LOAD_LITERAL<i32imm>; +def SI_LOAD_LITERAL_F32 : SI_LOAD_LITERAL<f32imm>; + +let isCodeGenOnly = 1, isPseudo = 1 in { + +def SET_M0 : InstSI < + (outs SReg_32:$dst), + (ins i32imm:$src0), + "SET_M0", + [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))] +>; + +def LOAD_CONST : AMDGPUShaderInst < + (outs GPRF32:$dst), + (ins i32imm:$src), + "LOAD_CONST $dst, $src", + [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))] +>; + +let usesCustomInserter = 1 in { + +def SI_V_CNDLT : InstSI < + (outs VReg_32:$dst), + (ins VReg_32:$src0, VReg_32:$src1, VReg_32:$src2), + "SI_V_CNDLT $dst, $src0, $src1, $src2", + [(set VReg_32:$dst, (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2))] +>; + +def SI_INTERP : InstSI < + (outs VReg_32:$dst), + (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params), + "SI_INTERP $dst, $i, $j, $attr_chan, $attr, $params", + [] +>; + +def SI_INTERP_CONST : InstSI < + (outs VReg_32:$dst), + (ins i32imm:$attr_chan, i32imm:$attr, SReg_32:$params), + "SI_INTERP_CONST $dst, $attr_chan, $attr, $params", + [(set VReg_32:$dst, (int_SI_fs_interp_constant imm:$attr_chan, + imm:$attr, SReg_32:$params))] +>; + +def SI_KIL : InstSI < + (outs), + (ins VReg_32:$src), + "SI_KIL $src", + [(int_AMDGPU_kill VReg_32:$src)] +>; + +def SI_WQM : InstSI < + (outs), + (ins), + "SI_WQM", + [(int_SI_wqm)] +>; + +} // end usesCustomInserter + +// SI Psuedo instructions. These are used by the CFG structurizer pass +// and should be lowered to ISA instructions prior to codegen. 
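The comment above notes that S_MOV_IMM_I64 only matches i64 immediates that fit into 32 bits. A minimal sketch of that check in plain C++ follows; the in-tree IMM32bitIn64bit predicate is not shown in this diff, and whether "fits" means sign- or zero-extendable is an assumption here, so both forms appear.

#include <cstdint>

// Hypothetical helpers, not the backend's predicate: an i64 immediate is
// acceptable only if it round-trips through a 32-bit truncation.
static bool ZeroExtFitsIn32(int64_t Imm) {
  return static_cast<int64_t>(static_cast<uint32_t>(Imm)) == Imm;
}
static bool SignExtFitsIn32(int64_t Imm) {
  return static_cast<int64_t>(static_cast<int32_t>(Imm)) == Imm;
}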
+ +let mayLoad = 1, mayStore = 1, hasSideEffects = 1, + Uses = [EXEC], Defs = [EXEC] in { + +let isBranch = 1, isTerminator = 1 in { + +def SI_IF : InstSI < + (outs SReg_64:$dst), + (ins SReg_1:$vcc, brtarget:$target), + "SI_IF", + [(set SReg_64:$dst, (int_SI_if SReg_1:$vcc, bb:$target))] +>; + +def SI_ELSE : InstSI < + (outs SReg_64:$dst), + (ins SReg_64:$src, brtarget:$target), + "SI_ELSE", + [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> { + + let Constraints = "$src = $dst"; +} + +def SI_LOOP : InstSI < + (outs), + (ins SReg_64:$saved, brtarget:$target), + "SI_LOOP", + [(int_SI_loop SReg_64:$saved, bb:$target)] +>; + +} // end isBranch = 1, isTerminator = 1 + +def SI_BREAK : InstSI < + (outs SReg_64:$dst), + (ins SReg_64:$src), + "SI_ELSE", + [(set SReg_64:$dst, (int_SI_break SReg_64:$src))] +>; + +def SI_IF_BREAK : InstSI < + (outs SReg_64:$dst), + (ins SReg_1:$vcc, SReg_64:$src), + "SI_IF_BREAK", + [(set SReg_64:$dst, (int_SI_if_break SReg_1:$vcc, SReg_64:$src))] +>; + +def SI_ELSE_BREAK : InstSI < + (outs SReg_64:$dst), + (ins SReg_64:$src0, SReg_64:$src1), + "SI_ELSE_BREAK", + [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))] +>; + +def SI_END_CF : InstSI < + (outs), + (ins SReg_64:$saved), + "SI_END_CF", + [(int_SI_end_cf SReg_64:$saved)] +>; + +} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 + // Uses = [EXEC], Defs = [EXEC] + +} // end IsCodeGenOnly, isPseudo + +/* int_SI_vs_load_input */ +def : Pat< + (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset, + VReg_32:$buf_idx_vgpr), + (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0, + VReg_32:$buf_idx_vgpr, SReg_128:$tlst, + 0, 0, (i32 SREG_LIT_0)) +>; + +/* int_SI_export */ +def : Pat < + (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr, + VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3), + (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm, + VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3) +>; + +/* int_SI_sample */ +def : Pat < + (int_SI_sample imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler), + (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, + SReg_256:$rsrc, SReg_128:$sampler) +>; + +/* int_SI_sample_lod */ +def : Pat < + (int_SI_sample_lod imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler), + (IMAGE_SAMPLE_L imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, + SReg_256:$rsrc, SReg_128:$sampler) +>; + +/* int_SI_sample_bias */ +def : Pat < + (int_SI_sample_bias imm:$writemask, VReg_128:$coord, SReg_256:$rsrc, SReg_128:$sampler), + (IMAGE_SAMPLE_B imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_128:$coord, + SReg_256:$rsrc, SReg_128:$sampler) +>; + +def CLAMP_SI : CLAMP<VReg_32>; +def FABS_SI : FABS<VReg_32>; +def FNEG_SI : FNEG<VReg_32>; + +def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>; +def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>; +def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>; +def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>; + +def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>; +def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>; +def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>; +def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>; + +def : Vector_Build <v4f32, VReg_128, f32, VReg_32>; +def : Vector_Build <v4i32, SReg_128, i32, SReg_32>; + +def : BitConvert <i32, f32, SReg_32>; +def : BitConvert <i32, f32, VReg_32>; + +def : BitConvert <f32, i32, SReg_32>; +def : BitConvert <f32, i32, 
VReg_32>; + +def : Pat < + (i64 (SIsreg1_bitcast SReg_1:$vcc)), + (S_MOV_B64 (COPY_TO_REGCLASS SReg_1:$vcc, SReg_64)) +>; + +def : Pat < + (i1 (SIsreg1_bitcast SReg_64:$vcc)), + (COPY_TO_REGCLASS SReg_64:$vcc, SReg_1) +>; + +def : Pat < + (i64 (SIvcc_bitcast VCCReg:$vcc)), + (S_MOV_B64 (COPY_TO_REGCLASS VCCReg:$vcc, SReg_64)) +>; + +def : Pat < + (i1 (SIvcc_bitcast SReg_64:$vcc)), + (COPY_TO_REGCLASS SReg_64:$vcc, VCCReg) +>; + +/********** ===================== **********/ +/********** Interpolation Paterns **********/ +/********** ===================== **********/ + +def : Pat < + (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params), + (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan, + imm:$attr, SReg_32:$params) +>; + +def : Pat < + (int_SI_fs_interp_linear_centroid imm:$attr_chan, imm:$attr, SReg_32:$params), + (SI_INTERP (f32 LINEAR_CENTROID_I), (f32 LINEAR_CENTROID_J), imm:$attr_chan, + imm:$attr, SReg_32:$params) +>; + +def : Pat < + (int_SI_fs_interp_persp_center imm:$attr_chan, imm:$attr, SReg_32:$params), + (SI_INTERP (f32 PERSP_CENTER_I), (f32 PERSP_CENTER_J), imm:$attr_chan, + imm:$attr, SReg_32:$params) +>; + +def : Pat < + (int_SI_fs_interp_persp_centroid imm:$attr_chan, imm:$attr, SReg_32:$params), + (SI_INTERP (f32 PERSP_CENTROID_I), (f32 PERSP_CENTROID_J), imm:$attr_chan, + imm:$attr, SReg_32:$params) +>; + +def : Pat < + (int_SI_fs_read_face), + (f32 FRONT_FACE) +>; + +def : Pat < + (int_SI_fs_read_pos 0), + (f32 POS_X_FLOAT) +>; + +def : Pat < + (int_SI_fs_read_pos 1), + (f32 POS_Y_FLOAT) +>; + +def : Pat < + (int_SI_fs_read_pos 2), + (f32 POS_Z_FLOAT) +>; + +def : Pat < + (int_SI_fs_read_pos 3), + (f32 POS_W_FLOAT) +>; + +/********** ================== **********/ +/********** Intrinsic Patterns **********/ +/********** ================== **********/ + +/* llvm.AMDGPU.pow */ +/* XXX: We are using IEEE MUL, not the 0 * anything = 0 MUL, is this correct? 
*/ +def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_F32_e32, VReg_32>; + +def : Pat < + (int_AMDGPU_div AllReg_32:$src0, AllReg_32:$src1), + (V_MUL_LEGACY_F32_e32 AllReg_32:$src0, (V_RCP_LEGACY_F32_e32 AllReg_32:$src1)) +>; + +def : Pat< + (fdiv AllReg_32:$src0, AllReg_32:$src1), + (V_MUL_F32_e32 AllReg_32:$src0, (V_RCP_F32_e32 AllReg_32:$src1)) +>; + +def : Pat < + (int_AMDGPU_kilp), + (SI_KIL (V_MOV_IMM_I32 0xbf800000)) +>; + +def : Pat < + (int_AMDGPU_cube VReg_128:$src), + (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), + (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), + (EXTRACT_SUBREG VReg_128:$src, sel_y), + (EXTRACT_SUBREG VReg_128:$src, sel_z), + 0, 0, 0, 0), sel_x), + (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), + (EXTRACT_SUBREG VReg_128:$src, sel_y), + (EXTRACT_SUBREG VReg_128:$src, sel_z), + 0, 0, 0, 0), sel_y), + (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), + (EXTRACT_SUBREG VReg_128:$src, sel_y), + (EXTRACT_SUBREG VReg_128:$src, sel_z), + 0, 0, 0, 0), sel_z), + (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x), + (EXTRACT_SUBREG VReg_128:$src, sel_y), + (EXTRACT_SUBREG VReg_128:$src, sel_z), + 0, 0, 0, 0), sel_w) +>; + +/********** ================== **********/ +/********** VOP3 Patterns **********/ +/********** ================== **********/ + +def : Pat <(f32 (IL_mad AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2)), + (V_MAD_LEGACY_F32 AllReg_32:$src0, VReg_32:$src1, VReg_32:$src2, + 0, 0, 0, 0)>; + +} // End isSI predicate diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td new file mode 100644 index 0000000000..c322fef0fe --- /dev/null +++ b/lib/Target/R600/SIIntrinsics.td @@ -0,0 +1,52 @@ +//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
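One constant in the int_AMDGPU_kilp pattern above is worth spelling out: 0xbf800000 is the IEEE-754 single-precision encoding of -1.0f, so the pattern kills the pixel unconditionally by handing SI_KIL a negative value. A quick stand-alone check (plain C++, not backend code):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint32_t Bits = 0xbf800000u;      // literal used by the kilp pattern
  float F;
  std::memcpy(&F, &Bits, sizeof F); // reinterpret the bit pattern as a float
  std::printf("%f\n", F);           // prints -1.000000
  return 0;
}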
+// +//===----------------------------------------------------------------------===// +// +// SI Intrinsic Definitions +// +//===----------------------------------------------------------------------===// + + +let TargetPrefix = "SI", isTarget = 1 in { + + def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + /* XXX: We may need a seperate intrinsic here for loading integer values */ + def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>; + def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>; + def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ; + def int_SI_wqm : Intrinsic <[], [], []>; + + def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>; + def int_SI_sample_bias : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>; + def int_SI_sample_lod : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty], [IntrReadMem]>; + + /* Interpolation Intrinsics */ + + def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>; + class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; + + def int_SI_fs_interp_linear_center : Interp; + def int_SI_fs_interp_linear_centroid : Interp; + def int_SI_fs_interp_persp_center : Interp; + def int_SI_fs_interp_persp_centroid : Interp; + def int_SI_fs_interp_constant : Interp; + + def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>; + def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + + /* Control flow Intrinsics */ + + def int_SI_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>; + def int_SI_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], []>; + def int_SI_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>; + def int_SI_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], []>; + def int_SI_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>; + def int_SI_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], []>; + def int_SI_end_cf : Intrinsic<[], [llvm_i64_ty], []>; +} diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp new file mode 100644 index 0000000000..3fbe653a49 --- /dev/null +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -0,0 +1,331 @@ +//===-- SILowerControlFlow.cpp - Use predicates for control flow ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief This pass lowers the pseudo control flow instructions to real +/// machine instructions. +/// +/// All control flow is handled using predicated instructions and +/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector +/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs +/// by writting to the 64-bit EXEC register (each bit corresponds to a +/// single vector ALU). 
Typically, for predicates, a vector ALU will write +/// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each +/// Vector ALU) and then the ScalarALU will AND the VCC register with the +/// EXEC to update the predicates. +/// +/// For example: +/// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2 +/// %SGPR0 = SI_IF %VCC +/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 +/// %SGPR0 = SI_ELSE %SGPR0 +/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0 +/// SI_END_CF %SGPR0 +/// +/// becomes: +/// +/// %SGPR0 = S_AND_SAVEEXEC_B64 %VCC // Save and update the exec mask +/// %SGPR0 = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask +/// S_CBRANCH_EXECZ label0 // This instruction is an optional +/// // optimization which allows us to +/// // branch if all the bits of +/// // EXEC are zero. +/// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch +/// +/// label0: +/// %SGPR0 = S_OR_SAVEEXEC_B64 %EXEC // Restore the exec mask for the Then block +/// %EXEC = S_XOR_B64 %SGPR0, %EXEC // Clear live bits from saved exec mask +/// S_BRANCH_EXECZ label1 // Use our branch optimization +/// // instruction again. +/// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR // Do the THEN block +/// label1: +/// %EXEC = S_OR_B64 %EXEC, %SGPR0 // Re-enable saved exec mask bits +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +namespace { + +class SILowerControlFlowPass : public MachineFunctionPass { + +private: + static const unsigned SkipThreshold = 12; + + static char ID; + const TargetInstrInfo *TII; + + void Skip(MachineInstr &MI, MachineOperand &To); + + void If(MachineInstr &MI); + void Else(MachineInstr &MI); + void Break(MachineInstr &MI); + void IfBreak(MachineInstr &MI); + void ElseBreak(MachineInstr &MI); + void Loop(MachineInstr &MI); + void EndCf(MachineInstr &MI); + + void Branch(MachineInstr &MI); + +public: + SILowerControlFlowPass(TargetMachine &tm) : + MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { + return "SI Lower control flow instructions"; + } + +}; + +} // End anonymous namespace + +char SILowerControlFlowPass::ID = 0; + +FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) { + return new SILowerControlFlowPass(tm); +} + +void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { + unsigned NumInstr = 0; + + for (MachineBasicBlock *MBB = *From.getParent()->succ_begin(); + NumInstr < SkipThreshold && MBB != To.getMBB() && !MBB->succ_empty(); + MBB = *MBB->succ_begin()) { + + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + NumInstr < SkipThreshold && I != E; ++I) { + + if (I->isBundle() || !I->isBundled()) + ++NumInstr; + } + } + + if (NumInstr < SkipThreshold) + return; + + DebugLoc DL = From.getDebugLoc(); + BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) + .addOperand(To) + .addReg(AMDGPU::EXEC); +} + +void SILowerControlFlowPass::If(MachineInstr &MI) { + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Reg = MI.getOperand(0).getReg(); + unsigned Vcc = MI.getOperand(1).getReg(); + + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), Reg) + 
.addReg(Vcc); + + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), Reg) + .addReg(AMDGPU::EXEC) + .addReg(Reg); + + Skip(MI, MI.getOperand(2)); + + MI.eraseFromParent(); +} + +void SILowerControlFlowPass::Else(MachineInstr &MI) { + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Src = MI.getOperand(1).getReg(); + + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst) + .addReg(Src); // Saved EXEC + + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) + .addReg(Dst); + + Skip(MI, MI.getOperand(2)); + + MI.eraseFromParent(); +} + +void SILowerControlFlowPass::Break(MachineInstr &MI) { + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Src = MI.getOperand(1).getReg(); + + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst) + .addReg(AMDGPU::EXEC) + .addReg(Src); + + MI.eraseFromParent(); +} + +void SILowerControlFlowPass::IfBreak(MachineInstr &MI) { + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Vcc = MI.getOperand(1).getReg(); + unsigned Src = MI.getOperand(2).getReg(); + + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst) + .addReg(Vcc) + .addReg(Src); + + MI.eraseFromParent(); +} + +void SILowerControlFlowPass::ElseBreak(MachineInstr &MI) { + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Saved = MI.getOperand(1).getReg(); + unsigned Src = MI.getOperand(2).getReg(); + + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst) + .addReg(Saved) + .addReg(Src); + + MI.eraseFromParent(); +} + +void SILowerControlFlowPass::Loop(MachineInstr &MI) { + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Src = MI.getOperand(0).getReg(); + + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) + .addReg(Src); + + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addOperand(MI.getOperand(1)) + .addReg(AMDGPU::EXEC); + + MI.eraseFromParent(); +} + +void SILowerControlFlowPass::EndCf(MachineInstr &MI) { + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Reg = MI.getOperand(0).getReg(); + + BuildMI(MBB, MBB.getFirstNonPHI(), DL, + TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) + .addReg(Reg); + + MI.eraseFromParent(); +} + +void SILowerControlFlowPass::Branch(MachineInstr &MI) { + MachineBasicBlock *Next = MI.getParent()->getNextNode(); + MachineBasicBlock *Target = MI.getOperand(0).getMBB(); + if (Target == Next) + MI.eraseFromParent(); + else + assert(0); +} + +bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { + bool HaveCf = false; + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); + BI != BE; ++BI) { + + MachineBasicBlock &MBB = *BI; + for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); + I != MBB.end(); I = Next) { + + Next = llvm::next(I); + MachineInstr &MI = *I; + switch (MI.getOpcode()) { + default: break; + case AMDGPU::SI_IF: + If(MI); + break; + + case AMDGPU::SI_ELSE: + Else(MI); + break; + + case AMDGPU::SI_BREAK: + Break(MI); + break; + + case AMDGPU::SI_IF_BREAK: + IfBreak(MI); + break; + + case AMDGPU::SI_ELSE_BREAK: + ElseBreak(MI); + break; + + case AMDGPU::SI_LOOP: + Loop(MI); + break; + + case 
AMDGPU::SI_END_CF: + HaveCf = true; + EndCf(MI); + break; + + case AMDGPU::S_BRANCH: + Branch(MI); + break; + } + } + } + + // TODO: What is this good for? + unsigned ShaderType = MF.getInfo<SIMachineFunctionInfo>()->ShaderType; + if (HaveCf && ShaderType == ShaderType::PIXEL) { + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); + BI != BE; ++BI) { + + MachineBasicBlock &MBB = *BI; + if (MBB.succ_empty()) { + + MachineInstr &MI = *MBB.getFirstNonPHI(); + DebugLoc DL = MI.getDebugLoc(); + + // If the exec mask is non-zero, skip the next two instructions + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addImm(3) + .addReg(AMDGPU::EXEC); + + // Exec mask is zero: Export to NULL target... + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::EXP)) + .addImm(0) + .addImm(0x09) // V_008DFC_SQ_EXP_NULL + .addImm(0) + .addImm(1) + .addImm(1) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0) + .addReg(AMDGPU::SREG_LIT_0); + + // ... and terminate wavefront + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ENDPGM)); + } + } + } + + return true; +} diff --git a/lib/Target/R600/SILowerLiteralConstants.cpp b/lib/Target/R600/SILowerLiteralConstants.cpp new file mode 100644 index 0000000000..c0411e9b4d --- /dev/null +++ b/lib/Target/R600/SILowerLiteralConstants.cpp @@ -0,0 +1,108 @@ +//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief This pass performs the following transformation on instructions with +/// literal constants: +/// +/// %VGPR0 = V_MOV_IMM_I32 1 +/// +/// becomes: +/// +/// BUNDLE +/// * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT +/// * SI_LOAD_LITERAL 1 +/// +/// The resulting sequence matches exactly how the hardware handles immediate +/// operands, so this transformation greatly simplifies the code generator. +/// +/// Only the *_MOV_IMM_* support immediate operands at the moment, but when +/// support for immediate operands is added to other instructions, they +/// will be lowered here as well. 
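Before the literal-constant pass below, the exec-mask algebra that SILowerControlFlow.cpp above builds on can be condensed into a small model. This is a hedged sketch in plain C++, not LLVM code; it ignores nesting and the Skip() branch optimization and only shows how SI_IF, SI_ELSE and SI_END_CF move lane bits around.

#include <cstdint>

struct ExecMaskModel {
  uint64_t Exec; // one bit per vector lane, mirroring the 64-bit EXEC register

  // SI_IF (S_AND_SAVEEXEC_B64 + S_XOR_B64): keep only the lanes where the
  // condition holds; return the live lanes still owed the else side.
  uint64_t enterIf(uint64_t Vcc) {
    uint64_t Live = Exec;
    Exec &= Vcc;
    return Live ^ Exec;
  }

  // SI_ELSE (S_OR_SAVEEXEC_B64 + S_XOR_B64): switch execution to the pending
  // else lanes; return the mask SI_END_CF has to restore.
  uint64_t enterElse(uint64_t Pending) {
    uint64_t ThenLanes = Exec;
    Exec = Pending & ~ThenLanes;
    return ThenLanes;
  }

  // SI_END_CF (S_OR_B64): re-enable the saved lanes, restoring the set that
  // was live before the branch.
  void endCf(uint64_t Saved) { Exec |= Saved; }
};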
+//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" + +using namespace llvm; + +namespace { + +class SILowerLiteralConstantsPass : public MachineFunctionPass { + +private: + static char ID; + const TargetInstrInfo *TII; + +public: + SILowerLiteralConstantsPass(TargetMachine &tm) : + MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { + return "SI Lower literal constants pass"; + } +}; + +} // End anonymous namespace + +char SILowerLiteralConstantsPass::ID = 0; + +FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) { + return new SILowerLiteralConstantsPass(tm); +} + +bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) { + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); + I != MBB.end(); I = Next) { + Next = llvm::next(I); + MachineInstr &MI = *I; + switch (MI.getOpcode()) { + default: break; + case AMDGPU::S_MOV_IMM_I32: + case AMDGPU::S_MOV_IMM_I64: + case AMDGPU::V_MOV_IMM_F32: + case AMDGPU::V_MOV_IMM_I32: { + unsigned MovOpcode; + unsigned LoadLiteralOpcode; + MachineOperand LiteralOp = MI.getOperand(1); + if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) { + MovOpcode = AMDGPU::V_MOV_B32_e32; + } else { + MovOpcode = AMDGPU::S_MOV_B32; + } + if (LiteralOp.isImm()) { + LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32; + } else { + LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32; + } + MIBundleBuilder Bundle(MBB, I); + Bundle + .append(BuildMI(MF, MBB.findDebugLoc(I), TII->get(MovOpcode), + MI.getOperand(0).getReg()) + .addReg(AMDGPU::SI_LITERAL_CONSTANT)) + .append(BuildMI(MF, MBB.findDebugLoc(I), + TII->get(LoadLiteralOpcode)) + .addOperand(MI.getOperand(1))); + llvm::finalizeBundle(MBB, Bundle.begin()); + MI.eraseFromParent(); + break; + } + } + } + } + return false; +} diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp new file mode 100644 index 0000000000..7e59b42749 --- /dev/null +++ b/lib/Target/R600/SIMachineFunctionInfo.cpp @@ -0,0 +1,20 @@ +//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//===----------------------------------------------------------------------===// + + +#include "SIMachineFunctionInfo.h" + +using namespace llvm; + +SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) + : MachineFunctionInfo(), + SPIPSInputAddr(0), + ShaderType(0) + { } diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h new file mode 100644 index 0000000000..47271f5a1e --- /dev/null +++ b/lib/Target/R600/SIMachineFunctionInfo.h @@ -0,0 +1,34 @@ +//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +// +//===----------------------------------------------------------------------===// + + +#ifndef SIMACHINEFUNCTIONINFO_H_ +#define SIMACHINEFUNCTIONINFO_H_ + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which +/// tells the hardware which interpolation parameters to load. +class SIMachineFunctionInfo : public MachineFunctionInfo { +public: + SIMachineFunctionInfo(const MachineFunction &MF); + unsigned SPIPSInputAddr; + unsigned ShaderType; +}; + +} // End namespace llvm + + +#endif //_SIMACHINEFUNCTIONINFO_H_ diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp new file mode 100644 index 0000000000..88275c523f --- /dev/null +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -0,0 +1,48 @@ +//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief SI implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + + +#include "SIRegisterInfo.h" +#include "AMDGPUTargetMachine.h" + +using namespace llvm; + +SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm, + const TargetInstrInfo &tii) +: AMDGPURegisterInfo(tm, tii), + TM(tm), + TII(tii) + { } + +BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + return Reserved; +} + +const TargetRegisterClass * +SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const { + switch (rc->getID()) { + case AMDGPU::GPRF32RegClassID: + return &AMDGPU::VReg_32RegClass; + default: return rc; + } +} + +const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass( + MVT VT) const { + switch(VT.SimpleTy) { + default: + case MVT::i32: return &AMDGPU::VReg_32RegClass; + } +} diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h new file mode 100644 index 0000000000..40171e4450 --- /dev/null +++ b/lib/Target/R600/SIRegisterInfo.h @@ -0,0 +1,47 @@ +//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Interface definition for SIRegisterInfo +// +//===----------------------------------------------------------------------===// + + +#ifndef SIREGISTERINFO_H_ +#define SIREGISTERINFO_H_ + +#include "AMDGPURegisterInfo.h" + +namespace llvm { + +class AMDGPUTargetMachine; +class TargetInstrInfo; + +struct SIRegisterInfo : public AMDGPURegisterInfo { + AMDGPUTargetMachine &TM; + const TargetInstrInfo &TII; + + SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii); + + virtual BitVector getReservedRegs(const MachineFunction &MF) const; + + /// \param RC is an AMDIL reg class. + /// + /// \returns the SI register class that is equivalent to \p RC. 
+ virtual const TargetRegisterClass * + getISARegClass(const TargetRegisterClass *RC) const; + + /// \brief get the register class of the specified type to use in the + /// CFGStructurizer + virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; +}; + +} // End namespace llvm + +#endif // SIREGISTERINFO_H_ diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td new file mode 100644 index 0000000000..c3f136191a --- /dev/null +++ b/lib/Target/R600/SIRegisterInfo.td @@ -0,0 +1,167 @@ + +let Namespace = "AMDGPU" in { + def low : SubRegIndex; + def high : SubRegIndex; + + def sub0 : SubRegIndex; + def sub1 : SubRegIndex; + def sub2 : SubRegIndex; + def sub3 : SubRegIndex; + def sub4 : SubRegIndex; + def sub5 : SubRegIndex; + def sub6 : SubRegIndex; + def sub7 : SubRegIndex; +} + +class SIReg <string n, bits<16> encoding = 0> : Register<n> { + let Namespace = "AMDGPU"; + let HWEncoding = encoding; +} + +class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> { + let Namespace = "AMDGPU"; + let SubRegIndices = [low, high]; + let HWEncoding = encoding; +} + +class SGPR_32 <bits<16> num, string name> : SIReg<name, num>; + +class VGPR_32 <bits<16> num, string name> : SIReg<name, num>; + +// Special Registers +def VCC : SIReg<"VCC", 106>; +def EXEC_LO : SIReg <"EXEC LO", 126>; +def EXEC_HI : SIReg <"EXEC HI", 127>; +def EXEC : SI_64<"EXEC", [EXEC_LO, EXEC_HI], 126>; +def SCC : SIReg<"SCC", 253>; +def SREG_LIT_0 : SIReg <"S LIT 0", 128>; +def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT", 255>; +def M0 : SIReg <"M0", 124>; + +//Interpolation registers +def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">; +def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">; +def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">; +def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">; +def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">; +def PERSP_CENTROID_J : SIReg <"PERP_CENTROID_J">; +def PERSP_I_W : SIReg <"PERSP_I_W">; +def PERSP_J_W : SIReg <"PERSP_J_W">; +def PERSP_1_W : SIReg <"PERSP_1_W">; +def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">; +def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">; +def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">; +def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">; +def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">; +def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">; +def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">; +def POS_X_FLOAT : SIReg <"POS_X_FLOAT">; +def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">; +def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">; +def POS_W_FLOAT : SIReg <"POS_W_FLOAT">; +def FRONT_FACE : SIReg <"FRONT_FACE">; +def ANCILLARY : SIReg <"ANCILLARY">; +def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">; +def POS_FIXED_PT : SIReg <"POS_FIXED_PT">; + +// SGPR 32-bit registers +foreach Index = 0-101 in { + def SGPR#Index : SGPR_32 <Index, "SGPR"#Index>; +} + +def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, + (add (sequence "SGPR%u", 0, 101))>; + +// SGPR 64-bit registers +def SGPR_64 : RegisterTuples<[low, high], + [(add (decimate SGPR_32, 2)), + (add(decimate (rotl SGPR_32, 1), 2))]>; + +// SGPR 128-bit registers +def SGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w], + [(add (decimate SGPR_32, 4)), + (add (decimate (rotl SGPR_32, 1), 4)), + (add (decimate (rotl SGPR_32, 2), 4)), + (add (decimate (rotl SGPR_32, 3), 4))]>; + +// SGPR 256-bit registers +def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7], + [(add (decimate SGPR_32, 8)), + (add (decimate (rotl SGPR_32, 
1), 8)), + (add (decimate (rotl SGPR_32, 2), 8)), + (add (decimate (rotl SGPR_32, 3), 8)), + (add (decimate (rotl SGPR_32, 4), 8)), + (add (decimate (rotl SGPR_32, 5), 8)), + (add (decimate (rotl SGPR_32, 6), 8)), + (add (decimate (rotl SGPR_32, 7), 8))]>; + +// VGPR 32-bit registers +foreach Index = 0-255 in { + def VGPR#Index : VGPR_32 <Index, "VGPR"#Index>; +} + +def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, + (add (sequence "VGPR%u", 0, 255))>; + +// VGPR 64-bit registers +def VGPR_64 : RegisterTuples<[low, high], + [(add VGPR_32), + (add (rotl VGPR_32, 1))]>; + +// VGPR 128-bit registers +def VGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w], + [(add VGPR_32), + (add (rotl VGPR_32, 1)), + (add (rotl VGPR_32, 2)), + (add (rotl VGPR_32, 3))]>; + +// Register class for all scalar registers (SGPRs + Special Registers) +def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, + (add SGPR_32, SREG_LIT_0, M0, EXEC_LO, EXEC_HI) +>; + +def SReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add SGPR_64, VCC, EXEC)>; + +def SReg_1 : RegisterClass<"AMDGPU", [i1], 1, (add VCC, SGPR_64, EXEC)>; + +def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>; + +def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>; + +// Register class for all vector registers (VGPRs + Interploation Registers) +def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, + (add VGPR_32, + PERSP_SAMPLE_I, PERSP_SAMPLE_J, + PERSP_CENTER_I, PERSP_CENTER_J, + PERSP_CENTROID_I, PERSP_CENTROID_J, + PERSP_I_W, PERSP_J_W, PERSP_1_W, + LINEAR_SAMPLE_I, LINEAR_SAMPLE_J, + LINEAR_CENTER_I, LINEAR_CENTER_J, + LINEAR_CENTROID_I, LINEAR_CENTROID_J, + LINE_STIPPLE_TEX_COORD, + POS_X_FLOAT, + POS_Y_FLOAT, + POS_Z_FLOAT, + POS_W_FLOAT, + FRONT_FACE, + ANCILLARY, + SAMPLE_COVERAGE, + POS_FIXED_PT + ) +>; + +def VReg_64 : RegisterClass<"AMDGPU", [i64], 64, (add VGPR_64)>; + +def VReg_128 : RegisterClass<"AMDGPU", [v4f32], 128, (add VGPR_128)>; + +// AllReg_* - A set of all scalar and vector registers of a given width. +def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, (add VReg_32, SReg_32)>; + +def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64, (add SReg_64, VReg_64)>; + +// Special register classes for predicates and the M0 register +def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>; +def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>; +def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>; +def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>; + diff --git a/lib/Target/R600/SISchedule.td b/lib/Target/R600/SISchedule.td new file mode 100644 index 0000000000..28b65b8258 --- /dev/null +++ b/lib/Target/R600/SISchedule.td @@ -0,0 +1,15 @@ +//===-- SISchedule.td - SI Scheduling definitons -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TODO: This is just a place holder for now. 
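The RegisterTuples definitions above are compact but not obvious on first read. Under decimate and rotl, SGPR_64 pairs up even-aligned neighbours: the low halves come from every second register starting at SGPR0, the high halves from every second register of the rotated sequence, giving (SGPR0,SGPR1), (SGPR2,SGPR3) and so on; SGPR_128 and SGPR_256 extend the same idea to 4- and 8-register groups. A throwaway C++ loop that lists the 64-bit pairs (the printed names are illustrative, not the TableGen-generated tuple names):

#include <cstdio>

int main() {
  // SGPR_32 covers SGPR0..SGPR101; decimate-by-2 plus a rotate-by-1 yields
  // even-aligned pairs for the 64-bit tuples.
  for (int Lo = 0; Lo + 1 <= 101; Lo += 2)
    std::printf("(SGPR%d, SGPR%d)\n", Lo, Lo + 1);
  return 0;
}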
+// +//===----------------------------------------------------------------------===// + + +def SI_Itin : ProcessorItineraries <[], [], []>; diff --git a/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp b/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp new file mode 100644 index 0000000000..46b1f18c62 --- /dev/null +++ b/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp @@ -0,0 +1,26 @@ +//===-- TargetInfo/AMDGPUTargetInfo.cpp - TargetInfo for AMDGPU -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +/// \brief The target for the AMDGPU backend +Target llvm::TheAMDGPUTarget; + +/// \brief Extern function to initialize the targets for the AMDGPU backend +extern "C" void LLVMInitializeR600TargetInfo() { + RegisterTarget<Triple::r600, false> + R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX"); +} diff --git a/lib/Target/R600/TargetInfo/CMakeLists.txt b/lib/Target/R600/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000..3d1584eba3 --- /dev/null +++ b/lib/Target/R600/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMR600Info + AMDGPUTargetInfo.cpp + ) + +add_dependencies(LLVMR600Info AMDGPUCommonTableGen intrinsics_gen) diff --git a/lib/Target/R600/TargetInfo/LLVMBuild.txt b/lib/Target/R600/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000000..4c6fea4aa0 --- /dev/null +++ b/lib/Target/R600/TargetInfo/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/R600/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = R600Info +parent = R600 +required_libraries = MC Support +add_to_library_groups = R600 diff --git a/lib/Target/R600/TargetInfo/Makefile b/lib/Target/R600/TargetInfo/Makefile new file mode 100644 index 0000000000..b8ac4e7823 --- /dev/null +++ b/lib/Target/R600/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/AMDGPU/TargetInfo/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMR600Info + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 8165f5b8cc..a9aab86abd 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -262,22 +262,7 @@ unsigned countbits_slow(unsigned v) { c += v & 1; return c; } -unsigned countbits_fast(unsigned v){ - unsigned c; - for (c = 0; v; c++) - v &= v - 1; // clear the least significant bit set - return c; -} -BITBOARD = unsigned long long -int PopCnt(register BITBOARD a) { - register int c=0; - while(a) { - c++; - a &= a - 1; - } - return c; -} unsigned int popcount(unsigned int input) { unsigned int count = 0; for (unsigned int i = 0; i < 4 * 8; i++) diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index 874a5d5c51..6c47c70222 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -19,8 +19,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Function.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 5beed7635f..5fa545d301 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -13,7 +13,7 @@ #include "SparcTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index bc6e8ddfd7..168640f457 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -22,9 +22,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Module.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 303e46213e..9c1c30b9d3 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -19,9 +19,9 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Type.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Type.h" #define GET_REGINFO_TARGET_DESC #include "SparcGenRegisterInfo.inc" diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 7c628d7c55..60bceb708f 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -36,7 +36,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, DL(Subtarget.getDataLayout()), InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget), STTI(&TLInfo), VTTI(&TLInfo) { + FrameLowering(Subtarget) { } namespace { diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 53cf276a3e..081075de2d 100644 --- 
a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -19,10 +19,9 @@ #include "SparcInstrInfo.h" #include "SparcSelectionDAGInfo.h" #include "SparcSubtarget.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -33,8 +32,6 @@ class SparcTargetMachine : public LLVMTargetMachine { SparcTargetLowering TLInfo; SparcSelectionDAGInfo TSInfo; SparcFrameLowering FrameLowering; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: SparcTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -55,12 +52,6 @@ public: virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const { return &TSInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual const DataLayout *getDataLayout() const { return &DL; } // Pass Pipeline Configuration diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp index c9d5b7bdfb..bb71463234 100644 --- a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp +++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "Sparc.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index 8571bc0fa1..9a78ebc3fa 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -14,9 +14,9 @@ #include "llvm-c/Target.h" #include "llvm-c/Initialization.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/InitializePasses.h" -#include "llvm/LLVMContext.h" #include "llvm/PassManager.h" #include "llvm/Target/TargetLibraryInfo.h" #include <cstring> @@ -26,7 +26,6 @@ using namespace llvm; void llvm::initializeTarget(PassRegistry &Registry) { initializeDataLayoutPass(Registry); initializeTargetLibraryInfoPass(Registry); - initializeTargetTransformInfoPass(Registry); } void LLVMInitializeTarget(LLVMPassRegistryRef R) { diff --git a/lib/Target/TargetIntrinsicInfo.cpp b/lib/Target/TargetIntrinsicInfo.cpp index 8e8fd93801..64bd56f6e7 100644 --- a/lib/Target/TargetIntrinsicInfo.cpp +++ b/lib/Target/TargetIntrinsicInfo.cpp @@ -13,7 +13,7 @@ #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" using namespace llvm; TargetIntrinsicInfo::TargetIntrinsicInfo() { diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 1ca47d023f..f5121e34f7 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -13,11 +13,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" #include 
"llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index efc873d55a..6af040936a 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalValue.h" -#include "llvm/GlobalVariable.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/Support/CommandLine.h" diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index 10c770b135..1e4c195b0f 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -14,8 +14,8 @@ #include "llvm-c/TargetMachine.h" #include "llvm-c/Core.h" #include "llvm-c/Target.h" -#include "llvm/DataLayout.h" -#include "llvm/Module.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" #include "llvm/PassManager.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/FormattedStream.h" diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp deleted file mode 100644 index 7f9fdfc938..0000000000 --- a/lib/Target/TargetTransformImpl.cpp +++ /dev/null @@ -1,372 +0,0 @@ -// llvm/Target/TargetTransformImpl.cpp - Target Loop Trans Info ---*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Target/TargetTransformImpl.h" -#include "llvm/Target/TargetLowering.h" -#include <utility> - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// -// Calls used by scalar transformations. -// -//===----------------------------------------------------------------------===// - -bool ScalarTargetTransformImpl::isLegalAddImmediate(int64_t imm) const { - return TLI->isLegalAddImmediate(imm); -} - -bool ScalarTargetTransformImpl::isLegalICmpImmediate(int64_t imm) const { - return TLI->isLegalICmpImmediate(imm); -} - -bool ScalarTargetTransformImpl::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { - return TLI->isLegalAddressingMode(AM, Ty); -} - -bool ScalarTargetTransformImpl::isTruncateFree(Type *Ty1, Type *Ty2) const { - return TLI->isTruncateFree(Ty1, Ty2); -} - -bool ScalarTargetTransformImpl::isTypeLegal(Type *Ty) const { - EVT T = TLI->getValueType(Ty); - return TLI->isTypeLegal(T); -} - -unsigned ScalarTargetTransformImpl::getJumpBufAlignment() const { - return TLI->getJumpBufAlignment(); -} - -unsigned ScalarTargetTransformImpl::getJumpBufSize() const { - return TLI->getJumpBufSize(); -} - -bool ScalarTargetTransformImpl::shouldBuildLookupTables() const { - return TLI->supportJumpTables() && - (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); -} - -//===----------------------------------------------------------------------===// -// -// Calls used by the vectorizers. 
-// -//===----------------------------------------------------------------------===// -int VectorTargetTransformImpl::InstructionOpcodeToISD(unsigned Opcode) const { - enum InstructionOpcodes { -#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM, -#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM -#include "llvm/Instruction.def" - }; - switch (static_cast<InstructionOpcodes>(Opcode)) { - case Ret: return 0; - case Br: return 0; - case Switch: return 0; - case IndirectBr: return 0; - case Invoke: return 0; - case Resume: return 0; - case Unreachable: return 0; - case Add: return ISD::ADD; - case FAdd: return ISD::FADD; - case Sub: return ISD::SUB; - case FSub: return ISD::FSUB; - case Mul: return ISD::MUL; - case FMul: return ISD::FMUL; - case UDiv: return ISD::UDIV; - case SDiv: return ISD::UDIV; - case FDiv: return ISD::FDIV; - case URem: return ISD::UREM; - case SRem: return ISD::SREM; - case FRem: return ISD::FREM; - case Shl: return ISD::SHL; - case LShr: return ISD::SRL; - case AShr: return ISD::SRA; - case And: return ISD::AND; - case Or: return ISD::OR; - case Xor: return ISD::XOR; - case Alloca: return 0; - case Load: return ISD::LOAD; - case Store: return ISD::STORE; - case GetElementPtr: return 0; - case Fence: return 0; - case AtomicCmpXchg: return 0; - case AtomicRMW: return 0; - case Trunc: return ISD::TRUNCATE; - case ZExt: return ISD::ZERO_EXTEND; - case SExt: return ISD::SIGN_EXTEND; - case FPToUI: return ISD::FP_TO_UINT; - case FPToSI: return ISD::FP_TO_SINT; - case UIToFP: return ISD::UINT_TO_FP; - case SIToFP: return ISD::SINT_TO_FP; - case FPTrunc: return ISD::FP_ROUND; - case FPExt: return ISD::FP_EXTEND; - case PtrToInt: return ISD::BITCAST; - case IntToPtr: return ISD::BITCAST; - case BitCast: return ISD::BITCAST; - case ICmp: return ISD::SETCC; - case FCmp: return ISD::SETCC; - case PHI: return 0; - case Call: return 0; - case Select: return ISD::SELECT; - case UserOp1: return 0; - case UserOp2: return 0; - case VAArg: return 0; - case ExtractElement: return ISD::EXTRACT_VECTOR_ELT; - case InsertElement: return ISD::INSERT_VECTOR_ELT; - case ShuffleVector: return ISD::VECTOR_SHUFFLE; - case ExtractValue: return ISD::MERGE_VALUES; - case InsertValue: return ISD::MERGE_VALUES; - case LandingPad: return 0; - } - - llvm_unreachable("Unknown instruction type encountered!"); -} - -std::pair<unsigned, MVT> -VectorTargetTransformImpl::getTypeLegalizationCost(Type *Ty) const { - - LLVMContext &C = Ty->getContext(); - EVT MTy = TLI->getValueType(Ty); - - unsigned Cost = 1; - // We keep legalizing the type until we find a legal kind. We assume that - // the only operation that costs anything is the split. After splitting - // we need to handle two types. - while (true) { - TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, MTy); - - if (LK.first == TargetLowering::TypeLegal) - return std::make_pair(Cost, MTy.getSimpleVT()); - - if (LK.first == TargetLowering::TypeSplitVector || - LK.first == TargetLowering::TypeExpandInteger) - Cost *= 2; - - // Keep legalizing the type. 
- MTy = LK.second; - } -} - -unsigned -VectorTargetTransformImpl::getScalarizationOverhead(Type *Ty, - bool Insert, - bool Extract) const { - assert (Ty->isVectorTy() && "Can only scalarize vectors"); - unsigned Cost = 0; - - for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { - if (Insert) - Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i); - if (Extract) - Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i); - } - - return Cost; -} - -unsigned VectorTargetTransformImpl::getArithmeticInstrCost(unsigned Opcode, - Type *Ty) const { - // Check if any of the operands are vector operands. - int ISD = InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Ty); - - if (!TLI->isOperationExpand(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. Multiply - // by the type-legalization overhead. - return LT.first * 1; - } - - // Else, assume that we need to scalarize this op. - if (Ty->isVectorTy()) { - unsigned Num = Ty->getVectorNumElements(); - unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType()); - // return the cost of multiple scalar invocation plus the cost of inserting - // and extracting the values. - return getScalarizationOverhead(Ty, true, true) + Num * Cost; - } - - // We don't know anything about this scalar instruction. - return 1; -} - -unsigned VectorTargetTransformImpl::getBroadcastCost(Type *Tp) const { - return 1; -} - -unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { - int ISD = InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - std::pair<unsigned, MVT> SrcLT = getTypeLegalizationCost(Src); - std::pair<unsigned, MVT> DstLT = getTypeLegalizationCost(Dst); - - // Handle scalar conversions. - if (!Src->isVectorTy() && !Dst->isVectorTy()) { - - // Scalar bitcasts are usually free. - if (Opcode == Instruction::BitCast) - return 0; - - if (Opcode == Instruction::Trunc && - TLI->isTruncateFree(SrcLT.second, DstLT.second)) - return 0; - - if (Opcode == Instruction::ZExt && - TLI->isZExtFree(SrcLT.second, DstLT.second)) - return 0; - - // Just check the op cost. If the operation is legal then assume it costs 1. - if (!TLI->isOperationExpand(ISD, DstLT.second)) - return 1; - - // Assume that illegal scalar instruction are expensive. - return 4; - } - - // Check vector-to-vector casts. - if (Dst->isVectorTy() && Src->isVectorTy()) { - - // If the cast is between same-sized registers, then the check is simple. - if (SrcLT.first == DstLT.first && - SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { - - // Bitcast between types that are legalized to the same type are free. - if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) - return 0; - - // Assume that Zext is done using AND. - if (Opcode == Instruction::ZExt) - return 1; - - // Assume that sext is done using SHL and SRA. - if (Opcode == Instruction::SExt) - return 2; - - // Just check the op cost. If the operation is legal then assume it costs - // 1 and multiply by the type-legalization overhead. - if (!TLI->isOperationExpand(ISD, DstLT.second)) - return SrcLT.first * 1; - } - - // If we are converting vectors and the operation is illegal, or - // if the vectors are legalized to different types, estimate the - // scalarization costs. 
- unsigned Num = Dst->getVectorNumElements(); - unsigned Cost = getCastInstrCost(Opcode, Dst->getScalarType(), - Src->getScalarType()); - - // Return the cost of multiple scalar invocation plus the cost of - // inserting and extracting the values. - return getScalarizationOverhead(Dst, true, true) + Num * Cost; - } - - // We already handled vector-to-vector and scalar-to-scalar conversions. This - // is where we handle bitcast between vectors and scalars. We need to assume - // that the conversion is scalarized in one way or another. - if (Opcode == Instruction::BitCast) - // Illegal bitcasts are done by storing and loading from a stack slot. - return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) + - (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0); - - llvm_unreachable("Unhandled cast"); - } - -unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const { - return 0; -} - -unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode, - Type *ValTy, - Type *CondTy) const { - int ISD = InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - // Selects on vectors are actually vector selects. - if (ISD == ISD::SELECT) { - assert(CondTy && "CondTy must exist"); - if (CondTy->isVectorTy()) - ISD = ISD::VSELECT; - } - - std::pair<unsigned, MVT> LT = getTypeLegalizationCost(ValTy); - - if (!TLI->isOperationExpand(ISD, LT.second)) { - // The operation is legal. Assume it costs 1. Multiply - // by the type-legalization overhead. - return LT.first * 1; - } - - // Otherwise, assume that the cast is scalarized. - if (ValTy->isVectorTy()) { - unsigned Num = ValTy->getVectorNumElements(); - if (CondTy) - CondTy = CondTy->getScalarType(); - unsigned Cost = getCmpSelInstrCost(Opcode, ValTy->getScalarType(), - CondTy); - - // Return the cost of multiple scalar invocation plus the cost of inserting - // and extracting the values. - return getScalarizationOverhead(ValTy, true, false) + Num * Cost; - } - - // Unknown scalar opcode. - return 1; -} - -unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode, - Type *Val, - unsigned Index) const { - return 1; -} - -unsigned -VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1, - Type *Ty2) const { - return 1; -} - -unsigned -VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Src); - - // Assume that all loads of legal types cost 1. - return LT.first; -} - -unsigned -VectorTargetTransformImpl::getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, - ArrayRef<Type*> Tys) const { - // assume that we need to scalarize this intrinsic. 
- unsigned ScalarizationCost = 0; - unsigned ScalarCalls = 1; - if (RetTy->isVectorTy()) { - ScalarizationCost = getScalarizationOverhead(RetTy, true, false); - ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); - } - for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { - if (Tys[i]->isVectorTy()) { - ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); - ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); - } - } - return ScalarCalls + ScalarizationCost; -} - -unsigned -VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const { - std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Tp); - return LT.first; -} diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt index 47489bb06c..54204d4b63 100644 --- a/lib/Target/X86/AsmParser/CMakeLists.txt +++ b/lib/Target/X86/AsmParser/CMakeLists.txt @@ -1,7 +1,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) add_llvm_library(LLVMX86AsmParser - X86AsmLexer.cpp X86AsmParser.cpp ) diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp deleted file mode 100644 index b12399d447..0000000000 --- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp +++ /dev/null @@ -1,159 +0,0 @@ -//===-- X86AsmLexer.cpp - Tokenize X86 assembly to AsmTokens --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/X86BaseInfo.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/MC/MCTargetAsmLexer.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - -namespace { - -class X86AsmLexer : public MCTargetAsmLexer { - const MCAsmInfo &AsmInfo; - - bool tentativeIsValid; - AsmToken tentativeToken; - - const AsmToken &lexTentative() { - tentativeToken = getLexer()->Lex(); - tentativeIsValid = true; - return tentativeToken; - } - - const AsmToken &lexDefinite() { - if (tentativeIsValid) { - tentativeIsValid = false; - return tentativeToken; - } - return getLexer()->Lex(); - } - - AsmToken LexTokenATT(); - AsmToken LexTokenIntel(); -protected: - AsmToken LexToken() { - if (!Lexer) { - SetError(SMLoc(), "No MCAsmLexer installed"); - return AsmToken(AsmToken::Error, "", 0); - } - - switch (AsmInfo.getAssemblerDialect()) { - default: - SetError(SMLoc(), "Unhandled dialect"); - return AsmToken(AsmToken::Error, "", 0); - case 0: - return LexTokenATT(); - case 1: - return LexTokenIntel(); - } - } -public: - X86AsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI) - : MCTargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) { - } -}; - -} // end anonymous namespace - -#define GET_REGISTER_MATCHER -#include "X86GenAsmMatcher.inc" - -AsmToken X86AsmLexer::LexTokenATT() { - AsmToken lexedToken = lexDefinite(); - - switch (lexedToken.getKind()) { - default: - return lexedToken; - case AsmToken::Error: - SetError(Lexer->getErrLoc(), Lexer->getErr()); - return lexedToken; - - case AsmToken::Percent: { - const AsmToken &nextToken = lexTentative(); - if (nextToken.getKind() != AsmToken::Identifier) - return lexedToken; - - if (unsigned regID = MatchRegisterName(nextToken.getString())) { - lexDefinite(); - - // FIXME: This is completely wrong 
when there is a space or other - // punctuation between the % and the register name. - StringRef regStr(lexedToken.getString().data(), - lexedToken.getString().size() + - nextToken.getString().size()); - - return AsmToken(AsmToken::Register, regStr, - static_cast<int64_t>(regID)); - } - - // Match register name failed. If this is "db[0-7]", match it as an alias - // for dr[0-7]. - if (nextToken.getString().size() == 3 && - nextToken.getString().startswith("db")) { - int RegNo = -1; - switch (nextToken.getString()[2]) { - case '0': RegNo = X86::DR0; break; - case '1': RegNo = X86::DR1; break; - case '2': RegNo = X86::DR2; break; - case '3': RegNo = X86::DR3; break; - case '4': RegNo = X86::DR4; break; - case '5': RegNo = X86::DR5; break; - case '6': RegNo = X86::DR6; break; - case '7': RegNo = X86::DR7; break; - } - - if (RegNo != -1) { - lexDefinite(); - - // FIXME: This is completely wrong when there is a space or other - // punctuation between the % and the register name. - StringRef regStr(lexedToken.getString().data(), - lexedToken.getString().size() + - nextToken.getString().size()); - return AsmToken(AsmToken::Register, regStr, - static_cast<int64_t>(RegNo)); - } - } - - - return lexedToken; - } - } -} - -AsmToken X86AsmLexer::LexTokenIntel() { - const AsmToken &lexedToken = lexDefinite(); - - switch(lexedToken.getKind()) { - default: - return lexedToken; - case AsmToken::Error: - SetError(Lexer->getErrLoc(), Lexer->getErr()); - return lexedToken; - case AsmToken::Identifier: { - unsigned regID = MatchRegisterName(lexedToken.getString().lower()); - - if (regID) - return AsmToken(AsmToken::Register, - lexedToken.getString(), - static_cast<int64_t>(regID)); - return lexedToken; - } - } -} - -extern "C" void LLVMInitializeX86AsmLexer() { - RegisterMCAsmLexer<X86AsmLexer> X(TheX86_32Target); - RegisterMCAsmLexer<X86AsmLexer> Y(TheX86_64Target); -} diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index fdb7583ed0..5ce258ed0f 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -168,6 +168,7 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; SMLoc OffsetOfLoc; + bool AddressOf; union { struct { @@ -340,6 +341,10 @@ struct X86Operand : public MCParsedAsmOperand { return OffsetOfLoc.getPointer(); } + bool needAddressOf() const { + return AddressOf; + } + bool needSizeDirective() const { assert(Kind == Memory && "Invalid access!"); return Mem.NeedSizeDir; @@ -463,7 +468,7 @@ struct X86Operand : public MCParsedAsmOperand { } static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { - SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size() - 1); + SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size()); X86Operand *Res = new X86Operand(Token, Loc, EndLoc); Res->Tok.Data = Str.data(); Res->Tok.Length = Str.size(); @@ -471,9 +476,11 @@ struct X86Operand : public MCParsedAsmOperand { } static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, + bool AddressOf = false, SMLoc OffsetOfLoc = SMLoc()) { X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); Res->Reg.RegNo = RegNo; + Res->AddressOf = AddressOf; Res->OffsetOfLoc = OffsetOfLoc; return Res; } @@ -488,7 +495,7 @@ struct X86Operand : public MCParsedAsmOperand { /// Create an absolute memory operand. 
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, bool NeedSizeDir = false){ + unsigned Size = 0, bool NeedSizeDir = false) { X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -497,6 +504,7 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.Scale = 1; Res->Mem.Size = Size; Res->Mem.NeedSizeDir = NeedSizeDir; + Res->AddressOf = false; return Res; } @@ -520,6 +528,7 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.Scale = Scale; Res->Mem.Size = Size; Res->Mem.NeedSizeDir = NeedSizeDir; + Res->AddressOf = false; return Res; } }; @@ -558,10 +567,12 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, Parser.Lex(); // Eat percent token. const AsmToken &Tok = Parser.getTok(); + EndLoc = Tok.getEndLoc(); + if (Tok.isNot(AsmToken::Identifier)) { if (isParsingIntelSyntax()) return true; return Error(StartLoc, "invalid register name", - SMRange(StartLoc, Tok.getEndLoc())); + SMRange(StartLoc, EndLoc)); } RegNo = MatchRegisterName(Tok.getString()); @@ -582,13 +593,12 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, X86II::isX86_64ExtendedReg(RegNo)) return Error(StartLoc, "register %" + Tok.getString() + " is only available in 64-bit mode", - SMRange(StartLoc, Tok.getEndLoc())); + SMRange(StartLoc, EndLoc)); } // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { RegNo = X86::ST0; - EndLoc = Tok.getLoc(); Parser.Lex(); // Eat 'st' // Check to see if we have '(4)' after %st. @@ -615,11 +625,13 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, if (getParser().Lex().isNot(AsmToken::RParen)) return Error(Parser.getTok().getLoc(), "expected ')'"); - EndLoc = Tok.getLoc(); + EndLoc = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat ')' return false; } + EndLoc = Parser.getTok().getEndLoc(); + // If this is "db[0-7]", match it as an alias // for dr[0-7]. if (RegNo == 0 && Tok.getString().size() == 3 && @@ -636,7 +648,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, } if (RegNo != 0) { - EndLoc = Tok.getLoc(); + EndLoc = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat it. return false; } @@ -645,10 +657,9 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, if (RegNo == 0) { if (isParsingIntelSyntax()) return true; return Error(StartLoc, "invalid register name", - SMRange(StartLoc, Tok.getEndLoc())); + SMRange(StartLoc, EndLoc)); } - EndLoc = Tok.getEndLoc(); Parser.Lex(); // Eat identifier token. 
return false; } @@ -677,7 +688,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, unsigned Size) { unsigned BaseReg = 0, IndexReg = 0, Scale = 1; const AsmToken &Tok = Parser.getTok(); - SMLoc Start = Tok.getLoc(), End; + SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc(); const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ] @@ -693,9 +704,9 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, // Handle '[' 'symbol' ']' if (getParser().ParseExpression(Disp, End)) return 0; if (getLexer().isNot(AsmToken::RBrac)) - return ErrorOperand(Start, "Expected ']' token!"); + return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!"); + End = Parser.getTok().getEndLoc(); Parser.Lex(); - End = Tok.getLoc(); return X86Operand::CreateMem(Disp, Start, End, Size); } } else if (getLexer().is(AsmToken::Integer)) { @@ -704,8 +715,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Loc = Tok.getLoc(); if (getLexer().is(AsmToken::RBrac)) { // Handle '[' number ']' + End = Parser.getTok().getEndLoc(); Parser.Lex(); - End = Tok.getLoc(); const MCExpr *Disp = MCConstantExpr::Create(Val, getContext()); if (SegReg) return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale, @@ -726,8 +737,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, bool ExpectRBrac = true; if (getLexer().is(AsmToken::RBrac)) { ExpectRBrac = false; + End = Parser.getTok().getEndLoc(); Parser.Lex(); - End = Tok.getLoc(); } if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus) || @@ -753,18 +764,18 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, return ErrorOperand(PlusLoc, "unexpected token after +"); } else if (getLexer().is(AsmToken::Identifier)) { // This could be an index register or a displacement expression. - End = Tok.getLoc(); if (!IndexReg) ParseRegister(IndexReg, Start, End); - else if (getParser().ParseExpression(Disp, End)) return 0; + else if (getParser().ParseExpression(Disp, End)) + return 0; } } // Parse ][ as a plus. if (getLexer().is(AsmToken::RBrac)) { ExpectRBrac = false; + End = Parser.getTok().getEndLoc(); Parser.Lex(); - End = Tok.getLoc(); if (getLexer().is(AsmToken::LBrac)) { ExpectRBrac = true; Parser.Lex(); @@ -772,15 +783,15 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, return 0; } } else if (ExpectRBrac) { - if (getParser().ParseExpression(Disp, End)) - return 0; + if (getParser().ParseExpression(Disp, End)) + return 0; } if (ExpectRBrac) { if (getLexer().isNot(AsmToken::RBrac)) return ErrorOperand(End, "expected ']' token!"); + End = Parser.getTok().getEndLoc(); Parser.Lex(); - End = Tok.getLoc(); } // Parse the dot operator (e.g., [ebx].foo.bar). @@ -790,12 +801,11 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (ParseIntelDotOperator(Disp, &NewDisp, Err)) return ErrorOperand(Tok.getLoc(), Err); + End = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat the field. 
Disp = NewDisp; } - End = Tok.getLoc(); - // handle [-42] if (!BaseReg && !IndexReg) return X86Operand::CreateMem(Disp, Start, End, Size); @@ -831,28 +841,43 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { } const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); - if (getParser().ParseExpression(Disp, End)) return 0; - End = Parser.getTok().getLoc(); + if (getParser().ParseExpression(Disp, End)) + return 0; bool NeedSizeDir = false; - if (!Size && isParsingInlineAsm()) { + bool IsVarDecl = false; + if (isParsingInlineAsm()) { if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { const MCSymbol &Sym = SymRef->getSymbol(); // FIXME: The SemaLookup will fail if the name is anything other then an // identifier. // FIXME: Pass a valid SMLoc. - SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size); + unsigned tSize; + SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tSize, + IsVarDecl); + if (!Size) + Size = tSize; NeedSizeDir = Size > 0; } } if (!isParsingInlineAsm()) return X86Operand::CreateMem(Disp, Start, End, Size); - else + else { + // If this is not a VarDecl then assume it is a FuncDecl or some other label + // reference. We need an 'r' constraint here, so we need to create register + // operand to ensure proper matching. Just pick a GPR based on the size of + // a pointer. + if (!IsVarDecl) { + unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); + } + // When parsing inline assembly we set the base register to a non-zero value // as we don't know the actual value at this time. This is necessary to // get the matching correct in some cases. return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, /*Scale*/1, Start, End, Size, NeedSizeDir); + } } /// Parse the '.' operator. @@ -921,8 +946,6 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { if (getParser().ParseExpression(Val, End)) return ErrorOperand(Start, "Unable to parse expression!"); - End = Parser.getTok().getLoc(); - // Don't emit the offset operator. InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7)); @@ -930,7 +953,8 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { // register operand to ensure proper matching. Just pick a GPR based on // the size of a pointer. unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; - return X86Operand::CreateReg(RegNo, Start, End, OffsetOfLoc); + return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, + OffsetOfLoc); } /// Parse the 'TYPE' operator. The TYPE operator returns the size of a C or @@ -947,15 +971,15 @@ X86Operand *X86AsmParser::ParseIntelTypeOperator(SMLoc Start) { if (getParser().ParseExpression(Val, End)) return 0; - End = Parser.getTok().getLoc(); - unsigned Size = 0; if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) { const MCSymbol &Sym = SymRef->getSymbol(); // FIXME: The SemaLookup will fail if the name is anything other then an // identifier. // FIXME: Pass a valid SMLoc. - if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size)) + bool IsVarDecl; + if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size, + IsVarDecl)) return ErrorOperand(Start, "Unable to lookup TYPE of expr!"); Size /= 8; // Size is in terms of bits, but we want bytes in the context. 
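The hunks above rework the Intel-syntax operand parsing to track end locations with AsmToken::getEndLoc() instead of reusing getLoc(), so each operand's SMLoc range covers the whole lexed token. As a minimal sketch of the convention these changes rely on (illustrative only, not part of the patch), getLoc() points at a token's first character and getEndLoc() just past its last, so a range built from the pair underlines exactly the token text in a diagnostic:

#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/Support/SMLoc.h"

// Build the source range covered by a single token: getLoc() is the first
// character of the token, getEndLoc() is one past its last character.
static llvm::SMRange rangeOfToken(const llvm::AsmToken &Tok) {
  return llvm::SMRange(Tok.getLoc(), Tok.getEndLoc());
}

This is the same shape as the SMRange(StartLoc, EndLoc) arguments the patch now passes to Error() in ParseRegister, which is why the new EndLoc bookkeeping gives tighter error underlining.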
@@ -995,7 +1019,6 @@ X86Operand *X86AsmParser::ParseIntelOperand() { getLexer().is(AsmToken::Minus)) { const MCExpr *Val; if (!getParser().ParseExpression(Val, End)) { - End = Parser.getTok().getLoc(); return X86Operand::CreateImm(Val, Start, End); } } @@ -1006,7 +1029,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() { // If this is a segment register followed by a ':', then this is the start // of a memory reference, otherwise this is a normal register reference. if (getLexer().isNot(AsmToken::Colon)) - return X86Operand::CreateReg(RegNo, Start, Parser.getTok().getLoc()); + return X86Operand::CreateReg(RegNo, Start, End); getParser().Lex(); // Eat the colon. return ParseIntelMemOperand(RegNo, Start); @@ -1183,7 +1206,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); return 0; } - SMLoc MemEnd = Parser.getTok().getLoc(); + SMLoc MemEnd = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat the ')'. // If we have both a base register and an index register make sure they are @@ -2021,7 +2044,7 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getParser().ParseExpression(Value)) return true; - getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); + getParser().getStreamer().EmitValue(Value, Size); if (getLexer().is(AsmToken::EndOfStatement)) break; @@ -2059,14 +2082,10 @@ bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { return false; } - -extern "C" void LLVMInitializeX86AsmLexer(); - // Force static initialization. extern "C" void LLVMInitializeX86AsmParser() { RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target); RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target); - LLVMInitializeX86AsmLexer(); } #define GET_REGISTER_MATCHER diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 1b2ffb01ad..af1381ebdf 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -10,7 +10,6 @@ tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel) tablegen(LLVM X86GenFastISel.inc -gen-fast-isel) tablegen(LLVM X86GenCallingConv.inc -gen-callingconv) tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM X86GenEDInfo.inc -gen-enhanced-disassembly-info) add_public_tablegen_target(X86CommonTableGen) set(sources @@ -27,11 +26,13 @@ set(sources X86MCInstLower.cpp X86MachineFunctionInfo.cpp X86NaClRewritePass.cpp + X86PadShortFunction.cpp X86RegisterInfo.cpp X86SelectionDAGInfo.cpp X86Subtarget.cpp X86TargetMachine.cpp X86TargetObjectFile.cpp + X86TargetTransformInfo.cpp X86VZeroUpper.cpp ) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index ed61c01130..ca6f80ce3e 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -16,7 +16,6 @@ #include "X86Disassembler.h" #include "X86DisassemblerDecoder.h" -#include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCExpr.h" @@ -32,7 +31,6 @@ #include "X86GenRegisterInfo.inc" #define GET_INSTRINFO_ENUM #include "X86GenInstrInfo.inc" -#include "X86GenEDInfo.inc" using namespace llvm; using namespace llvm::X86Disassembler; @@ -83,10 +81,6 @@ X86GenericDisassembler::~X86GenericDisassembler() { delete MII; } -const EDInstInfo *X86GenericDisassembler::getEDInfo() const { - return instInfoX86; -} - /// regionReader - a callback function that wraps the readByte method from /// MemoryObject. 
/// diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index 981701f527..b92427a7e9 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -95,8 +95,6 @@ class MCSubtargetInfo; class MemoryObject; class raw_ostream; -struct EDInstInfo; - namespace X86Disassembler { /// X86GenericDisassembler - Generic disassembler for all X86 platforms. @@ -122,8 +120,6 @@ public: raw_ostream &vStream, raw_ostream &cStream) const; - /// getEDInfo - See MCDisassembler. - const EDInstInfo *getEDInfo() const; private: DisassemblerMode fMode; }; diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 8346b3392d..1926d4de29 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -114,7 +114,7 @@ public: bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const; void relaxInstruction(const MCInst &Inst, MCInst &Res) const; @@ -256,7 +256,7 @@ bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst) const { bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { // Relax if the value is too big for a (signed) i8. return int64_t(Value) != int64_t(int8_t(Value)); @@ -280,9 +280,9 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { Res.setOpcode(RelaxedOp); } -/// writeNopData - Write optimal nops to the output file for the \p Count -/// bytes. This returns the number of bytes written. It may return 0 if -/// the \p Count is more than the maximum optimal nops. +/// \brief Write a sequence of optimal nops to the output, covering \p Count +/// bytes. +/// \return - true on success, false on failure bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { static const uint8_t Nops[10][10] = { // nop diff --git a/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp index 29d87ba2c6..9acaf68c82 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp @@ -46,8 +46,7 @@ unsigned DemoteRegTo32_(unsigned RegIn); static void EmitDirectCall(const MCOperand &Op, bool Is64Bit, MCStreamer &Out) { - Out.EmitBundleAlignEnd(); - Out.EmitBundleLock(); + Out.EmitBundleLock(true); MCInst CALLInst; CALLInst.setOpcode(Is64Bit ? 
X86::CALL64pcrel32 : X86::CALLpcrel32); @@ -63,10 +62,7 @@ static void EmitIndirectBranch(const MCOperand &Op, bool Is64Bit, bool IsCall, const unsigned Reg32 = Op.getReg(); const unsigned Reg64 = getX86SubSuperRegister_(Reg32, MVT::i64); - if (IsCall) - Out.EmitBundleAlignEnd(); - - Out.EmitBundleLock(); + Out.EmitBundleLock(IsCall); MCInst ANDInst; ANDInst.setOpcode(X86::AND32ri8); @@ -160,7 +156,7 @@ static void EmitRegFix(unsigned Reg64, MCStreamer &Out) { static void EmitSPArith(unsigned Opc, const MCOperand &ImmOp, MCStreamer &Out) { - Out.EmitBundleLock(); + Out.EmitBundleLock(false); MCInst Tmp; Tmp.setOpcode(Opc); @@ -174,7 +170,7 @@ static void EmitSPArith(unsigned Opc, const MCOperand &ImmOp, } static void EmitSPAdj(const MCOperand &ImmOp, MCStreamer &Out) { - Out.EmitBundleLock(); + Out.EmitBundleLock(false); MCInst Tmp; Tmp.setOpcode(X86::LEA64_32r); @@ -286,8 +282,7 @@ static bool SandboxMemoryRef(MCInst *Inst, } static void EmitTLSAddr32(const MCInst &Inst, MCStreamer &Out) { - Out.EmitBundleAlignEnd(); - Out.EmitBundleLock(); + Out.EmitBundleLock(true); MCInst LeaInst; LeaInst.setOpcode(X86::LEA32r); @@ -315,7 +310,7 @@ static void EmitTLSAddr32(const MCInst &Inst, MCStreamer &Out) { static void EmitREST(const MCInst &Inst, unsigned Reg32, bool IsMem, MCStreamer &Out) { unsigned Reg64 = getX86SubSuperRegister_(Reg32, MVT::i64); - Out.EmitBundleLock(); + Out.EmitBundleLock(false); if (!IsMem) { EmitMoveRegReg(false, Reg32, Inst.getOperand(0).getReg(), Out); } else { @@ -479,7 +474,7 @@ bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out) { PrefixSaved = 0; if (PrefixLocal || !UseZeroBasedSandbox) - Out.EmitBundleLock(); + Out.EmitBundleLock(false); HandleMemoryRefTruncation(&SandboxedInst, IndexOpPosition, Out); ShortenMemoryRef(&SandboxedInst, IndexOpPosition); diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index f66b203f0d..c67bb944b6 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -257,7 +257,8 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitX86MCRegisterInfo(X, RA, X86_MC::getDwarfRegFlavour(TT, false), - X86_MC::getDwarfRegFlavour(TT, true)); + X86_MC::getDwarfRegFlavour(TT, true), + RA); X86_MC::InitLLVM2SEHRegisterMapping(X); return X; } @@ -365,7 +366,14 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF) return createWinCOFFStreamer(Ctx, MAB, *_Emitter, _OS, RelaxAll); - return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); + // @LOCALMOD-BEGIN + MCStreamer *Streamer = createELFStreamer(Ctx, MAB, _OS, _Emitter, + RelaxAll, NoExecStack); + if (TheTriple.isOSNaCl()) + Streamer->EmitBundleAlignMode(5); + + return Streamer; + // @LOCALMOD-END } static MCInstPrinter *createX86MCInstPrinter(const Target &T, diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile index 949661eb99..e518fecf04 100644 --- a/lib/Target/X86/Makefile +++ b/lib/Target/X86/Makefile @@ -16,8 +16,7 @@ BUILT_SOURCES = X86GenRegisterInfo.inc X86GenInstrInfo.inc \ X86GenAsmWriter.inc X86GenAsmMatcher.inc \ X86GenAsmWriter1.inc X86GenDAGISel.inc \ X86GenDisassemblerTables.inc X86GenFastISel.inc \ - X86GenCallingConv.inc X86GenSubtargetInfo.inc \ - X86GenEDInfo.inc + X86GenCallingConv.inc X86GenSubtargetInfo.inc DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc 
Utils diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 6a8a4fdf25..b4285a0718 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1568,43 +1568,6 @@ The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" //===---------------------------------------------------------------------===// - -Testcase: -int a(int x) { return (x & 127) > 31; } - -Current output: - movl 4(%esp), %eax - andl $127, %eax - cmpl $31, %eax - seta %al - movzbl %al, %eax - ret - -Ideal output: - xorl %eax, %eax - testl $96, 4(%esp) - setne %al - ret - -This should definitely be done in instcombine, canonicalizing the range -condition into a != condition. We get this IR: - -define i32 @a(i32 %x) nounwind readnone { -entry: - %0 = and i32 %x, 127 ; <i32> [#uses=1] - %1 = icmp ugt i32 %0, 31 ; <i1> [#uses=1] - %2 = zext i1 %1 to i32 ; <i32> [#uses=1] - ret i32 %2 -} - -Instcombine prefers to strength reduce relational comparisons to equality -comparisons when possible, this should be another case of that. This could -be handled pretty easily in InstCombiner::visitICmpInstWithInstAndIntCst, but it -looks like InstCombiner::visitICmpInstWithInstAndIntCst should really already -be redesigned to use ComputeMaskedBits and friends. - - -//===---------------------------------------------------------------------===// Testcase: int x(int a) { return (a&0xf0)>>4; } diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp index 52a67f763b..815d23588f 100644 --- a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp +++ b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "X86.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index b8b0a5a05b..8813aa836a 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -68,6 +68,13 @@ FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, /// FunctionPass *createEmitX86CodeToMemory(); +/// \brief Creates an X86-specific Target Transformation Info pass. +ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM); + +/// createX86PadShortFunctions - Return a pass that pads short functions +/// with NOOPs. This will prevent a stall when returning on the Atom. +FunctionPass *createX86PadShortFunctions(); + } // End llvm namespace #endif diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index e3c22d9c3b..3ab2899365 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -123,8 +123,11 @@ def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", - "HasSlowDivide", "true", - "Use small divide for positive values less than 256">; + "HasSlowDivide", "true", + "Use small divide for positive values less than 256">; +def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", + "PadShortFunctions", "true", + "Pad short functions">; //===----------------------------------------------------------------------===// // X86 processors supported. 
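Alongside the new pad-short-functions feature above and the createX86PadShortFunctions() declaration in X86.h, the usual wiring is that the SubtargetFeature string sets a boolean field on the subtarget, which the pass then queries before doing any work. A hedged sketch of that plumbing follows; the class, field, and accessor names here are assumptions for illustration and are not copied from the patch:

// Sketch of the SubtargetFeature-to-pass plumbing assumed by
// FeaturePadShortFunctions; names are illustrative only.
class X86SubtargetSketch {
  bool PadShortFunctions;   // filled in from the "pad-short-functions" feature
public:
  X86SubtargetSketch() : PadShortFunctions(false) {}
  bool padShortFunctions() const { return PadShortFunctions; }
};

// The pass returned by createX86PadShortFunctions() would then typically bail
// out early on subtargets (anything but Atom in this patch) that do not
// request padding, for example:
//
//   if (!TM.getSubtarget<X86Subtarget>().padShortFunctions())
//     return false;   // nothing to do on this CPU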
@@ -155,7 +158,8 @@ def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>; def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>; def : Proc<"pentium4", [FeatureSSE2]>; def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>; -def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem, + FeatureFastUAMem]>; def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>; def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>; def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, @@ -166,7 +170,7 @@ def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B, FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, - FeatureSlowDivide]>; + FeatureSlowDivide, FeaturePadShortFunctions]>; // "Arrandale" along with corei3 and corei5 def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureFastUAMem, diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 9cbc4ea649..24f94725e0 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -20,26 +20,26 @@ #include "X86TargetMachine.h" #include "llvm/ADT/SmallString.h" #include "llvm/Assembly/Writer.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Module.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -572,7 +572,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { MCSA_IndirectSymbol); // hlt; hlt; hlt; hlt; hlt hlt = 0xf4. const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4"; - OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/); + OutStreamer.EmitBytes(StringRef(HltInsts, 5)); } Stubs.clear(); @@ -598,7 +598,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // .long 0 if (MCSym.getInt()) // External to current translation unit. - OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); + OutStreamer.EmitIntValue(0, 4/*size*/); else // Internal to current translation unit. // @@ -607,8 +607,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // using NLPs. However, sometimes the types are local to the file. So // we need to fill in the value for the NLP in those cases. 
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), - OutContext), - 4/*size*/, 0/*addrspace*/); + OutContext), 4/*size*/); } Stubs.clear(); OutStreamer.AddBlankLine(); @@ -625,8 +624,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // .long _foo OutStreamer.EmitValue(MCSymbolRefExpr:: Create(Stubs[i].second.getPointer(), - OutContext), - 4/*size*/, 0/*addrspace*/); + OutContext), 4/*size*/); } Stubs.clear(); OutStreamer.AddBlankLine(); @@ -692,7 +690,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { name += ",DATA"; else name += ",data"; - OutStreamer.EmitBytes(name, 0); + OutStreamer.EmitBytes(name); } for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) { @@ -701,7 +699,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { else name = " -export:"; name += DLLExportedFns[i]->getName(); - OutStreamer.EmitBytes(name, 0); + OutStreamer.EmitBytes(name); } } } @@ -721,7 +719,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.EmitLabel(Stubs[i].first); OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(), 0); + TD->getPointerSize()); } Stubs.clear(); } diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 8dbff74e51..0a0a4c3f4d 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -25,7 +25,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 3201eb9cc0..670de2fa2e 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -19,19 +19,19 @@ #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Operator.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" @@ -849,8 +849,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), - Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); // Analyze operands of the call, assigning locations to each operand. 
SmallVector<CCValAssign, 16> ValLocs; @@ -1661,9 +1660,9 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget, CallingConv::ID CC = CS.getCallingConv(); if (CC == CallingConv::Fast || CC == CallingConv::GHC) return 0; - if (!CS.paramHasAttr(1, Attributes::StructRet)) + if (!CS.paramHasAttr(1, Attribute::StructRet)) return 0; - if (CS.paramHasAttr(1, Attributes::InReg)) + if (CS.paramHasAttr(1, Attribute::InReg)) return 0; return 4; } @@ -1701,8 +1700,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(), - Outs, TLI); + GetReturnInfo(I->getType(), CS.getAttributes(), Outs, TLI); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), *FuncInfo.MF, FTy->isVarArg(), Outs, FTy->getContext()); @@ -1742,12 +1740,12 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { Value *ArgVal = *i; ISD::ArgFlagsTy Flags; unsigned AttrInd = i - CS.arg_begin() + 1; - if (CS.paramHasAttr(AttrInd, Attributes::SExt)) + if (CS.paramHasAttr(AttrInd, Attribute::SExt)) Flags.setSExt(); - if (CS.paramHasAttr(AttrInd, Attributes::ZExt)) + if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) Flags.setZExt(); - if (CS.paramHasAttr(AttrInd, Attributes::ByVal)) { + if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) { PointerType *Ty = cast<PointerType>(ArgVal->getType()); Type *ElementTy = Ty->getElementType(); unsigned FrameSize = TD.getTypeAllocSize(ElementTy); @@ -1761,9 +1759,9 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { return false; } - if (CS.paramHasAttr(AttrInd, Attributes::InReg)) + if (CS.paramHasAttr(AttrInd, Attribute::InReg)) Flags.setInReg(); - if (CS.paramHasAttr(AttrInd, Attributes::Nest)) + if (CS.paramHasAttr(AttrInd, Attribute::Nest)) Flags.setNest(); // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra @@ -2047,17 +2045,17 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { ComputeValueVTs(TLI, I->getType(), RetTys); for (unsigned i = 0, e = RetTys.size(); i != e; ++i) { EVT VT = RetTys[i]; - EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT); + MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT); for (unsigned j = 0; j != NumRegs; ++j) { ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT.getSimpleVT(); + MyFlags.VT = RegisterVT; MyFlags.Used = !CS.getInstruction()->use_empty(); - if (CS.paramHasAttr(0, Attributes::SExt)) + if (CS.paramHasAttr(0, Attribute::SExt)) MyFlags.Flags.setSExt(); - if (CS.paramHasAttr(0, Attributes::ZExt)) + if (CS.paramHasAttr(0, Attribute::ZExt)) MyFlags.Flags.setZExt(); - if (CS.paramHasAttr(0, Attributes::InReg)) + if (CS.paramHasAttr(0, Attribute::InReg)) MyFlags.Flags.setInReg(); Ins.push_back(MyFlags); } diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 16197e09e9..0585b43a46 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -36,7 +36,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/InlineAsm.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git 
a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 48dc67da38..3ae70c6780 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -23,8 +23,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Function.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" @@ -627,6 +627,22 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { return CompactUnwindEncoding; } +/// usesTheStack - This function checks if any of the users of EFLAGS +/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has +/// to use the stack, and if we don't adjust the stack we clobber the first +/// frame index. +/// See X86InstrInfo::copyPhysReg. +static bool usesTheStack(MachineFunction &MF) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + + for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS), + re = MRI.reg_end(); ri != re; ++ri) + if (ri->isCopy()) + return true; + + return false; +} + /// emitPrologue - Push callee-saved registers onto the stack, which /// automatically adjust the stack pointer. Adjust the stack pointer to allocate /// space for local variables. Also emit labels used by the exception handler to @@ -675,12 +691,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // If this is x86-64 and the Red Zone is not disabled, if we are a leaf // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca then we do not need to adjust the - // stack pointer (we fit in the Red Zone). - if (Is64Bit && !Fn->getFnAttributes().hasAttribute(Attributes::NoRedZone) && + // stack pointer (we fit in the Red Zone). We also check that we don't + // push and pop from the stack. + if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoRedZone) && !RegInfo->needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. !IsWin64 && // Win64 has no Red Zone + !usesTheStack(MF) && // Don't push and pop. !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; @@ -1161,7 +1180,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } MachineInstr *NewMI = prior(MBBI); - NewMI->copyImplicitOps(MBBI); + NewMI->copyImplicitOps(MF, MBBI); // Delete the pseudo instruction TCRETURN. 
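// Illustrative sketch, not part of the patch above: the red-zone fast path in
// X86FrameLowering::emitPrologue now also requires that nothing copies EFLAGS
// (the new usesTheStack check), because lowering such a copy pushes and pops
// and would otherwise clobber the first frame index. A hypothetical predicate
// collecting the conditions the prologue tests, under those assumptions:
static bool canUseRedZone(bool Is64Bit, bool HasNoRedZoneAttr,
                          bool NeedsRealignment, bool HasVarSizedObjects,
                          bool AdjustsStack, bool IsWin64,
                          bool CopiesEFLAGS, bool SegmentedStacks) {
  return Is64Bit &&
         !HasNoRedZoneAttr &&      // function not marked noredzone
         !NeedsRealignment &&
         !HasVarSizedObjects &&    // no dynamic alloca
         !AdjustsStack &&          // no calls
         !IsWin64 &&               // Win64 has no red zone
         !CopiesEFLAGS &&          // don't push and pop for EFLAGS copies
         !SegmentedStacks;
}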
MBB.erase(MBBI); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index a35b9e0e1f..70cfa7d516 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -25,15 +25,15 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" using namespace llvm; STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); @@ -435,6 +435,11 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { if (!Chain.getNumOperands()) return false; + // Since we are not checking for AA here, conservatively abort if the chain + // writes to memory. It's not safe to move the callee (a load) across a store. + if (isa<MemSDNode>(Chain.getNode()) && + cast<MemSDNode>(Chain.getNode())->writeMem()) + return false; if (Chain.getOperand(0).getNode() == Callee.getNode()) return true; if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && @@ -446,8 +451,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { void X86DAGToDAGISel::PreprocessISelDAG() { // OptForSize is used in pattern predicates that isel is matching. - OptForSize = MF->getFunction()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize); + OptForSize = MF->getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { @@ -457,7 +462,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { !Subtarget->isTargetNaCl() && // @LOCALMOD: We can't fold load/call (N->getOpcode() == X86ISD::CALL || (N->getOpcode() == X86ISD::TC_RETURN && - // Only does this if load can be foled into TC_RETURN. + // Only does this if load can be folded into TC_RETURN. (Subtarget->is64Bit() || getTargetMachine().getRelocationModel() != Reloc::PIC_)))) { /// Also try moving call address load from outside callseq_start to just @@ -1064,8 +1069,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM.IndexReg = ShVal; return false; } - break; } + break; case ISD::SRL: { // Scale must not be used already. 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e05b43add8..61169626f1 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -23,7 +23,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/VariadicFunction.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -31,14 +30,15 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -882,6 +882,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ADD, MVT::v8i16, Legal); setOperationAction(ISD::ADD, MVT::v4i32, Legal); setOperationAction(ISD::ADD, MVT::v2i64, Legal); + setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v2i64, Custom); setOperationAction(ISD::SUB, MVT::v16i8, Legal); setOperationAction(ISD::SUB, MVT::v8i16, Legal); @@ -1058,6 +1059,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v4i32, Custom); } + setOperationAction(ISD::SDIV, MVT::v8i16, Custom); + setOperationAction(ISD::SDIV, MVT::v4i32, Custom); } if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) { @@ -1099,6 +1102,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FABS, MVT::v4f64, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); @@ -1121,6 +1125,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v16i16, Custom); setOperationAction(ISD::SRA, MVT::v32i8, Custom); + setOperationAction(ISD::SDIV, MVT::v16i16, Custom); + setOperationAction(ISD::SETCC, MVT::v32i8, Custom); setOperationAction(ISD::SETCC, MVT::v16i16, Custom); setOperationAction(ISD::SETCC, MVT::v8i32, Custom); @@ -1135,6 +1141,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom); + if (Subtarget->hasFMA() || Subtarget->hasFMA4()) { setOperationAction(ISD::FMA, MVT::v8f32, Legal); setOperationAction(ISD::FMA, MVT::v4f64, Legal); @@ -1169,6 +1182,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SHL, MVT::v8i32, Legal); 
setOperationAction(ISD::SRA, MVT::v8i32, Legal); + + setOperationAction(ISD::SDIV, MVT::v8i32, Custom); } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); setOperationAction(ISD::ADD, MVT::v8i32, Custom); @@ -1250,7 +1265,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. // @@ -1333,13 +1347,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setPrefFunctionAlignment(4); // 2^4 bytes. } - EVT X86TargetLowering::getSetCCResultType(EVT VT) const { if (!VT.isVector()) return MVT::i8; return VT.changeVectorElementTypeToInteger(); } - /// getMaxByValAlign - Helper for getByValTypeAlignment to determine /// the desired ByVal argument alignment. static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { @@ -1389,22 +1401,22 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const { /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, -/// probably because the source does not need to be loaded. If -/// 'IsZeroVal' is true, that means it's safe to return a -/// non-scalar-integer type, e.g. empty string source, constant, or loaded -/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is -/// constant so it does not need to be loaded. +/// probably because the source does not need to be loaded. If 'IsMemset' is +/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that +/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy +/// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { const Function *F = MF.getFunction(); - if (IsZeroVal && - !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) { + if ((!IsMemset || ZeroMemset) && + !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoImplicitFloat)) { if (Size >= 16 && (Subtarget->isUnalignedMemAccessFast() || ((DstAlign == 0 || DstAlign >= 16) && @@ -1432,6 +1444,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::i32; } +bool X86TargetLowering::isSafeMemOpType(MVT VT) const { + if (VT == MVT::f32) + return X86ScalarSSEf32; + else if (VT == MVT::f64) + return X86ScalarSSEf64; + return true; +} + bool X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { if (Fast) @@ -1492,10 +1512,10 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, // FIXME: Why this routine is here? Move to RegInfo! 
std::pair<const TargetRegisterClass*, uint8_t> -X86TargetLowering::findRepresentativeClass(EVT VT) const{ +X86TargetLowering::findRepresentativeClass(MVT VT) const{ const TargetRegisterClass *RRC = 0; uint8_t Cost = 1; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: return TargetLowering::findRepresentativeClass(VT); case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: @@ -1537,7 +1557,6 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, return true; } - //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -1718,8 +1737,8 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { return true; } -EVT -X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, +MVT +X86TargetLowering::getTypeForExtArgOrReturn(MVT VT, ISD::NodeType ExtendKind) const { MVT ReturnMVT; // TODO: Is this also valid on 32-bit? @@ -1728,7 +1747,7 @@ X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, else ReturnMVT = MVT::i32; - EVT MinVT = getRegisterType(Context, ReturnMVT); + MVT MinVT = getRegisterType(ReturnMVT); return VT.bitsLT(MinVT) ? MinVT : VT; } @@ -1750,7 +1769,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CCInfo.AnalyzeCallResult(Ins, RetCC_X86); // Copy all of the result registers out of their specified physreg. - for (unsigned i = 0; i != RVLocs.size(); ++i) { + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; EVT CopyVT = VA.getValVT(); @@ -1794,7 +1813,6 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, return Chain; } - //===----------------------------------------------------------------------===// // C & StdCall & Fast Calling Convention implementation //===----------------------------------------------------------------------===// @@ -2008,10 +2026,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isExtInLoc()) { // Handle MMX values passed in XMM regs. - if (RegVT.isVector()) { - ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), - ArgValue); - } else + if (RegVT.isVector()) + ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); + else ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); } } else { @@ -2089,8 +2106,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, TotalNumIntRegs); - bool NoImplicitFloatOps = Fn->getFnAttributes(). - hasAttribute(Attributes::NoImplicitFloat); + bool NoImplicitFloatOps = Fn->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && "SSE register cannot be used when SSE is disabled!"); assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat && @@ -2568,8 +2585,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, OpFlags = X86II::MO_DARWIN_STUB; } else if (Subtarget->isPICStyleRIPRel() && isa<Function>(GV) && - cast<Function>(GV)->getFnAttributes(). - hasAttribute(Attributes::NonLazyBind)) { + cast<Function>(GV)->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, + Attribute::NonLazyBind)) { // If the function is marked as non-lazy, generate an indirect call // which loads from the GOT directly. 
This avoids runtime overhead // at the cost of eager binding (and one extra byte of encoding). @@ -2687,7 +2705,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ins, dl, DAG, InVals); } - //===----------------------------------------------------------------------===// // Fast Calling Convention (tail call) implementation //===----------------------------------------------------------------------===// @@ -2996,7 +3013,6 @@ X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, return X86::createFastISel(funcInfo, libInfo); } - //===----------------------------------------------------------------------===// // Other Lowering Hooks //===----------------------------------------------------------------------===// @@ -3108,7 +3124,6 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { Subtarget->is64Bit() ? MVT::i64 : MVT::i32); } - bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, bool hasSymbolicDisplacement) { // Offset should fit into 32 bit immediate field. @@ -6759,8 +6774,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool HasFp256 = Subtarget->hasFp256(); bool HasInt256 = Subtarget->hasInt256(); MachineFunction &MF = DAG.getMachineFunction(); - bool OptForSize = MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize); + bool OptForSize = MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); @@ -7021,7 +7036,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, getShuffleCLImmediate(SVOp), DAG); - //===--------------------------------------------------------------------===// // Since no target specific shuffle was selected for this generic one, // lower it into other known shuffles. FIXME: this isn't true yet, but @@ -7123,7 +7137,6 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, return SDValue(); } - SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { @@ -7488,7 +7501,6 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); - // With PIC, the address is actually $g + Offset. if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && !Subtarget->is64Bit()) { @@ -7932,7 +7944,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("TLS not implemented for this target."); } - /// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values /// and take a 2 x i32 value to shift plus a shift amount. SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{ @@ -8381,12 +8392,71 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co } } -SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = Op->getValueType(0); + SDValue In = Op->getOperand(0); + EVT InVT = In.getValueType(); + DebugLoc dl = Op->getDebugLoc(); + + // Optimize vectors in AVX mode: + // + // v8i16 -> v8i32 + // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32. + // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32. + // Concat upper and lower parts. 
+ // + // v4i32 -> v4i64 + // Use vpunpckldq for 4 lower elements v4i32 -> v2i64. + // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64. + // Concat upper and lower parts. + // + + if (((VT != MVT::v8i32) || (InVT != MVT::v8i16)) && + ((VT != MVT::v4i64) || (InVT != MVT::v4i32))) + return SDValue(); + + if (Subtarget->hasInt256()) + return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, In); + + SDValue ZeroVec = getZeroVector(InVT, Subtarget, DAG, dl); + SDValue Undef = DAG.getUNDEF(InVT); + bool NeedZero = Op.getOpcode() == ISD::ZERO_EXTEND; + SDValue OpLo = getUnpackl(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef); + SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef); + + EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + VT.getVectorNumElements()/2); + + OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); + OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); +} + +SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + if (Subtarget->hasFp256()) { + SDValue Res = LowerAVXExtend(Op, DAG, Subtarget); + if (Res.getNode()) + return Res; + } + + return SDValue(); +} +SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op, + SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT VT = Op.getValueType(); SDValue In = Op.getOperand(0); EVT SVT = In.getValueType(); + if (Subtarget->hasFp256()) { + SDValue Res = LowerAVXExtend(Op, DAG, Subtarget); + if (Res.getNode()) + return Res; + } + if (!VT.is256BitVector() || !SVT.is128BitVector() || VT.getVectorNumElements() != SVT.getVectorNumElements()) return SDValue(); @@ -8400,7 +8470,9 @@ SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In); static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1}; SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, - DAG.getVectorShuffle(MVT::v8i16, DL, In, DAG.getUNDEF(MVT::v8i16), &Mask[0])); + DAG.getVectorShuffle(MVT::v8i16, DL, In, + DAG.getUNDEF(MVT::v8i16), + &Mask[0])); return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi); } @@ -8408,19 +8480,109 @@ SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT VT = Op.getValueType(); - EVT SVT = Op.getOperand(0).getValueType(); + SDValue In = Op.getOperand(0); + EVT SVT = In.getValueType(); - if (!VT.is128BitVector() || !SVT.is256BitVector() || - VT.getVectorNumElements() != SVT.getVectorNumElements()) + if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) { + // On AVX2, v4i64 -> v4i32 becomes VPERMD. + if (Subtarget->hasInt256()) { + static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; + In = DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, In); + In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32), + ShufMask); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In, + DAG.getIntPtrConstant(0)); + } + + // On AVX, v4i64 -> v4i32 becomes a sequence that uses PSHUFD and MOVLHPS. 
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, + DAG.getIntPtrConstant(0)); + SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, + DAG.getIntPtrConstant(2)); + + OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo); + OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi); + + // The PSHUFD mask: + static const int ShufMask1[] = {0, 2, 0, 0}; + SDValue Undef = DAG.getUNDEF(VT); + OpLo = DAG.getVectorShuffle(VT, DL, OpLo, Undef, ShufMask1); + OpHi = DAG.getVectorShuffle(VT, DL, OpHi, Undef, ShufMask1); + + // The MOVLHPS mask: + static const int ShufMask2[] = {0, 1, 4, 5}; + return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2); + } + + if ((VT == MVT::v8i16) && (SVT == MVT::v8i32)) { + // On AVX2, v8i32 -> v8i16 becomed PSHUFB. + if (Subtarget->hasInt256()) { + In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In); + + SmallVector<SDValue,32> pshufbMask; + for (unsigned i = 0; i < 2; ++i) { + pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8)); + for (unsigned j = 0; j < 8; ++j) + pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); + } + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, + &pshufbMask[0], 32); + In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV); + In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In); + + static const int ShufMask[] = {0, 2, -1, -1}; + In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64), + &ShufMask[0]); + In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, + DAG.getIntPtrConstant(0)); + return DAG.getNode(ISD::BITCAST, DL, VT, In); + } + + SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, + DAG.getIntPtrConstant(0)); + + SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, + DAG.getIntPtrConstant(4)); + + OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpLo); + OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpHi); + + // The PSHUFB mask: + static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, + -1, -1, -1, -1, -1, -1, -1, -1}; + + SDValue Undef = DAG.getUNDEF(MVT::v16i8); + OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1); + OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1); + + OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo); + OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi); + + // The MOVLHPS Mask: + static const int ShufMask2[] = {0, 1, 4, 5}; + SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2); + return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, res); + } + + // Handle truncation of V256 to V128 using shuffles. 
+ if (!VT.is128BitVector() || !SVT.is256BitVector()) return SDValue(); - assert(Subtarget->hasFp256() && "256-bit vector is observed without AVX!"); + assert(VT.getVectorNumElements() != SVT.getVectorNumElements() && + "Invalid op"); + assert(Subtarget->hasFp256() && "256-bit vector without AVX!"); unsigned NumElems = VT.getVectorNumElements(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElems * 2); - SDValue In = Op.getOperand(0); SmallVector<int, 16> MaskVec(NumElems * 2, -1); // Prepare truncation shuffle mask for (unsigned i = 0; i != NumElems; ++i) @@ -9179,7 +9341,6 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC)); } - SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Cond; SDValue Op0 = Op.getOperand(0); @@ -9287,8 +9448,28 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { if (VT == MVT::v2i64) { if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) return SDValue(); - if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) - return SDValue(); + if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) { + // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with + // pcmpeqd + pshufd + pand. + assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!"); + + // First cast everything to the right type, + Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); + Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); + + // Do the compare. + SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1); + + // Make sure the lower and upper halves are both all-ones. + const int Mask[] = { 1, 0, 3, 2 }; + SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask); + Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf); + + if (Invert) + Result = DAG.getNOT(dl, Result, MVT::v4i32); + + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } } // Since SSE has no unsigned integer comparisons, we need to flip the sign @@ -9544,6 +9725,53 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops)); } +SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op->getValueType(0); + SDValue In = Op->getOperand(0); + EVT InVT = In.getValueType(); + DebugLoc dl = Op->getDebugLoc(); + + if ((VT != MVT::v4i64 || InVT != MVT::v4i32) && + (VT != MVT::v8i32 || InVT != MVT::v8i16)) + return SDValue(); + + if (Subtarget->hasInt256()) + return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, In); + + // Optimize vectors in AVX mode + // Sign extend v8i16 to v8i32 and + // v4i32 to v4i64 + // + // Divide input vector into two parts + // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1} + // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 + // concat the vectors to original VT + + unsigned NumElems = InVT.getVectorNumElements(); + SDValue Undef = DAG.getUNDEF(InVT); + + SmallVector<int,8> ShufMask1(NumElems, -1); + for (unsigned i = 0; i != NumElems/2; ++i) + ShufMask1[i] = i; + + SDValue OpLo = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask1[0]); + + SmallVector<int,8> ShufMask2(NumElems, -1); + for (unsigned i = 0; i != NumElems/2; ++i) + ShufMask2[i] = i + NumElems/2; + + SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]); + + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + 
VT.getVectorNumElements()/2); + + OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); + OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); +} + // isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or // ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart // from the AND / OR. @@ -9832,7 +10060,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { Chain, Dest, CC, Cond); } - // Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets. // Calls to _alloca is needed to probe the stack when allocating more than 4k // bytes in one go. Touching the stack at 4K increments is necessary to ensure @@ -9998,8 +10225,9 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // Sanity Check: Make sure using fp_offset makes sense. assert(!getTargetMachine().Options.UseSoftFloat && !(DAG.getMachineFunction() - .getFunction()->getFnAttributes() - .hasAttribute(Attributes::NoImplicitFloat)) && + .getFunction()->getAttributes() + .hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoImplicitFloat)) && Subtarget->hasSSE1()); } @@ -10275,6 +10503,14 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + // SSE2/AVX2 sub with unsigned saturation intrinsics + case Intrinsic::x86_sse2_psubus_b: + case Intrinsic::x86_sse2_psubus_w: + case Intrinsic::x86_avx2_psubus_b: + case Intrinsic::x86_avx2_psubus_w: + return DAG.getNode(X86ISD::SUBUS, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + // SSE3/AVX horizontal add/sub intrinsics case Intrinsic::x86_sse3_hadd_ps: case Intrinsic::x86_sse3_hadd_pd: @@ -10324,6 +10560,100 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1), Op.getOperand(2)); } + // SSE2/SSE41/AVX2 integer max/min intrinsics. + case Intrinsic::x86_sse2_pmaxu_b: + case Intrinsic::x86_sse41_pmaxuw: + case Intrinsic::x86_sse41_pmaxud: + case Intrinsic::x86_avx2_pmaxu_b: + case Intrinsic::x86_avx2_pmaxu_w: + case Intrinsic::x86_avx2_pmaxu_d: + case Intrinsic::x86_sse2_pminu_b: + case Intrinsic::x86_sse41_pminuw: + case Intrinsic::x86_sse41_pminud: + case Intrinsic::x86_avx2_pminu_b: + case Intrinsic::x86_avx2_pminu_w: + case Intrinsic::x86_avx2_pminu_d: + case Intrinsic::x86_sse41_pmaxsb: + case Intrinsic::x86_sse2_pmaxs_w: + case Intrinsic::x86_sse41_pmaxsd: + case Intrinsic::x86_avx2_pmaxs_b: + case Intrinsic::x86_avx2_pmaxs_w: + case Intrinsic::x86_avx2_pmaxs_d: + case Intrinsic::x86_sse41_pminsb: + case Intrinsic::x86_sse2_pmins_w: + case Intrinsic::x86_sse41_pminsd: + case Intrinsic::x86_avx2_pmins_b: + case Intrinsic::x86_avx2_pmins_w: + case Intrinsic::x86_avx2_pmins_d: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::x86_sse2_pmaxu_b: + case Intrinsic::x86_sse41_pmaxuw: + case Intrinsic::x86_sse41_pmaxud: + case Intrinsic::x86_avx2_pmaxu_b: + case Intrinsic::x86_avx2_pmaxu_w: + case Intrinsic::x86_avx2_pmaxu_d: + Opcode = X86ISD::UMAX; + break; + case Intrinsic::x86_sse2_pminu_b: + case Intrinsic::x86_sse41_pminuw: + case Intrinsic::x86_sse41_pminud: + case Intrinsic::x86_avx2_pminu_b: + case Intrinsic::x86_avx2_pminu_w: + case Intrinsic::x86_avx2_pminu_d: + Opcode = X86ISD::UMIN; + break; + case Intrinsic::x86_sse41_pmaxsb: + case Intrinsic::x86_sse2_pmaxs_w: + case Intrinsic::x86_sse41_pmaxsd: + case Intrinsic::x86_avx2_pmaxs_b: + case Intrinsic::x86_avx2_pmaxs_w: + case Intrinsic::x86_avx2_pmaxs_d: + Opcode = X86ISD::SMAX; + break; + case Intrinsic::x86_sse41_pminsb: + case Intrinsic::x86_sse2_pmins_w: + case Intrinsic::x86_sse41_pminsd: + case Intrinsic::x86_avx2_pmins_b: + case Intrinsic::x86_avx2_pmins_w: + case Intrinsic::x86_avx2_pmins_d: + Opcode = X86ISD::SMIN; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + + // SSE/SSE2/AVX floating point max/min intrinsics. + case Intrinsic::x86_sse_max_ps: + case Intrinsic::x86_sse2_max_pd: + case Intrinsic::x86_avx_max_ps_256: + case Intrinsic::x86_avx_max_pd_256: + case Intrinsic::x86_sse_min_ps: + case Intrinsic::x86_sse2_min_pd: + case Intrinsic::x86_avx_min_ps_256: + case Intrinsic::x86_avx_min_pd_256: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse_max_ps: + case Intrinsic::x86_sse2_max_pd: + case Intrinsic::x86_avx_max_ps_256: + case Intrinsic::x86_avx_max_pd_256: + Opcode = X86ISD::FMAX; + break; + case Intrinsic::x86_sse_min_ps: + case Intrinsic::x86_sse2_min_pd: + case Intrinsic::x86_avx_min_ps_256: + case Intrinsic::x86_avx_min_pd_256: + Opcode = X86ISD::FMIN; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + // AVX2 variable shift intrinsics case Intrinsic::x86_avx2_psllv_d: case Intrinsic::x86_avx2_psllv_q: @@ -10391,6 +10721,12 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(1)); + case Intrinsic::x86_sse_sqrt_ps: + case Intrinsic::x86_sse2_sqrt_pd: + case Intrinsic::x86_avx_sqrt_ps_256: + case Intrinsic::x86_avx_sqrt_pd_256: + return DAG.getNode(ISD::FSQRT, dl, Op.getValueType(), Op.getOperand(1)); + // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest // or testp pattern and a setcc for the result. @@ -10914,7 +11250,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I, ++Idx) - if (Attrs.getParamAttributes(Idx).hasAttribute(Attributes::InReg)) + if (Attrs.hasAttribute(Idx, Attribute::InReg)) // FIXME: should only count parameters that are lowered to integers. 
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; @@ -11004,7 +11340,6 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); - MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), MachineMemOperand::MOStore, 2, 2); @@ -11037,7 +11372,6 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, DAG.getConstant(1, MVT::i16)), DAG.getConstant(3, MVT::i16)); - return DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal); } @@ -11166,17 +11500,43 @@ static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) { static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); // Decompose 256-bit ops into smaller 128-bit ops. if (VT.is256BitVector() && !Subtarget->hasInt256()) return Lower256IntArith(Op, DAG); + SDValue A = Op.getOperand(0); + SDValue B = Op.getOperand(1); + + // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle. + if (VT == MVT::v4i32) { + assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() && + "Should not custom lower when pmuldq is available!"); + + // Extract the odd parts. + const int UnpackMask[] = { 1, -1, 3, -1 }; + SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask); + SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask); + + // Multiply the even parts. + SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, A, B); + // Now multiply odd parts. + SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, Aodds, Bodds); + + Evens = DAG.getNode(ISD::BITCAST, dl, VT, Evens); + Odds = DAG.getNode(ISD::BITCAST, dl, VT, Odds); + + // Merge the two vectors back together with a shuffle. This expands into 2 + // shuffles. + const int ShufMask[] = { 0, 4, 2, 6 }; + return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask); + } + assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Only know how to lower V2I64/V4I64 multiply"); - DebugLoc dl = Op.getDebugLoc(); - // Ahi = psrlqi(a, 32); // Bhi = psrlqi(b, 32); // @@ -11188,9 +11548,6 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, // AhiBlo = psllqi(AhiBlo, 32); // return AloBlo + AloBhi + AhiBlo; - SDValue A = Op.getOperand(0); - SDValue B = Op.getOperand(1); - SDValue ShAmt = DAG.getConstant(32, MVT::i32); SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, ShAmt); @@ -11214,6 +11571,49 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo); } +SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + EVT EltTy = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + SDValue N0 = Op.getOperand(0); + DebugLoc dl = Op.getDebugLoc(); + + // Lower sdiv X, pow2-const. + BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(Op.getOperand(1)); + if (!C) + return SDValue(); + + APInt SplatValue, SplatUndef; + unsigned MinSplatBits; + bool HasAnyUndefs; + if (!C->isConstantSplat(SplatValue, SplatUndef, MinSplatBits, HasAnyUndefs)) + return SDValue(); + + if ((SplatValue != 0) && + (SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) { + unsigned lg2 = SplatValue.countTrailingZeros(); + // Splat the sign bit. 
+ SDValue Sz = DAG.getConstant(EltTy.getSizeInBits()-1, MVT::i32); + SDValue SGN = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, N0, Sz, DAG); + // Add (N0 < 0) ? abs2 - 1 : 0; + SDValue Amt = DAG.getConstant(EltTy.getSizeInBits() - lg2, MVT::i32); + SDValue SRL = getTargetVShiftNode(X86ISD::VSRLI, dl, VT, SGN, Amt, DAG); + SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL); + SDValue Lg2Amt = DAG.getConstant(lg2, MVT::i32); + SDValue SRA = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, ADD, Lg2Amt, DAG); + + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. + if (SplatValue.isNonNegative()) + return SRA; + + SmallVector<SDValue, 16> V(NumElts, DAG.getConstant(0, EltTy)); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts); + return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA); + } + return SDValue(); +} + SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); @@ -11567,7 +11967,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, } } - static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); @@ -11652,7 +12051,6 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); } - static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { EVT T = Op.getValueType(); @@ -11821,7 +12219,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG); - case ISD::ZERO_EXTEND: return lowerZERO_EXTEND(Op, DAG); + case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG); + case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); + case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG); @@ -11870,6 +12270,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ADD: return LowerADD(Op, DAG); case ISD::SUB: return LowerSUB(Op, DAG); + case ISD::SDIV: return LowerSDIV(Op, DAG); // @LOCALMOD-BEGIN case ISD::NACL_TP_TLS_OFFSET: return LowerNaClTpTlsOffset(Op, DAG); case ISD::NACL_TP_TDB_OFFSET: return LowerNaClTpTdbOffset(Op, DAG); @@ -11928,6 +12329,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, SelectionDAG &DAG) const { DebugLoc dl = N->getDebugLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); switch (N->getOpcode()) { default: llvm_unreachable("Do not know how to custom type legalize this operation!"); @@ -11977,6 +12379,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::FP_ROUND: { + if (!TLI.isTypeLegal(N->getOperand(0).getValueType())) + return; SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0)); Results.push_back(V); return; @@ -12144,10 +12548,15 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDV: return "X86ISD::BLENDV"; case X86ISD::BLENDI: return "X86ISD::BLENDI"; + case X86ISD::SUBUS: return "X86ISD::SUBUS"; case X86ISD::HADD: return 
"X86ISD::HADD"; case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; case X86ISD::FHSUB: return "X86ISD::FHSUB"; + case X86ISD::UMAX: return "X86ISD::UMAX"; + case X86ISD::UMIN: return "X86ISD::UMIN"; + case X86ISD::SMAX: return "X86ISD::SMAX"; + case X86ISD::SMIN: return "X86ISD::SMIN"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; case X86ISD::FMAXC: return "X86ISD::FMAXC"; @@ -12202,7 +12611,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::OR: return "X86ISD::OR"; case X86ISD::XOR: return "X86ISD::XOR"; case X86ISD::AND: return "X86ISD::AND"; - case X86ISD::ANDN: return "X86ISD::ANDN"; case X86ISD::BLSI: return "X86ISD::BLSI"; case X86ISD::BLSMSK: return "X86ISD::BLSMSK"; case X86ISD::BLSR: return "X86ISD::BLSR"; @@ -12305,24 +12713,21 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, return true; } - bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; + return NumBits1 > NumBits2; } bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const { - return Imm == (int32_t)Imm; + return isInt<32>(Imm); } bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const { // Can also use sub to handle negated immediates. - return Imm == (int32_t)Imm; + return isInt<32>(Imm); } bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { @@ -12330,9 +12735,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; + return NumBits1 > NumBits2; } bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { @@ -14544,127 +14947,12 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, return EltsFromConsecutiveLoads(VT, Elts, dl, DAG); } - /// PerformTruncateCombine - Converts truncate operation to /// a sequence of vector shuffle operations. 
/// It is possible when we truncate 256-bit vector to 128-bit vector static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { - if (!DCI.isBeforeLegalizeOps()) - return SDValue(); - - if (!Subtarget->hasFp256()) - return SDValue(); - - EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); - EVT OpVT = Op.getValueType(); - DebugLoc dl = N->getDebugLoc(); - - if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) { - - if (Subtarget->hasInt256()) { - // AVX2: v4i64 -> v4i32 - - // VPERMD - static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; - - Op = DAG.getNode(ISD::BITCAST, dl, MVT::v8i32, Op); - Op = DAG.getVectorShuffle(MVT::v8i32, dl, Op, DAG.getUNDEF(MVT::v8i32), - ShufMask); - - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op, - DAG.getIntPtrConstant(0)); - } - - // AVX: v4i64 -> v4i32 - SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op, - DAG.getIntPtrConstant(0)); - - SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op, - DAG.getIntPtrConstant(2)); - - OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi); - - // PSHUFD - static const int ShufMask1[] = {0, 2, 0, 0}; - - SDValue Undef = DAG.getUNDEF(VT); - OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1); - OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1); - - // MOVLHPS - static const int ShufMask2[] = {0, 1, 4, 5}; - - return DAG.getVectorShuffle(VT, dl, OpLo, OpHi, ShufMask2); - } - - if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) { - - if (Subtarget->hasInt256()) { - // AVX2: v8i32 -> v8i16 - - Op = DAG.getNode(ISD::BITCAST, dl, MVT::v32i8, Op); - - // PSHUFB - SmallVector<SDValue,32> pshufbMask; - for (unsigned i = 0; i < 2; ++i) { - pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8)); - for (unsigned j = 0; j < 8; ++j) - pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); - } - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v32i8, - &pshufbMask[0], 32); - Op = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, Op, BV); - - Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i64, Op); - - static const int ShufMask[] = {0, 2, -1, -1}; - Op = DAG.getVectorShuffle(MVT::v4i64, dl, Op, DAG.getUNDEF(MVT::v4i64), - &ShufMask[0]); - - Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op, - DAG.getIntPtrConstant(0)); - - return DAG.getNode(ISD::BITCAST, dl, VT, Op); - } - - SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op, - DAG.getIntPtrConstant(0)); - - SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op, - DAG.getIntPtrConstant(4)); - - OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpHi); - - // PSHUFB - static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, - -1, -1, -1, -1, -1, -1, -1, -1}; - - SDValue Undef = DAG.getUNDEF(MVT::v16i8); - OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, Undef, ShufMask1); - OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, Undef, ShufMask1); - - OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo); - OpHi = 
DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi); - - // MOVLHPS - static const int ShufMask2[] = {0, 1, 4, 5}; - - SDValue res = DAG.getVectorShuffle(MVT::v4i32, dl, OpLo, OpHi, ShufMask2); - return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, res); - } - return SDValue(); } @@ -14859,6 +15147,76 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// \brief Matches a VSELECT onto min/max or return 0 if the node doesn't match. +static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, + SDValue RHS, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + if (!VT.isVector()) + return 0; + + switch (VT.getSimpleVT().SimpleTy) { + default: return 0; + case MVT::v32i8: + case MVT::v16i16: + case MVT::v8i32: + if (!Subtarget->hasAVX2()) + return 0; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + if (!Subtarget->hasSSE2()) + return 0; + } + + // SSE2 has only a small subset of the operations. + bool hasUnsigned = Subtarget->hasSSE41() || + (Subtarget->hasSSE2() && VT == MVT::v16i8); + bool hasSigned = Subtarget->hasSSE41() || + (Subtarget->hasSSE2() && VT == MVT::v8i16); + + ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + + // Check for x CC y ? x : y. + if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && + DAG.isEqualTo(RHS, Cond.getOperand(1))) { + switch (CC) { + default: break; + case ISD::SETULT: + case ISD::SETULE: + return hasUnsigned ? X86ISD::UMIN : 0; + case ISD::SETUGT: + case ISD::SETUGE: + return hasUnsigned ? X86ISD::UMAX : 0; + case ISD::SETLT: + case ISD::SETLE: + return hasSigned ? X86ISD::SMIN : 0; + case ISD::SETGT: + case ISD::SETGE: + return hasSigned ? X86ISD::SMAX : 0; + } + // Check for x CC y ? y : x -- a min/max with reversed arms. + } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && + DAG.isEqualTo(RHS, Cond.getOperand(0))) { + switch (CC) { + default: break; + case ISD::SETULT: + case ISD::SETULE: + return hasUnsigned ? X86ISD::UMAX : 0; + case ISD::SETUGT: + case ISD::SETUGE: + return hasUnsigned ? X86ISD::UMIN : 0; + case ISD::SETLT: + case ISD::SETLE: + return hasSigned ? X86ISD::SMAX : 0; + case ISD::SETGT: + case ISD::SETGE: + return hasSigned ? X86ISD::SMIN : 0; + } + } + + return 0; +} + /// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT /// nodes. static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, @@ -15139,6 +15497,71 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, } } + // Match VSELECTs into subs with unsigned saturation. + if (!DCI.isBeforeLegalize() && + N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC && + // psubus is available in SSE2 and AVX2 for i8 and i16 vectors. + ((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) || + (Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) { + ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + + // Check if one of the arms of the VSELECT is a zero vector. If it's on the + // left side invert the predicate to simplify logic below. 
+ SDValue Other; + if (ISD::isBuildVectorAllZeros(LHS.getNode())) { + Other = RHS; + CC = ISD::getSetCCInverse(CC, true); + } else if (ISD::isBuildVectorAllZeros(RHS.getNode())) { + Other = LHS; + } + + if (Other.getNode() && Other->getNumOperands() == 2 && + DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) { + SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1); + SDValue CondRHS = Cond->getOperand(1); + + // Look for a general sub with unsigned saturation first. + // x >= y ? x-y : 0 --> subus x, y + // x > y ? x-y : 0 --> subus x, y + if ((CC == ISD::SETUGE || CC == ISD::SETUGT) && + Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS)) + return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS); + + // If the RHS is a constant we have to reverse the const canonicalization. + // x > C-1 ? x+-C : 0 --> subus x, C + if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD && + isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) { + APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue(); + if (CondRHS.getConstantOperandVal(0) == -A-1) { + SmallVector<SDValue, 32> V(VT.getVectorNumElements(), + DAG.getConstant(-A, VT.getScalarType())); + return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, + V.data(), V.size())); + } + } + + // Another special case: If C was a sign bit, the sub has been + // canonicalized into a xor. + // FIXME: Would it be better to use ComputeMaskedBits to determine whether + // it's safe to decanonicalize the xor? + // x s< 0 ? x^C : 0 --> subus x, C + if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR && + ISD::isBuildVectorAllZeros(CondRHS.getNode()) && + isSplatVector(OpRHS.getNode())) { + APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue(); + if (A.isSignBit()) + return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS); + } + } + } + + // Try to match a min/max vector operation. + if (!DCI.isBeforeLegalize() && + N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) + if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget)) + return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS); + // If we know that this node is legal then we know that it is going to be // matched by one of the SSE/AVX BLEND instructions. These instructions only // depend on the highest bit in each word. Try to use SimplifyDemandedBits @@ -15436,7 +15859,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } - /// PerformMulCombine - Optimize a single multiply with constant into two /// in order to implement it with two cheaper instructions, e.g. /// LEA + SHL, LEA + LEA. @@ -15525,7 +15947,6 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { } } - // Hardware support for vector shifts is sparse which makes us scalarize the // vector operations in many cases. Also, on sandybridge ADD is faster than // shl. @@ -15669,7 +16090,6 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } } - // CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..)) // where both setccs reference the same FP CMP, and rewrite for CMPEQSS // and friends. Likewise for OR -> CMPNEQSS. @@ -15778,9 +16198,92 @@ static bool CanFoldXORWithAllOnes(const SDNode *N) { return false; } +// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized +// register. In most cases we actually compare or select YMM-sized registers +// and mixing the two types creates horrible code. 
This method optimizes +// some of the transition sequences. +static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (VT.getSizeInBits() != 256) + return SDValue(); + + assert((N->getOpcode() == ISD::ANY_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND || + N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node"); + + SDValue Narrow = N->getOperand(0); + EVT NarrowVT = Narrow->getValueType(0); + if (NarrowVT.getSizeInBits() != 128) + return SDValue(); + + if (Narrow->getOpcode() != ISD::XOR && + Narrow->getOpcode() != ISD::AND && + Narrow->getOpcode() != ISD::OR) + return SDValue(); + + SDValue N0 = Narrow->getOperand(0); + SDValue N1 = Narrow->getOperand(1); + DebugLoc DL = Narrow->getDebugLoc(); + + // The Left side has to be a trunc. + if (N0.getOpcode() != ISD::TRUNCATE) + return SDValue(); + + // The type of the truncated inputs. + EVT WideVT = N0->getOperand(0)->getValueType(0); + if (WideVT != VT) + return SDValue(); + + // The right side has to be a 'trunc' or a constant vector. + bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE; + bool RHSConst = (isSplatVector(N1.getNode()) && + isa<ConstantSDNode>(N1->getOperand(0))); + if (!RHSTrunc && !RHSConst) + return SDValue(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT)) + return SDValue(); + + // Set N0 and N1 to hold the inputs to the new wide operation. + N0 = N0->getOperand(0); + if (RHSConst) { + N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(), + N1->getOperand(0)); + SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1); + N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, &C[0], C.size()); + } else if (RHSTrunc) { + N1 = N1->getOperand(0); + } + + // Generate the wide operation. 
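The widening rewrite set up above is sound because AND, OR and XOR operate bitwise, so applying them to the wide inputs and then re-narrowing through the extend (a mask for zero extension, SIGN_EXTEND_INREG for sign extension) yields the same lanes as truncating first. In scalar form, with 32-bit "wide" and 8-bit "narrow" values (illustrative C++ only):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t as[] = {0, 0x7f, 0x80, 0x12345678u, 0xffffffffu};
  const uint32_t bs[] = {0, 0x0f, 0xf0, 0xdeadbeefu};
  for (uint32_t a : as)
    for (uint32_t b : bs) {
      // Truncate, AND narrow, then zero-extend ...
      uint32_t narrow_zext = (uint8_t)((uint8_t)a & (uint8_t)b);
      // ... equals ANDing wide and re-masking with the narrow all-ones value.
      assert(narrow_zext == ((a & b) & 0xffu));

      // Sign-extend flavour: operate wide, then sign-extend "in register"
      // from the narrow width (the SIGN_EXTEND_INREG step in the combine).
      int32_t narrow_sext = (int8_t)((uint8_t)a ^ (uint8_t)b);
      int32_t wide_inreg  = (int8_t)(uint8_t)(a ^ b);
      assert(narrow_sext == wide_inreg);
    }
  return 0;
}
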
+ SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, WideVT, N0, N1); + unsigned Opcode = N->getOpcode(); + switch (Opcode) { + case ISD::ANY_EXTEND: + return Op; + case ISD::ZERO_EXTEND: { + unsigned InBits = NarrowVT.getScalarType().getSizeInBits(); + APInt Mask = APInt::getAllOnesValue(InBits); + Mask = Mask.zext(VT.getScalarType().getSizeInBits()); + return DAG.getNode(ISD::AND, DL, VT, + Op, DAG.getConstant(Mask, VT)); + } + case ISD::SIGN_EXTEND: + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, + Op, DAG.getValueType(NarrowVT)); + default: + llvm_unreachable("Unexpected opcode"); + } +} + static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -15788,9 +16291,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (R.getNode()) return R; - EVT VT = N->getValueType(0); - - // Create ANDN, BLSI, and BLSR instructions + // Create BLSI, and BLSR instructions // BLSI is X & (-X) // BLSR is X & (X-1) if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) { @@ -15798,13 +16299,6 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, SDValue N1 = N->getOperand(1); DebugLoc DL = N->getDebugLoc(); - // Check LHS for not - if (N0.getOpcode() == ISD::XOR && isAllOnes(N0.getOperand(1))) - return DAG.getNode(X86ISD::ANDN, DL, VT, N0.getOperand(0), N1); - // Check RHS for not - if (N1.getOpcode() == ISD::XOR && isAllOnes(N1.getOperand(1))) - return DAG.getNode(X86ISD::ANDN, DL, VT, N1.getOperand(0), N0); - // Check LHS for neg if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 && isZero(N0.getOperand(0))) @@ -15857,6 +16351,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -15864,8 +16359,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (R.getNode()) return R; - EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -16051,6 +16544,7 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -16064,8 +16558,6 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, if (!Subtarget->hasBMI()) return SDValue(); - EVT VT = N->getValueType(0); - if (VT != MVT::i32 && VT != MVT::i64) return SDValue(); @@ -16100,11 +16592,14 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, ISD::LoadExtType Ext = Ld->getExtensionType(); // If this is a vector EXT Load then attempt to optimize it using a - // shuffle. We need SSSE3 shuffles. + // shuffle. If SSSE3 is not available we may emit an illegal shuffle but the + // expansion is still better than scalar code. + // We generate X86ISD::VSEXT for SEXTLOADs if it's available, otherwise we'll + // emit a shuffle and a arithmetic shift. // TODO: It is possible to support ZExt by zeroing the undef values // during the shuffle phase or after the shuffle. 
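The arithmetic-shift fallback described above relies on a standard trick: a narrow value parked in the most-significant bits of a wider lane can be sign-extended with a single shift right. A scalar version (illustrative C++; the shift amount is the difference of the two element widths, 24 bits for i8 elements widened to i32):

#include <cassert>
#include <cstdint>

// Sign-extend an 8-bit value into 32 bits without a dedicated sign-extend:
// park the byte in the top 8 bits, then shift it back down arithmetically
// (signed right shift is an arithmetic shift on the usual targets).
static int32_t sext_via_shift(uint8_t b) {
  uint32_t shifted = (uint32_t)b << 24;   // the "shuffle into the high bits" step
  return (int32_t)shifted >> 24;          // the SRA step
}

int main() {
  for (int v = -128; v <= 127; ++v)
    assert(sext_via_shift((uint8_t)v) == v);
  return 0;
}
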
- if (RegVT.isVector() && RegVT.isInteger() && - Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) { + if (RegVT.isVector() && RegVT.isInteger() && Subtarget->hasSSE2() && + (Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)) { assert(MemVT != RegVT && "Cannot extend to the same type"); assert(MemVT.isVector() && "Must load a vector from memory"); @@ -16113,6 +16608,9 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, unsigned MemSz = MemVT.getSizeInBits(); assert(RegSz > MemSz && "Register size must be greater than the mem size"); + if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256()) + return SDValue(); + // All sizes must be a power of two. if (!isPowerOf2_32(RegSz * MemSz * NumElems)) return SDValue(); @@ -16136,16 +16634,23 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, // Calculate the number of scalar loads that we need to perform // in order to load our vector from memory. unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits(); + if (Ext == ISD::SEXTLOAD && NumLoads > 1) + return SDValue(); + + unsigned loadRegZize = RegSz; + if (Ext == ISD::SEXTLOAD && RegSz == 256) + loadRegZize /= 2; // Represent our vector as a sequence of elements which are the // largest scalar that we can load. EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy, - RegSz/SclrLoadTy.getSizeInBits()); + loadRegZize/SclrLoadTy.getSizeInBits()); // Represent the data using the same element type that is stored in // memory. In practice, we ''widen'' MemVT. - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), - RegSz/MemVT.getScalarType().getSizeInBits()); + EVT WideVecVT = + EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), + loadRegZize/MemVT.getScalarType().getSizeInBits()); assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() && "Invalid vector type"); @@ -16186,6 +16691,41 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res); unsigned SizeRatio = RegSz/MemSz; + if (Ext == ISD::SEXTLOAD) { + // If we have SSE4.1 we can directly emit a VSEXT node. + if (Subtarget->hasSSE41()) { + SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec); + return DCI.CombineTo(N, Sext, TF, true); + } + + // Otherwise we'll shuffle the small elements in the high bits of the + // larger type and perform an arithmetic shift. If the shift is not legal + // it's better to scalarize. + if (!TLI.isOperationLegalOrCustom(ISD::SRA, RegVT)) + return SDValue(); + + // Redistribute the loaded elements into the different locations. + SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i != NumElems; ++i) + ShuffleVec[i*SizeRatio + SizeRatio-1] = i; + + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec, + DAG.getUNDEF(WideVecVT), + &ShuffleVec[0]); + + Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff); + + // Build the arithmetic shift. + unsigned Amt = RegVT.getVectorElementType().getSizeInBits() - + MemVT.getVectorElementType().getSizeInBits(); + SmallVector<SDValue, 8> C(NumElems, + DAG.getConstant(Amt, RegVT.getScalarType())); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, RegVT, &C[0], C.size()); + Shuff = DAG.getNode(ISD::SRA, dl, RegVT, Shuff, BV); + + return DCI.CombineTo(N, Shuff, TF, true); + } + // Redistribute the loaded elements into the different locations. 
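Several of the combines and selection patterns touched in this patch come down to classic bit identities: BLSI computes x & -x (isolate the lowest set bit), BLSR computes x & (x-1) (clear the lowest set bit), and ANDN, now selected from (and (not x), y) patterns instead of a dedicated DAG combine, computes ~x & y. A quick scalar check of all three (plain C++, independent of the sources):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t xs[] = {1, 2, 6, 0x50, 0x80000000u, 0xffff0000u};
  const uint32_t y = 0x12345678u;
  for (uint32_t x : xs) {
    // BLSI: x & -x isolates the lowest set bit (a nonzero power of two in x).
    uint32_t lowest = x & (0u - x);
    assert(lowest != 0 && (lowest & (lowest - 1)) == 0 && (x & lowest) == lowest);
    // BLSR: x & (x - 1) clears exactly that bit.
    assert((x & (x - 1)) == (x ^ lowest));
    // ANDN: ~x & y keeps the bits of y that are not in x.
    uint32_t andn = ~x & y;
    assert((andn & x) == 0 && (andn | (x & y)) == y);
  }
  return 0;
}
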
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) @@ -16319,7 +16859,6 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, Chains.size()); } - // Turn load->store of MMX types into GPR load/stores. This avoids clobbering // the FP state in cases where an emms may be missing. // A preferable solution to the general problem is to figure out the right @@ -16330,8 +16869,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, return SDValue(); const Function *F = DAG.getMachineFunction().getFunction(); - bool NoImplicitFloatOps = F->getFnAttributes(). - hasAttribute(Attributes::NoImplicitFloat); + bool NoImplicitFloatOps = F->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps && Subtarget->hasSSE2(); if ((VT.isVector() || @@ -16625,7 +17164,6 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) { N->getOperand(0), N->getOperand(1)); } - /// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes. static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { // FAND(0.0, x) -> 0.0 @@ -16681,48 +17219,12 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); - EVT OpVT = Op.getValueType(); - DebugLoc dl = N->getDebugLoc(); - - if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) || - (VT == MVT::v8i32 && OpVT == MVT::v8i16)) { - - if (Subtarget->hasInt256()) - return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, Op); - - // Optimize vectors in AVX mode - // Sign extend v8i16 to v8i32 and - // v4i32 to v4i64 - // - // Divide input vector into two parts - // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1} - // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 - // concat the vectors to original VT - - unsigned NumElems = OpVT.getVectorNumElements(); - SDValue Undef = DAG.getUNDEF(OpVT); - - SmallVector<int,8> ShufMask1(NumElems, -1); - for (unsigned i = 0; i != NumElems/2; ++i) - ShufMask1[i] = i; - - SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]); - - SmallVector<int,8> ShufMask2(NumElems, -1); - for (unsigned i = 0; i != NumElems/2; ++i) - ShufMask2[i] = i + NumElems/2; - - SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]); - - EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), - VT.getVectorNumElements()/2); - - OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); - OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi); - - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); + if (VT.isVector() && VT.getSizeInBits() == 256) { + SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); + if (R.getNode()) + return R; } + return SDValue(); } @@ -16776,58 +17278,26 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, DebugLoc dl = N->getDebugLoc(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - EVT OpVT = N0.getValueType(); if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() != X86ISD::SETCC_CARRY) - return SDValue(); - ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (!C || C->getZExtValue() != 1) - return SDValue(); - return DAG.getNode(ISD::AND, dl, VT, - DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, - N00.getOperand(0), N00.getOperand(1)), - 
DAG.getConstant(1, VT)); + if (N00.getOpcode() == X86ISD::SETCC_CARRY) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C || C->getZExtValue() != 1) + return SDValue(); + return DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, + N00.getOperand(0), N00.getOperand(1)), + DAG.getConstant(1, VT)); + } } - // Optimize vectors in AVX mode: - // - // v8i16 -> v8i32 - // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32. - // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32. - // Concat upper and lower parts. - // - // v4i32 -> v4i64 - // Use vpunpckldq for 4 lower elements v4i32 -> v2i64. - // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64. - // Concat upper and lower parts. - // - if (!DCI.isBeforeLegalizeOps()) - return SDValue(); - - if (!Subtarget->hasFp256()) - return SDValue(); - - if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) || - ((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) { - - if (Subtarget->hasInt256()) - return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, N0); - - SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl); - SDValue OpLo = getUnpackl(DAG, dl, OpVT, N0, ZeroVec); - SDValue OpHi = getUnpackh(DAG, dl, OpVT, N0, ZeroVec); - - EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - VT.getVectorNumElements()/2); - - OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi); - - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); + if (VT.isVector() && VT.getSizeInBits() == 256) { + SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget); + if (R.getNode()) + return R; } return SDValue(); @@ -17363,8 +17833,6 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { return false; } - - /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. X86TargetLowering::ConstraintType @@ -17441,13 +17909,13 @@ TargetLowering::ConstraintWeight case 'f': case 't': case 'u': - if (type->isFloatingPointTy()) - weight = CW_SpecificReg; - break; + if (type->isFloatingPointTy()) + weight = CW_SpecificReg; + break; case 'y': - if (type->isX86_MMXTy() && Subtarget->hasMMX()) - weight = CW_SpecificReg; - break; + if (type->isX86_MMXTy() && Subtarget->hasMMX()) + weight = CW_SpecificReg; + break; case 'x': case 'Y': if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) || @@ -17887,217 +18355,3 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return Res; } - -//===----------------------------------------------------------------------===// -// -// X86 cost model. -// -//===----------------------------------------------------------------------===// - -struct X86CostTblEntry { - int ISD; - MVT Type; - unsigned Cost; -}; - -static int -FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) { - for (unsigned int i = 0; i < len; ++i) - if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty) - return i; - - // Could not find an entry. - return -1; -} - -struct X86TypeConversionCostTblEntry { - int ISD; - MVT Dst; - MVT Src; - unsigned Cost; -}; - -static int -FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len, - int ISD, MVT Dst, MVT Src) { - for (unsigned int i = 0; i < len; ++i) - if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst) - return i; - - // Could not find an entry. 
- return -1; -} - -ScalarTargetTransformInfo::PopcntHwSupport -X86ScalarTargetTransformImpl::getPopcntHwSupport(unsigned TyWidth) const { - assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>(); - - // TODO: Currently the __builtin_popcount() implementation using SSE3 - // instructions is inefficient. Once the problem is fixed, we should - // call ST.hasSSE3() instead of ST.hasSSE4(). - return ST.hasSSE41() ? Fast : None; -} - -unsigned -X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, - Type *Ty) const { - // Legalize the type. - std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Ty); - - int ISD = InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>(); - - static const X86CostTblEntry AVX1CostTable[] = { - // We don't have to scalarize unsupported ops. We can issue two half-sized - // operations and we only need to extract the upper YMM half. - // Two ops + 1 extract + 1 insert = 4. - { ISD::MUL, MVT::v8i32, 4 }, - { ISD::SUB, MVT::v8i32, 4 }, - { ISD::ADD, MVT::v8i32, 4 }, - { ISD::MUL, MVT::v4i64, 4 }, - { ISD::SUB, MVT::v4i64, 4 }, - { ISD::ADD, MVT::v4i64, 4 }, - }; - - // Look for AVX1 lowering tricks. - if (ST.hasAVX()) { - int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, - LT.second); - if (Idx != -1) - return LT.first * AVX1CostTable[Idx].Cost; - } - // Fallback to the default implementation. - return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty); -} - -unsigned -X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { - assert(Val->isVectorTy() && "This must be a vector type"); - - if (Index != -1U) { - // Legalize the type. - std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Val); - - // This type is legalized to a scalar type. - if (!LT.second.isVector()) - return 0; - - // The type may be split. Normalize the index to the new type. - unsigned Width = LT.second.getVectorNumElements(); - Index = Index % Width; - - // Floating point scalars are already located in index #0. - if (Val->getScalarType()->isFloatingPointTy() && Index == 0) - return 0; - } - - return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index); -} - -unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, - Type *ValTy, - Type *CondTy) const { - // Legalize the type. - std::pair<unsigned, MVT> LT = getTypeLegalizationCost(ValTy); - - MVT MTy = LT.second; - - int ISD = InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - const X86Subtarget &ST = - TLI->getTargetMachine().getSubtarget<X86Subtarget>(); - - static const X86CostTblEntry SSE42CostTbl[] = { - { ISD::SETCC, MVT::v2f64, 1 }, - { ISD::SETCC, MVT::v4f32, 1 }, - { ISD::SETCC, MVT::v2i64, 1 }, - { ISD::SETCC, MVT::v4i32, 1 }, - { ISD::SETCC, MVT::v8i16, 1 }, - { ISD::SETCC, MVT::v16i8, 1 }, - }; - - static const X86CostTblEntry AVX1CostTbl[] = { - { ISD::SETCC, MVT::v4f64, 1 }, - { ISD::SETCC, MVT::v8f32, 1 }, - // AVX1 does not support 8-wide integer compare. 
- { ISD::SETCC, MVT::v4i64, 4 }, - { ISD::SETCC, MVT::v8i32, 4 }, - { ISD::SETCC, MVT::v16i16, 4 }, - { ISD::SETCC, MVT::v32i8, 4 }, - }; - - static const X86CostTblEntry AVX2CostTbl[] = { - { ISD::SETCC, MVT::v4i64, 1 }, - { ISD::SETCC, MVT::v8i32, 1 }, - { ISD::SETCC, MVT::v16i16, 1 }, - { ISD::SETCC, MVT::v32i8, 1 }, - }; - - if (ST.hasSSE42()) { - int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); - if (Idx != -1) - return LT.first * SSE42CostTbl[Idx].Cost; - } - - if (ST.hasAVX()) { - int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); - if (Idx != -1) - return LT.first * AVX1CostTbl[Idx].Cost; - } - - if (ST.hasAVX2()) { - int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); - if (Idx != -1) - return LT.first * AVX2CostTbl[Idx].Cost; - } - - return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy); -} - -unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode, - Type *Dst, - Type *Src) const { - int ISD = InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - EVT SrcTy = TLI->getValueType(Src); - EVT DstTy = TLI->getValueType(Dst); - - if (!SrcTy.isSimple() || !DstTy.isSimple()) - return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src); - - const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>(); - - static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, - { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, - { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, - { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, - { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, - { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, - }; - - if (ST.hasAVX()) { - int Idx = FindInConvertTable(AVXConversionTbl, - array_lengthof(AVXConversionTbl), - ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return AVXConversionTbl[Idx].Cost; - } - - return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src); -} diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index cad471df4a..5cd9623dfb 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -23,7 +23,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { namespace X86ISD { @@ -182,6 +181,9 @@ namespace llvm { /// BLENDI - Blend where the selector is an immediate. BLENDI, + // SUBUS - Integer sub with unsigned saturation. + SUBUS, + /// HADD - Integer horizontal add. HADD, @@ -194,6 +196,12 @@ namespace llvm { /// FHSUB - Floating point horizontal sub. FHSUB, + /// UMAX, UMIN - Unsigned integer max and min. + UMAX, UMIN, + + /// SMAX, SMIN - Signed integer max and min. + SMAX, SMIN, + /// FMAX, FMIN - Floating point max and min. /// FMAX, FMIN, @@ -280,8 +288,6 @@ namespace llvm { ADD, SUB, ADC, SBB, SMUL, INC, DEC, OR, XOR, AND, - ANDN, // ANDN - Bitwise AND NOT with FLAGS results. 
- BLSI, // BLSI - Extract lowest set isolated bit BLSMSK, // BLSMSK - Get mask up to lowest set bit BLSR, // BLSR - Reset lowest set bit @@ -505,18 +511,25 @@ namespace llvm { /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, - /// probably because the source does not need to be loaded. If - /// 'IsZeroVal' is true, that means it's safe to return a - /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is - /// constant so it does not need to be loaded. + /// probably because the source does not need to be loaded. If 'IsMemset' is + /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that + /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy + /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. virtual EVT - getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, bool MemcpyStrSrc, + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; + /// isSafeMemOpType - Returns true if it's safe to use load / store of the + /// specified type to expand memcpy / memset inline. This is mostly true + /// for all types except for some special cases. For example, on X86 + /// targets without SSE2 f64 load / store are done with fldl / fstpl which + /// also does type conversion. Note the specified type doesn't have to be + /// legal as the hook is used before type legalization. + virtual bool isSafeMemOpType(MVT VT) const; + /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. Returns whether it /// is "fast" by reference in the second argument. 
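The split between getOptimalMemOpType and the new isSafeMemOpType separates two questions: which unit is most profitable for an inline memcpy/memset expansion, and which units the target can use at all (per the comment above, f64 without SSE2 would be copied through fldl/fstpl, which converts the value rather than moving raw bytes). A rough sketch of how such a pair of hooks can interact, with made-up names and policy, not the X86 implementation:

#include <cstddef>
#include <cstdio>

enum class MemOpTy { I8, I16, I32, F64, V16I8 };

static bool isSafeMemOpTy(MemOpTy T, bool HasSSE2) {
  // Without SSE2 an f64 "copy" would go through the x87 stack and is not a
  // raw byte move, so it is vetoed here.
  return T != MemOpTy::F64 || HasSSE2;
}

static MemOpTy chooseMemOpTy(size_t Size, unsigned DstAlign, bool ZeroMemset,
                             bool HasSSE2) {
  (void)ZeroMemset;                        // a real target keys off this too
  MemOpTy T = MemOpTy::I8;
  if (Size >= 16 && DstAlign % 16 == 0 && HasSSE2) T = MemOpTy::V16I8;
  else if (Size >= 8)                              T = MemOpTy::F64;
  else if (Size >= 4)                              T = MemOpTy::I32;
  else if (Size >= 2)                              T = MemOpTy::I16;
  return isSafeMemOpTy(T, HasSSE2) ? T : MemOpTy::I32;
}

int main() {
  // An 8-byte copy on a target without SSE2 falls back to 32-bit pieces.
  bool ok = chooseMemOpTy(8, 4, /*ZeroMemset=*/false, /*HasSSE2=*/false)
              == MemOpTy::I32;
  std::printf("%s\n", ok ? "fallback to i32" : "unexpected");
  return ok ? 0 : 1;
}
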
@@ -719,7 +732,7 @@ namespace llvm { protected: std::pair<const TargetRegisterClass*, uint8_t> - findRepresentativeClass(EVT VT) const; + findRepresentativeClass(MVT VT) const; private: /// Subtarget - Keep a pointer to the X86Subtarget around so that we can @@ -813,7 +826,9 @@ namespace llvm { SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; @@ -841,6 +856,7 @@ namespace llvm { SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; @@ -881,9 +897,8 @@ namespace llvm { virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; - virtual EVT - getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, - ISD::NodeType ExtendKind) const; + virtual MVT + getTypeForExtArgOrReturn(MVT VT, ISD::NodeType ExtendKind) const; virtual bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, @@ -958,31 +973,6 @@ namespace llvm { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); } - - class X86ScalarTargetTransformImpl : public ScalarTargetTransformImpl { - public: - explicit X86ScalarTargetTransformImpl(const TargetLowering *TL) : - ScalarTargetTransformImpl(TL) {}; - - virtual PopcntHwSupport getPopcntHwSupport(unsigned TyWidth) const; - }; - - class X86VectorTargetTransformInfo : public VectorTargetTransformImpl { - public: - explicit X86VectorTargetTransformInfo(const TargetLowering *TL) : - VectorTargetTransformImpl(TL) {} - - virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; - - virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const; - - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const; - - virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const; - }; } #endif // X86ISELLOWERING_H diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index f580b76d95..4955e76c87 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -58,7 +58,7 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), - "mul{w}\t$src", + "mul{w}\t$src", [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in @@ -159,7 +159,7 @@ def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (X86smul_flag GR16:$src1, (load addr:$src2)))], IIC_IMUL16_RM>, TB, OpSize; -def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), +def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, EFLAGS, @@ -183,8 +183,8 @@ let Defs = 
[EFLAGS] in { def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, imm:$src2))], + [(set GR16:$dst, EFLAGS, + (X86smul_flag GR16:$src1, imm:$src2))], IIC_IMUL16_RRI>, OpSize; def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8 (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), @@ -267,6 +267,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 // unsigned division/remainder +let hasSideEffects = 1 in { // so that we don't speculatively execute let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", [], IIC_DIV8_REG>; @@ -320,12 +321,13 @@ let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX "idiv{w}\t$src", [], IIC_IDIV16>, OpSize; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX -def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), +def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), "idiv{l}\t$src", [], IIC_IDIV32>; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), "idiv{q}\t$src", [], IIC_IDIV64>; } +} // hasSideEffects = 0 //===----------------------------------------------------------------------===// // Two address Instructions. @@ -413,11 +415,11 @@ def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), IIC_UNARY_REG>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. -def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), +def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), "inc{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], IIC_UNARY_REG>, OpSize, Requires<[In32BitMode]>; -def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), +def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), "inc{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))], IIC_UNARY_REG>, @@ -431,22 +433,22 @@ def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst", // In 64-bit mode, single byte INC and DEC cannot be encoded. let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can transform into LEA. 
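The hasSideEffects flag wrapped around the DIV/IDIV definitions above keeps them from being speculated: unlike most ALU instructions an integer divide can trap, so it must stay behind whatever guard establishes that its operands are safe. A scalar illustration (plain C++):

#include <climits>
#include <cstdio>

// The divide must stay under the guard; executing it speculatively with
// d == 0 (or n == INT_MIN && d == -1) would fault at run time.
static int guarded_div(int n, int d) {
  if (d != 0 && !(n == INT_MIN && d == -1))
    return n / d;
  return 0;
}

int main() {
  std::printf("%d %d %d\n", guarded_div(10, 3), guarded_div(10, 0),
              guarded_div(INT_MIN, -1));
  return 0;
}
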
-def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1), +def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1), "inc{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], IIC_UNARY_REG>, OpSize, Requires<[In64BitMode]>; -def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1), +def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "inc{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))], IIC_UNARY_REG>, Requires<[In64BitMode]>; -def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1), +def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1), "dec{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))], IIC_UNARY_REG>, OpSize, Requires<[In64BitMode]>; -def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), +def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "dec{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))], IIC_UNARY_REG>, @@ -470,7 +472,7 @@ let CodeSize = 2 in { def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", [(store (add (loadi64 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; - + // These are duplicates of their 32-bit counterparts. Only needed so X86 knows // how to unfold them. // FIXME: What is this for?? @@ -499,12 +501,12 @@ def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))], IIC_UNARY_REG>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. -def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), +def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), "dec{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))], IIC_UNARY_REG>, OpSize, Requires<[In32BitMode]>; -def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), +def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), "dec{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))], IIC_UNARY_REG>, @@ -545,57 +547,57 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass, bit hasOddOpcode, bit hasOpSizePrefix, bit hasREX_WPrefix> { /// VT - This is the value type itself. ValueType VT = vt; - + /// InstrSuffix - This is the suffix used on instructions with this type. For /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q". string InstrSuffix = instrsuffix; - + /// RegClass - This is the register class associated with this type. For /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64. RegisterClass RegClass = regclass; - + /// LoadNode - This is the load node associated with this type. For /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64. PatFrag LoadNode = loadnode; - + /// MemOperand - This is the memory operand associated with this type. For /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem. X86MemOperand MemOperand = memoperand; - + /// ImmEncoding - This is the encoding of an immediate of this type. For /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32 /// since the immediate fields of i64 instructions is a 32-bit sign extended /// value. ImmType ImmEncoding = immkind; - + /// ImmOperand - This is the operand kind of an immediate of this type. For /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 -> /// i64i32imm since the immediate fields of i64 instructions is a 32-bit sign /// extended value. 
Operand ImmOperand = immoperand; - + /// ImmOperator - This is the operator that should be used to match an /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32). SDPatternOperator ImmOperator = immoperator; - + /// Imm8Operand - This is the operand kind to use for an imm8 of this type. /// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is /// only used for instructions that have a sign-extended imm8 field form. Operand Imm8Operand = imm8operand; - + /// Imm8Operator - This is the operator that should be used to match an 8-bit /// sign extended immediate of this kind in a pattern (e.g. imm16immSExt8). SDPatternOperator Imm8Operator = imm8operator; - + /// HasOddOpcode - This bit is true if the instruction should have an odd (as /// opposed to even) opcode. Operations on i8 are usually even, operations on /// other datatypes are odd. bit HasOddOpcode = hasOddOpcode; - + /// HasOpSizePrefix - This bit is set to true if the instruction should have /// the 0x66 operand size prefix. This is set for i16 types. bit HasOpSizePrefix = hasOpSizePrefix; - + /// HasREX_WPrefix - This bit is set to true if the instruction should have /// the 0x40 REX prefix. This is set for i64 types. bit HasREX_WPrefix = hasREX_WPrefix; @@ -625,12 +627,12 @@ def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem, /// 3. Infers whether the instruction should have a 0x40 REX_W prefix. /// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations) /// or 1 (for i16,i32,i64 operations). -class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, +class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, string mnemonic, string args, list<dag> pattern, InstrItinClass itin = IIC_BIN_NONMEM> : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4}, opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode }, - f, outs, ins, + f, outs, ins, !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern, itin> { @@ -691,6 +693,7 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; + let hasSideEffects = 0; } // BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding). @@ -700,6 +703,7 @@ class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; + let hasSideEffects = 0; } // BinOpRM - Instructions like "add reg, reg, [mem]". @@ -765,13 +769,13 @@ class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode, Format f> : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), - [(set typeinfo.RegClass:$dst, EFLAGS, + [(set typeinfo.RegClass:$dst, EFLAGS, (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>; // BinOpRI_RFF - Instructions like "adc reg, reg, imm". 
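The X86TypeInfo record documented above gathers every width-dependent property (suffix, register class, immediate kind, OpSize/REX.W prefixes, odd opcode bit) so that ITy and the BinOp* classes can stamp out all four integer widths from a single description. A rough C++ analogue of that traits idea (illustrative only; the authoritative definitions are the TableGen records above):

#include <cstdint>
#include <cstdio>

// Per-width properties in the spirit of X86TypeInfo.
struct TypeInfo {
  const char *Suffix;    // "b", "w", "l", "q"
  bool HasOpSizePrefix;  // 0x66 prefix: i16 only
  bool HasREX_WPrefix;   // REX.W prefix: i64 only
  bool HasOddOpcode;     // low opcode bit: 0 for i8 ops, 1 for wider ops
};

static const TypeInfo Widths[] = {
  {"b", false, false, false},   // Xi8
  {"w", true,  false, true},    // Xi16
  {"l", false, false, true},    // Xi32
  {"q", false, true,  true},    // Xi64
};

// What ITy does with its opcode parameter: keep the top seven bits and take
// the low bit from the type, so an "add" stamped from base 0x00 encodes as
// 0x00 for the byte form and 0x01 for the word/dword/qword forms.
static uint8_t opcodeFor(uint8_t Base, const TypeInfo &TI) {
  return (uint8_t)((Base & 0xFE) | (TI.HasOddOpcode ? 1 : 0));
}

int main() {
  for (const TypeInfo &TI : Widths)
    std::printf("add%s -> opcode 0x%02X, OpSize=%d, REX.W=%d\n", TI.Suffix,
                opcodeFor(0x00, TI), TI.HasOpSizePrefix, TI.HasREX_WPrefix);
  return 0;
}
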
class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode, Format f> : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), - [(set typeinfo.RegClass:$dst, EFLAGS, + [(set typeinfo.RegClass:$dst, EFLAGS, (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2, EFLAGS))]>; @@ -790,7 +794,7 @@ class BinOpRI8_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), [(set typeinfo.RegClass:$dst, (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>; - + // BinOpRI8_F - Instructions like "cmp reg, imm8". class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode, Format f> @@ -853,14 +857,14 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo, // BinOpMI_RMW - Instructions like "add [mem], imm". class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo, SDNode opnode, Format f> - : BinOpMI<mnemonic, typeinfo, f, + : BinOpMI<mnemonic, typeinfo, f, [(store (opnode (typeinfo.VT (load addr:$dst)), typeinfo.ImmOperator:$src), addr:$dst), (implicit EFLAGS)]>; // BinOpMI_RMW_FF - Instructions like "adc [mem], imm". class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo, SDNode opnode, Format f> - : BinOpMI<mnemonic, typeinfo, f, + : BinOpMI<mnemonic, typeinfo, f, [(store (opnode (typeinfo.VT (load addr:$dst)), typeinfo.ImmOperator:$src, EFLAGS), addr:$dst), (implicit EFLAGS)]>; @@ -868,7 +872,7 @@ class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo, // BinOpMI_F - Instructions like "cmp [mem], imm". class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo, SDPatternOperator opnode, Format f, bits<8> opcode = 0x80> - : BinOpMI<mnemonic, typeinfo, f, + : BinOpMI<mnemonic, typeinfo, f, [(set EFLAGS, (opnode (typeinfo.VT (load addr:$dst)), typeinfo.ImmOperator:$src))], opcode>; @@ -914,6 +918,7 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, let ImmT = typeinfo.ImmEncoding; let Uses = [areg]; let Defs = [areg]; + let hasSideEffects = 0; } /// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is @@ -929,61 +934,61 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, let Constraints = "$src1 = $dst" in { let isCommutable = CommutableRR, isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def #NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; - def #NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>; - def #NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>; - def #NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; + def NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; + def NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>; + def NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>; + def NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; } // isCommutable - def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>; - def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>; - def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>; - def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>; + def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>; + def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>; + def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>; + def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>; - def #NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; - def #NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>; - def 
#NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>; - def #NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; + def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; + def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>; + def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>; + def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { // NOTE: These are order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def #NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>; - def #NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>; - def #NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>; - - def #NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; - def #NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>; - def #NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>; - def #NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>; + def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>; + def NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>; + def NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>; + + def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; + def NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>; + def NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>; + def NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>; } } // Constraints = "$src1 = $dst" - def #NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>; - def #NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>; - def #NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>; - def #NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>; + def NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>; + def NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>; + def NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>; + def NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>; // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. 
- def #NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>; - def #NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>; - def #NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>; - - def #NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>; - def #NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>; - def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>; - def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>; - - def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL, - "{$src, %al|AL, $src}">; - def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX, - "{$src, %ax|AX, $src}">; - def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX, - "{$src, %eax|EAX, $src}">; - def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX, - "{$src, %rax|RAX, $src}">; - } + def NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>; + def NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>; + def NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>; + + def NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>; + def NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>; + def NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>; + def NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>; + + def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL, + "{$src, %al|AL, $src}">; + def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX, + "{$src, %ax|AX, $src}">; + def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX, + "{$src, %eax|EAX, $src}">; + def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX, + "{$src, %rax|RAX, $src}">; + } } /// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is @@ -1000,61 +1005,61 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, let Constraints = "$src1 = $dst" in { let isCommutable = CommutableRR, isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def #NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>; - def #NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>; - def #NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>; - def #NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>; + def NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>; + def NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>; + def NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>; + def NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>; } // isCommutable - def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>; - def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>; - def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>; - def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>; + def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>; + def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>; + def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>; + def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>; - def #NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>; - def #NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>; - def #NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>; - def #NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>; + def NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>; + def NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>; + def NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>; + def NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { // NOTE: These are order 
specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def #NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>; - def #NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>; - def #NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>; - - def #NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>; - def #NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>; - def #NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>; - def #NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>; + def NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>; + def NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>; + def NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>; + + def NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>; + def NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>; + def NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>; } } // Constraints = "$src1 = $dst" - def #NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>; - def #NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>; - def #NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>; - def #NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>; + def NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>; + def NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>; + def NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>; + def NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>; // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. 
- def #NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>; - def #NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>; - def #NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>; - - def #NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>; - def #NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>; - def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>; - def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>; - - def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL, - "{$src, %al|AL, $src}">; - def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX, - "{$src, %ax|AX, $src}">; - def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX, - "{$src, %eax|EAX, $src}">; - def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX, - "{$src, %rax|RAX, $src}">; - } + def NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>; + def NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>; + def NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>; + + def NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>; + def NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>; + def NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>; + def NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>; + + def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL, + "{$src, %al|AL, $src}">; + def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX, + "{$src, %ax|AX, $src}">; + def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX, + "{$src, %eax|EAX, $src}">; + def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX, + "{$src, %rax|RAX, $src}">; + } } /// ArithBinOp_F - This is an arithmetic binary operator where the pattern is @@ -1068,60 +1073,60 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, let Defs = [EFLAGS] in { let isCommutable = CommutableRR, isConvertibleToThreeAddress = ConvertibleToThreeAddress in { - def #NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>; - def #NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>; - def #NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>; - def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; + def NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>; + def NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>; + def NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; } // isCommutable - def #NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>; - def #NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>; - def #NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>; - def #NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>; + def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>; + def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>; + def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>; + def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>; - def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; - def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>; - def #NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>; - def #NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>; + def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; + def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>; + def NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>; + def NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>; let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { // NOTE: These are 
order specific, we want the ri8 forms to be listed // first so that they are slightly preferred to the ri forms. - def #NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>; - def #NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>; - def #NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>; - - def #NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; - def #NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>; - def #NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>; - def #NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>; + def NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>; + def NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>; + def NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>; + + def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>; + def NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>; + def NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>; } - def #NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>; - def #NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>; - def #NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>; - def #NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>; + def NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>; + def NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>; + def NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>; // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. - def #NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>; - def #NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>; - def #NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>; - - def #NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>; - def #NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>; - def #NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>; - def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>; - - def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL, - "{$src, %al|AL, $src}">; - def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX, - "{$src, %ax|AX, $src}">; - def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX, - "{$src, %eax|EAX, $src}">; - def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX, - "{$src, %rax|RAX, $src}">; - } + def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>; + def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>; + def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>; + + def NAME#8mi : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>; + def NAME#16mi : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>; + def NAME#32mi : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>; + def NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>; + + def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL, + "{$src, %al|AL, $src}">; + def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX, + "{$src, %ax|AX, $src}">; + def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX, + "{$src, %eax|EAX, $src}">; + def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX, + "{$src, %rax|RAX, $src}">; + } } @@ -1181,7 +1186,7 @@ let isCompare = 1, Defs = [EFLAGS] in { def TEST16mi : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>; def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>; def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>; - + def 
TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL, "{$src, %al|AL, $src}">; def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX, @@ -1205,12 +1210,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop, PatFrag ld_frag> { def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))], + [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))], IIC_BIN_NONMEM>; def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, - (X86andn_flag RC:$src1, (ld_frag addr:$src2)))], IIC_BIN_MEM>; + (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>; } let Predicates = [HasBMI], Defs = [EFLAGS] in { @@ -1218,6 +1223,17 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in { defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8, VEX_4V, VEX_W; } +let Predicates = [HasBMI] in { + def : Pat<(and (not GR32:$src1), GR32:$src2), + (ANDN32rr GR32:$src1, GR32:$src2)>; + def : Pat<(and (not GR64:$src1), GR64:$src2), + (ANDN64rr GR64:$src1, GR64:$src2)>; + def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)), + (ANDN32rm GR32:$src1, addr:$src2)>; + def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)), + (ANDN64rm GR64:$src1, addr:$src2)>; +} + //===----------------------------------------------------------------------===// // MULX Instruction // diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index adeaf5410d..8f2d0a1aae 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -17,19 +17,19 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", isCommutable = 1 in { - def #NAME#16rr + def NAME#16rr : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))], IIC_CMOV16_RR>,TB,OpSize; - def #NAME#32rr + def NAME#32rr : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"), [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))], IIC_CMOV32_RR>, TB; - def #NAME#64rr + def NAME#64rr :RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"), [(set GR64:$dst, @@ -38,18 +38,18 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { } let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in { - def #NAME#16rm + def NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), CondNode, EFLAGS))], IIC_CMOV16_RM>, TB, OpSize; - def #NAME#32rm + def NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"), [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), CondNode, EFLAGS))], IIC_CMOV32_RM>, TB; - def #NAME#64rm + def NAME#64rm :RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"), [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), diff --git a/lib/Target/X86/X86InstrCompiler.td 
b/lib/Target/X86/X86InstrCompiler.td index a24ddf6f99..a2d34d6086 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -523,18 +523,18 @@ def CMOV_RFP80 : I<0, Pseudo, multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> { let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in { - def #NAME#8 : I<0, Pseudo, (outs GR8:$dst), - (ins i8mem:$ptr, GR8:$val), - !strconcat(mnemonic, "8 PSEUDO!"), []>; - def #NAME#16 : I<0, Pseudo,(outs GR16:$dst), - (ins i16mem:$ptr, GR16:$val), - !strconcat(mnemonic, "16 PSEUDO!"), []>; - def #NAME#32 : I<0, Pseudo, (outs GR32:$dst), - (ins i32mem:$ptr, GR32:$val), - !strconcat(mnemonic, "32 PSEUDO!"), []>; - def #NAME#64 : I<0, Pseudo, (outs GR64:$dst), - (ins i64mem:$ptr, GR64:$val), - !strconcat(mnemonic, "64 PSEUDO!"), []>; + def NAME#8 : I<0, Pseudo, (outs GR8:$dst), + (ins i8mem:$ptr, GR8:$val), + !strconcat(mnemonic, "8 PSEUDO!"), []>; + def NAME#16 : I<0, Pseudo,(outs GR16:$dst), + (ins i16mem:$ptr, GR16:$val), + !strconcat(mnemonic, "16 PSEUDO!"), []>; + def NAME#32 : I<0, Pseudo, (outs GR32:$dst), + (ins i32mem:$ptr, GR32:$val), + !strconcat(mnemonic, "32 PSEUDO!"), []>; + def NAME#64 : I<0, Pseudo, (outs GR64:$dst), + (ins i64mem:$ptr, GR64:$val), + !strconcat(mnemonic, "64 PSEUDO!"), []>; } } @@ -569,10 +569,10 @@ defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">; defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">; multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> { - let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in - def #NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - !strconcat(mnemonic, "6432 PSEUDO!"), []>; + let usesCustomInserter = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in + def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + !strconcat(mnemonic, "6432 PSEUDO!"), []>; } defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">; @@ -614,77 +614,77 @@ multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8, Format ImmMod, string mnemonic> { let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { -def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, - RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 }, - MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), - !strconcat(mnemonic, "{b}\t", +def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, + RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 }, + MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), + !strconcat(mnemonic, "{b}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_NONMEM>, LOCK; +def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, + RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, + MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + !strconcat(mnemonic, "{w}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_NONMEM>, OpSize, LOCK; +def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, + RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, + MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; -def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, - RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, - MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - !strconcat(mnemonic, "{w}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_NONMEM>, OpSize, LOCK; -def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, +def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, 
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, - MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - !strconcat(mnemonic, "{l}\t", + MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; -def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, - RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, - MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - !strconcat(mnemonic, "{q}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_NONMEM>, LOCK; - -def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, - ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 }, - ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2), - !strconcat(mnemonic, "{b}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; - -def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, - ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, - ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2), - !strconcat(mnemonic, "{w}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, OpSize, LOCK; - -def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, - ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, - ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2), - !strconcat(mnemonic, "{l}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; -def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, - ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, - ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2), - !strconcat(mnemonic, "{q}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; - -def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, - ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, - ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2), - !strconcat(mnemonic, "{w}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, OpSize, LOCK; -def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, +def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, + ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 }, + ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2), + !strconcat(mnemonic, "{b}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, LOCK; + +def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, + ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, + ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2), + !strconcat(mnemonic, "{w}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, OpSize, LOCK; + +def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, + ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, + ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2), + !strconcat(mnemonic, "{l}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, LOCK; + +def NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, + ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, + ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2), + !strconcat(mnemonic, "{q}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, LOCK; + +def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, + ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, + ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2), + !strconcat(mnemonic, "{w}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, OpSize, LOCK; +def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, + ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, + ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2), + !strconcat(mnemonic, "{l}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, LOCK; +def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, - ImmMod, (outs), 
(ins i32mem :$dst, i32i8imm :$src2), - !strconcat(mnemonic, "{l}\t", + ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2), + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; -def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, - ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, - ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2), - !strconcat(mnemonic, "{q}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; } @@ -701,18 +701,18 @@ multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form, string mnemonic> { let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { -def #NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst), - !strconcat(mnemonic, "{b}\t$dst"), - [], IIC_UNARY_MEM>, LOCK; -def #NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst), - !strconcat(mnemonic, "{w}\t$dst"), - [], IIC_UNARY_MEM>, OpSize, LOCK; -def #NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst), - !strconcat(mnemonic, "{l}\t$dst"), +def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst), + !strconcat(mnemonic, "{b}\t$dst"), + [], IIC_UNARY_MEM>, LOCK; +def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst), + !strconcat(mnemonic, "{w}\t$dst"), + [], IIC_UNARY_MEM>, OpSize, LOCK; +def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst), + !strconcat(mnemonic, "{l}\t$dst"), + [], IIC_UNARY_MEM>, LOCK; +def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst), + !strconcat(mnemonic, "{q}\t$dst"), [], IIC_UNARY_MEM>, LOCK; -def #NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst), - !strconcat(mnemonic, "{q}\t$dst"), - [], IIC_UNARY_MEM>, LOCK; } } @@ -724,9 +724,9 @@ multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic, SDPatternOperator frag, X86MemOperand x86memop, InstrItinClass itin> { let isCodeGenOnly = 1 in { - def #NAME# : I<Opc, Form, (outs), (ins x86memop:$ptr), - !strconcat(mnemonic, "\t$ptr"), - [(frag addr:$ptr)], itin>, TB, LOCK; + def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr), + !strconcat(mnemonic, "\t$ptr"), + [(frag addr:$ptr)], itin>, TB, LOCK; } } @@ -735,21 +735,21 @@ multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form, InstrItinClass itin8, InstrItinClass itin> { let isCodeGenOnly = 1 in { let Defs = [AL, EFLAGS], Uses = [AL] in - def #NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap), - !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"), - [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK; + def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap), + !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"), + [(frag addr:$ptr, GR8:$swap, 1)], itin8>, TB, LOCK; let Defs = [AX, EFLAGS], Uses = [AX] in - def #NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap), - !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"), - [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK; + def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap), + !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"), + [(frag addr:$ptr, GR16:$swap, 2)], itin>, TB, OpSize, LOCK; let Defs = [EAX, EFLAGS], Uses = [EAX] in - def #NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap), - !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"), - [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK; + def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap), + !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"), + [(frag addr:$ptr, GR32:$swap, 4)], itin>, TB, LOCK; let Defs = [RAX, EFLAGS], Uses = [RAX] in - def #NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap), - 
!strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"), - [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK; + def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap), + !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"), + [(frag addr:$ptr, GR64:$swap, 8)], itin>, TB, LOCK; } } @@ -774,33 +774,33 @@ multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic, string frag, InstrItinClass itin8, InstrItinClass itin> { let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in { - def #NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst), - (ins GR8:$val, i8mem:$ptr), - !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"), - [(set GR8:$dst, - (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))], - itin8>; - def #NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst), - (ins GR16:$val, i16mem:$ptr), - !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"), - [(set - GR16:$dst, - (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))], - itin>, OpSize; - def #NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst), - (ins GR32:$val, i32mem:$ptr), - !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"), + def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst), + (ins GR8:$val, i8mem:$ptr), + !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"), + [(set GR8:$dst, + (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))], + itin8>; + def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst), + (ins GR16:$val, i16mem:$ptr), + !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"), + [(set + GR16:$dst, + (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))], + itin>, OpSize; + def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$val, i32mem:$ptr), + !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"), + [(set + GR32:$dst, + (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))], + itin>; + def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$val, i64mem:$ptr), + !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"), [(set - GR32:$dst, - (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))], + GR64:$dst, + (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))], itin>; - def #NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst), - (ins GR64:$val, i64mem:$ptr), - !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"), - [(set - GR64:$dst, - (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))], - itin>; } } diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index d360a73b34..7759a8a2da 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -60,14 +60,14 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, PatFrag MemFrag128, PatFrag MemFrag256, SDNode Op, ValueType OpTy128, ValueType OpTy256> { defm r213 : fma3p_rm<opc213, - !strconcat(OpcodeStr, !strconcat("213", PackTy)), + !strconcat(OpcodeStr, "213", PackTy), MemFrag128, MemFrag256, OpTy128, OpTy256, Op>; let neverHasSideEffects = 1 in { defm r132 : fma3p_rm<opc132, - !strconcat(OpcodeStr, !strconcat("132", PackTy)), + !strconcat(OpcodeStr, "132", PackTy), MemFrag128, MemFrag256, OpTy128, OpTy256>; defm r231 : fma3p_rm<opc231, - !strconcat(OpcodeStr, !strconcat("231", PackTy)), + !strconcat(OpcodeStr, "231", PackTy), MemFrag128, MemFrag256, OpTy128, OpTy256>; } // neverHasSideEffects = 1 } @@ -160,15 +160,15 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, X86MemOperand x86memop, Operand memop, PatFrag mem_frag, ComplexPattern mem_cpat> { let neverHasSideEffects = 1 in { - defm r132 : fma3s_rm<opc132, !strconcat(OpStr, !strconcat("132", PackTy)), + defm r132 : 
fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy), x86memop, RC, OpVT, mem_frag>; - defm r231 : fma3s_rm<opc231, !strconcat(OpStr, !strconcat("231", PackTy)), + defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy), x86memop, RC, OpVT, mem_frag>; } -defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), +defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy), x86memop, RC, OpVT, mem_frag, OpNode>, - fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), + fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy), memop, mem_cpat, Int, RC>; } @@ -220,7 +220,7 @@ multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC, [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>; // For disassembler -let isCodeGenOnly = 1 in +let isCodeGenOnly = 1, hasSideEffects = 0 in def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, @@ -294,7 +294,7 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set VR256:$dst, (OpNode VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L; // For disassembler -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 0 in { def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 09ab995166..7025e93fa1 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -27,6 +27,11 @@ def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisFP<1>, SDTCisVT<3, i8>]>; +def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>; +def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>; +def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>; +def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>; + def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; @@ -128,6 +133,7 @@ def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>; def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; +def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 72d8b5c7fd..0e833ffb73 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -24,8 +24,8 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" @@ -302,7 +302,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD }, { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD }, { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD }, - { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE }, { X86::FsMOVAPDrr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE }, { X86::FsMOVAPSrr, X86::MOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE }, { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD }, @@ -360,7 +360,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) 
{ X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD }, { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD }, // AVX 128-bit versions of foldable instructions - { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, + { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE }, { X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE }, { X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE }, { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, @@ -472,9 +472,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 }, - { X86::SQRTPDr_Int, X86::SQRTPDm_Int, TB_ALIGN_16 }, { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 }, - { X86::SQRTPSr_Int, X86::SQRTPSm_Int, TB_ALIGN_16 }, { X86::SQRTSDr, X86::SQRTSDm, 0 }, { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 }, { X86::SQRTSSr, X86::SQRTSSm, 0 }, @@ -515,27 +513,25 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 }, { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, TB_ALIGN_16 }, { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 }, - { X86::VMOVUPDrr, X86::VMOVUPDrm, TB_ALIGN_16 }, + { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 }, { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 }, { X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 }, { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 }, { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 }, - { X86::VPABSBrr128, X86::VPABSBrm128, TB_ALIGN_16 }, - { X86::VPABSDrr128, X86::VPABSDrm128, TB_ALIGN_16 }, - { X86::VPABSWrr128, X86::VPABSWrm128, TB_ALIGN_16 }, - { X86::VPERMILPDri, X86::VPERMILPDmi, TB_ALIGN_16 }, - { X86::VPERMILPSri, X86::VPERMILPSmi, TB_ALIGN_16 }, - { X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 }, - { X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 }, - { X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 }, - { X86::VRCPPSr, X86::VRCPPSm, TB_ALIGN_16 }, - { X86::VRCPPSr_Int, X86::VRCPPSm_Int, TB_ALIGN_16 }, - { X86::VRSQRTPSr, X86::VRSQRTPSm, TB_ALIGN_16 }, - { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, TB_ALIGN_16 }, - { X86::VSQRTPDr, X86::VSQRTPDm, TB_ALIGN_16 }, - { X86::VSQRTPDr_Int, X86::VSQRTPDm_Int, TB_ALIGN_16 }, - { X86::VSQRTPSr, X86::VSQRTPSm, TB_ALIGN_16 }, - { X86::VSQRTPSr_Int, X86::VSQRTPSm_Int, TB_ALIGN_16 }, + { X86::VPABSBrr128, X86::VPABSBrm128, 0 }, + { X86::VPABSDrr128, X86::VPABSDrm128, 0 }, + { X86::VPABSWrr128, X86::VPABSWrm128, 0 }, + { X86::VPERMILPDri, X86::VPERMILPDmi, 0 }, + { X86::VPERMILPSri, X86::VPERMILPSmi, 0 }, + { X86::VPSHUFDri, X86::VPSHUFDmi, 0 }, + { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 }, + { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 }, + { X86::VRCPPSr, X86::VRCPPSm, 0 }, + { X86::VRCPPSr_Int, X86::VRCPPSm_Int, 0 }, + { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 }, + { X86::VRSQRTPSr_Int, X86::VRSQRTPSm_Int, 0 }, + { X86::VSQRTPDr, X86::VSQRTPDm, 0 }, + { X86::VSQRTPSr, X86::VSQRTPSm, 0 }, { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 }, { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 }, { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE }, @@ -546,28 +542,41 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 }, { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, - { X86::VPERMILPDYri, X86::VPERMILPDYmi, TB_ALIGN_32 }, - { X86::VPERMILPSYri, X86::VPERMILPSYmi, TB_ALIGN_32 }, + { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 }, + { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 }, // AVX2 foldable instructions - { X86::VPABSBrr256, 
X86::VPABSBrm256, TB_ALIGN_32 }, - { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_32 }, - { X86::VPABSWrr256, X86::VPABSWrm256, TB_ALIGN_32 }, - { X86::VPSHUFDYri, X86::VPSHUFDYmi, TB_ALIGN_32 }, - { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, TB_ALIGN_32 }, - { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, TB_ALIGN_32 }, - { X86::VRCPPSYr, X86::VRCPPSYm, TB_ALIGN_32 }, - { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, TB_ALIGN_32 }, - { X86::VRSQRTPSYr, X86::VRSQRTPSYm, TB_ALIGN_32 }, - { X86::VRSQRTPSYr_Int, X86::VRSQRTPSYm_Int, TB_ALIGN_32 }, - { X86::VSQRTPDYr, X86::VSQRTPDYm, TB_ALIGN_32 }, - { X86::VSQRTPDYr_Int, X86::VSQRTPDYm_Int, TB_ALIGN_32 }, - { X86::VSQRTPSYr, X86::VSQRTPSYm, TB_ALIGN_32 }, - { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 }, + { X86::VPABSBrr256, X86::VPABSBrm256, 0 }, + { X86::VPABSDrr256, X86::VPABSDrm256, 0 }, + { X86::VPABSWrr256, X86::VPABSWrm256, 0 }, + { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 }, + { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 }, + { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 }, + { X86::VRCPPSYr, X86::VRCPPSYm, 0 }, + { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 }, + { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 }, + { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 }, + { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 }, { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE }, { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE }, - // BMI/BMI2 foldable instructions + // BMI/BMI2/LZCNT/POPCNT foldable instructions + { X86::BEXTR32rr, X86::BEXTR32rm, 0 }, + { X86::BEXTR64rr, X86::BEXTR64rm, 0 }, + { X86::BLSI32rr, X86::BLSI32rm, 0 }, + { X86::BLSI64rr, X86::BLSI64rm, 0 }, + { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 }, + { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 }, + { X86::BLSR32rr, X86::BLSR32rm, 0 }, + { X86::BLSR64rr, X86::BLSR64rm, 0 }, + { X86::BZHI32rr, X86::BZHI32rm, 0 }, + { X86::BZHI64rr, X86::BZHI64rm, 0 }, + { X86::LZCNT16rr, X86::LZCNT16rm, 0 }, + { X86::LZCNT32rr, X86::LZCNT32rm, 0 }, + { X86::LZCNT64rr, X86::LZCNT64rm, 0 }, + { X86::POPCNT16rr, X86::POPCNT16rm, 0 }, + { X86::POPCNT32rr, X86::POPCNT32rm, 0 }, + { X86::POPCNT64rr, X86::POPCNT64rm, 0 }, { X86::RORX32ri, X86::RORX32mi, 0 }, { X86::RORX64ri, X86::RORX64mi, 0 }, { X86::SARX32rr, X86::SARX32rm, 0 }, @@ -576,6 +585,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::SHRX64rr, X86::SHRX64rm, 0 }, { X86::SHLX32rr, X86::SHLX32rm, 0 }, { X86::SHLX64rr, X86::SHLX64rm, 0 }, + { X86::TZCNT16rr, X86::TZCNT16rm, 0 }, + { X86::TZCNT32rr, X86::TZCNT32rm, 0 }, + { X86::TZCNT64rr, X86::TZCNT64rm, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { @@ -696,21 +708,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 }, - { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 }, { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 }, - { X86::MAXPSrr_Int, X86::MAXPSrm_Int, TB_ALIGN_16 }, { X86::MAXSDrr, X86::MAXSDrm, 0 }, - { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 }, { X86::MAXSSrr, X86::MAXSSrm, 0 }, - { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 }, { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 }, - { X86::MINPDrr_Int, X86::MINPDrm_Int, TB_ALIGN_16 }, { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 }, - { X86::MINPSrr_Int, X86::MINPSrm_Int, TB_ALIGN_16 }, { X86::MINSDrr, X86::MINSDrm, 0 }, - { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, { X86::MINSSrr, X86::MINSSrm, 0 }, - { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 }, { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 
}, { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 }, @@ -761,6 +765,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 }, { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 }, { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 }, + { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 }, + { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 }, + { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 }, + { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 }, + { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 }, + { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 }, + { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 }, + { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 }, { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 }, { X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 }, { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 }, @@ -832,31 +844,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 }, { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 }, - { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, TB_ALIGN_16 }, - { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 }, + { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 }, + { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 }, { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 }, { X86::VSQRTSDr, X86::VSQRTSDm, 0 }, { X86::VSQRTSSr, X86::VSQRTSSm, 0 }, - { X86::VADDPDrr, X86::VADDPDrm, TB_ALIGN_16 }, - { X86::VADDPSrr, X86::VADDPSrm, TB_ALIGN_16 }, + { X86::VADDPDrr, X86::VADDPDrm, 0 }, + { X86::VADDPSrr, X86::VADDPSrm, 0 }, { X86::VADDSDrr, X86::VADDSDrm, 0 }, { X86::VADDSSrr, X86::VADDSSrm, 0 }, - { X86::VADDSUBPDrr, X86::VADDSUBPDrm, TB_ALIGN_16 }, - { X86::VADDSUBPSrr, X86::VADDSUBPSrm, TB_ALIGN_16 }, - { X86::VANDNPDrr, X86::VANDNPDrm, TB_ALIGN_16 }, - { X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 }, - { X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 }, - { X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 }, - { X86::VBLENDPDrri, X86::VBLENDPDrmi, TB_ALIGN_16 }, - { X86::VBLENDPSrri, X86::VBLENDPSrmi, TB_ALIGN_16 }, - { X86::VBLENDVPDrr, X86::VBLENDVPDrm, TB_ALIGN_16 }, - { X86::VBLENDVPSrr, X86::VBLENDVPSrm, TB_ALIGN_16 }, - { X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 }, - { X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 }, + { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 }, + { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 }, + { X86::VANDNPDrr, X86::VANDNPDrm, 0 }, + { X86::VANDNPSrr, X86::VANDNPSrm, 0 }, + { X86::VANDPDrr, X86::VANDPDrm, 0 }, + { X86::VANDPSrr, X86::VANDPSrm, 0 }, + { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 }, + { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 }, + { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 }, + { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 }, + { X86::VCMPPDrri, X86::VCMPPDrmi, 0 }, + { X86::VCMPPSrri, X86::VCMPPSrmi, 0 }, { X86::VCMPSDrr, X86::VCMPSDrm, 0 }, { X86::VCMPSSrr, X86::VCMPSSrm, 0 }, - { X86::VDIVPDrr, X86::VDIVPDrm, TB_ALIGN_16 }, - { X86::VDIVPSrr, X86::VDIVPSrm, TB_ALIGN_16 }, + { X86::VDIVPDrr, X86::VDIVPDrm, 0 }, + { X86::VDIVPSrr, X86::VDIVPSrm, 0 }, { X86::VDIVSDrr, X86::VDIVSDrm, 0 }, { X86::VDIVSSrr, X86::VDIVSSrm, 0 }, { X86::VFsANDNPDrr, X86::VFsANDNPDrm, TB_ALIGN_16 }, @@ -867,263 +879,267 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFsORPSrr, X86::VFsORPSrm, TB_ALIGN_16 }, { X86::VFsXORPDrr, X86::VFsXORPDrm, TB_ALIGN_16 }, { X86::VFsXORPSrr, X86::VFsXORPSrm, TB_ALIGN_16 }, - { X86::VHADDPDrr, X86::VHADDPDrm, TB_ALIGN_16 }, - { X86::VHADDPSrr, X86::VHADDPSrm, TB_ALIGN_16 }, - { X86::VHSUBPDrr, X86::VHSUBPDrm, TB_ALIGN_16 }, - { X86::VHSUBPSrr, X86::VHSUBPSrm, TB_ALIGN_16 }, + { 
X86::VHADDPDrr, X86::VHADDPDrm, 0 }, + { X86::VHADDPSrr, X86::VHADDPSrm, 0 }, + { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 }, + { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 }, { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 }, { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 }, - { X86::VMAXPDrr, X86::VMAXPDrm, TB_ALIGN_16 }, - { X86::VMAXPDrr_Int, X86::VMAXPDrm_Int, TB_ALIGN_16 }, - { X86::VMAXPSrr, X86::VMAXPSrm, TB_ALIGN_16 }, - { X86::VMAXPSrr_Int, X86::VMAXPSrm_Int, TB_ALIGN_16 }, + { X86::VMAXPDrr, X86::VMAXPDrm, 0 }, + { X86::VMAXPSrr, X86::VMAXPSrm, 0 }, { X86::VMAXSDrr, X86::VMAXSDrm, 0 }, - { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 }, { X86::VMAXSSrr, X86::VMAXSSrm, 0 }, - { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 }, - { X86::VMINPDrr, X86::VMINPDrm, TB_ALIGN_16 }, - { X86::VMINPDrr_Int, X86::VMINPDrm_Int, TB_ALIGN_16 }, - { X86::VMINPSrr, X86::VMINPSrm, TB_ALIGN_16 }, - { X86::VMINPSrr_Int, X86::VMINPSrm_Int, TB_ALIGN_16 }, + { X86::VMINPDrr, X86::VMINPDrm, 0 }, + { X86::VMINPSrr, X86::VMINPSrm, 0 }, { X86::VMINSDrr, X86::VMINSDrm, 0 }, - { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 }, { X86::VMINSSrr, X86::VMINSSrm, 0 }, - { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 }, - { X86::VMPSADBWrri, X86::VMPSADBWrmi, TB_ALIGN_16 }, - { X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 }, - { X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 }, + { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 }, + { X86::VMULPDrr, X86::VMULPDrm, 0 }, + { X86::VMULPSrr, X86::VMULPSrm, 0 }, { X86::VMULSDrr, X86::VMULSDrm, 0 }, { X86::VMULSSrr, X86::VMULSSrm, 0 }, - { X86::VORPDrr, X86::VORPDrm, TB_ALIGN_16 }, - { X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 }, - { X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 }, - { X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 }, - { X86::VPACKUSDWrr, X86::VPACKUSDWrm, TB_ALIGN_16 }, - { X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 }, - { X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 }, - { X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 }, - { X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 }, - { X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 }, - { X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 }, - { X86::VPADDUSBrr, X86::VPADDUSBrm, TB_ALIGN_16 }, - { X86::VPADDUSWrr, X86::VPADDUSWrm, TB_ALIGN_16 }, - { X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 }, - { X86::VPALIGNR128rr, X86::VPALIGNR128rm, TB_ALIGN_16 }, - { X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 }, - { X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 }, - { X86::VPAVGBrr, X86::VPAVGBrm, TB_ALIGN_16 }, - { X86::VPAVGWrr, X86::VPAVGWrm, TB_ALIGN_16 }, - { X86::VPBLENDWrri, X86::VPBLENDWrmi, TB_ALIGN_16 }, - { X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 }, - { X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 }, - { X86::VPCMPEQQrr, X86::VPCMPEQQrm, TB_ALIGN_16 }, - { X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 }, - { X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 }, - { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 }, - { X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 }, - { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 }, - { X86::VPHADDDrr, X86::VPHADDDrm, TB_ALIGN_16 }, - { X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 }, - { X86::VPHADDWrr, X86::VPHADDWrm, TB_ALIGN_16 }, - { X86::VPHSUBDrr, X86::VPHSUBDrm, TB_ALIGN_16 }, - { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 }, - { X86::VPHSUBWrr, X86::VPHSUBWrm, TB_ALIGN_16 }, - { X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 }, - { X86::VPERMILPSrr, X86::VPERMILPSrm, TB_ALIGN_16 }, - { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 }, - { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, TB_ALIGN_16 }, - { X86::VPMADDWDrr, X86::VPMADDWDrm, 
TB_ALIGN_16 }, - { X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 }, - { X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 }, - { X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 }, - { X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 }, - { X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 }, - { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, TB_ALIGN_16 }, - { X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 }, - { X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 }, - { X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 }, - { X86::VPMULLWrr, X86::VPMULLWrm, TB_ALIGN_16 }, - { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 }, - { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 }, - { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 }, - { X86::VPSHUFBrr, X86::VPSHUFBrm, TB_ALIGN_16 }, - { X86::VPSIGNBrr, X86::VPSIGNBrm, TB_ALIGN_16 }, - { X86::VPSIGNWrr, X86::VPSIGNWrm, TB_ALIGN_16 }, - { X86::VPSIGNDrr, X86::VPSIGNDrm, TB_ALIGN_16 }, - { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 }, - { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 }, - { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 }, - { X86::VPSRADrr, X86::VPSRADrm, TB_ALIGN_16 }, - { X86::VPSRAWrr, X86::VPSRAWrm, TB_ALIGN_16 }, - { X86::VPSRLDrr, X86::VPSRLDrm, TB_ALIGN_16 }, - { X86::VPSRLQrr, X86::VPSRLQrm, TB_ALIGN_16 }, - { X86::VPSRLWrr, X86::VPSRLWrm, TB_ALIGN_16 }, - { X86::VPSUBBrr, X86::VPSUBBrm, TB_ALIGN_16 }, - { X86::VPSUBDrr, X86::VPSUBDrm, TB_ALIGN_16 }, - { X86::VPSUBSBrr, X86::VPSUBSBrm, TB_ALIGN_16 }, - { X86::VPSUBSWrr, X86::VPSUBSWrm, TB_ALIGN_16 }, - { X86::VPSUBWrr, X86::VPSUBWrm, TB_ALIGN_16 }, - { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, TB_ALIGN_16 }, - { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, TB_ALIGN_16 }, - { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, TB_ALIGN_16 }, - { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, TB_ALIGN_16 }, - { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, TB_ALIGN_16 }, - { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, TB_ALIGN_16 }, - { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, TB_ALIGN_16 }, - { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, TB_ALIGN_16 }, - { X86::VPXORrr, X86::VPXORrm, TB_ALIGN_16 }, - { X86::VSHUFPDrri, X86::VSHUFPDrmi, TB_ALIGN_16 }, - { X86::VSHUFPSrri, X86::VSHUFPSrmi, TB_ALIGN_16 }, - { X86::VSUBPDrr, X86::VSUBPDrm, TB_ALIGN_16 }, - { X86::VSUBPSrr, X86::VSUBPSrm, TB_ALIGN_16 }, + { X86::VORPDrr, X86::VORPDrm, 0 }, + { X86::VORPSrr, X86::VORPSrm, 0 }, + { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 }, + { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 }, + { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 }, + { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 }, + { X86::VPADDBrr, X86::VPADDBrm, 0 }, + { X86::VPADDDrr, X86::VPADDDrm, 0 }, + { X86::VPADDQrr, X86::VPADDQrm, 0 }, + { X86::VPADDSBrr, X86::VPADDSBrm, 0 }, + { X86::VPADDSWrr, X86::VPADDSWrm, 0 }, + { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 }, + { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 }, + { X86::VPADDWrr, X86::VPADDWrm, 0 }, + { X86::VPALIGNR128rr, X86::VPALIGNR128rm, 0 }, + { X86::VPANDNrr, X86::VPANDNrm, 0 }, + { X86::VPANDrr, X86::VPANDrm, 0 }, + { X86::VPAVGBrr, X86::VPAVGBrm, 0 }, + { X86::VPAVGWrr, X86::VPAVGWrm, 0 }, + { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 }, + { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 }, + { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 }, + { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 }, + { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 }, + { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 }, + { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 }, + { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 }, + { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 }, + { X86::VPHADDDrr, X86::VPHADDDrm, 0 }, + { X86::VPHADDSWrr128, X86::VPHADDSWrm128, 0 }, + { X86::VPHADDWrr, X86::VPHADDWrm, 0 }, + { 
X86::VPHSUBDrr, X86::VPHSUBDrm, 0 }, + { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, 0 }, + { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 }, + { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 }, + { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 }, + { X86::VPINSRWrri, X86::VPINSRWrmi, 0 }, + { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, 0 }, + { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 }, + { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 }, + { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 }, + { X86::VPMINSWrr, X86::VPMINSWrm, 0 }, + { X86::VPMINUBrr, X86::VPMINUBrm, 0 }, + { X86::VPMINSBrr, X86::VPMINSBrm, 0 }, + { X86::VPMINSDrr, X86::VPMINSDrm, 0 }, + { X86::VPMINUDrr, X86::VPMINUDrm, 0 }, + { X86::VPMINUWrr, X86::VPMINUWrm, 0 }, + { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 }, + { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 }, + { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 }, + { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 }, + { X86::VPMULDQrr, X86::VPMULDQrm, 0 }, + { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, 0 }, + { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 }, + { X86::VPMULHWrr, X86::VPMULHWrm, 0 }, + { X86::VPMULLDrr, X86::VPMULLDrm, 0 }, + { X86::VPMULLWrr, X86::VPMULLWrm, 0 }, + { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 }, + { X86::VPORrr, X86::VPORrm, 0 }, + { X86::VPSADBWrr, X86::VPSADBWrm, 0 }, + { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 }, + { X86::VPSIGNBrr, X86::VPSIGNBrm, 0 }, + { X86::VPSIGNWrr, X86::VPSIGNWrm, 0 }, + { X86::VPSIGNDrr, X86::VPSIGNDrm, 0 }, + { X86::VPSLLDrr, X86::VPSLLDrm, 0 }, + { X86::VPSLLQrr, X86::VPSLLQrm, 0 }, + { X86::VPSLLWrr, X86::VPSLLWrm, 0 }, + { X86::VPSRADrr, X86::VPSRADrm, 0 }, + { X86::VPSRAWrr, X86::VPSRAWrm, 0 }, + { X86::VPSRLDrr, X86::VPSRLDrm, 0 }, + { X86::VPSRLQrr, X86::VPSRLQrm, 0 }, + { X86::VPSRLWrr, X86::VPSRLWrm, 0 }, + { X86::VPSUBBrr, X86::VPSUBBrm, 0 }, + { X86::VPSUBDrr, X86::VPSUBDrm, 0 }, + { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 }, + { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 }, + { X86::VPSUBWrr, X86::VPSUBWrm, 0 }, + { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 }, + { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 }, + { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 }, + { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 }, + { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 }, + { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 }, + { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 }, + { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 }, + { X86::VPXORrr, X86::VPXORrm, 0 }, + { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 }, + { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 }, + { X86::VSUBPDrr, X86::VSUBPDrm, 0 }, + { X86::VSUBPSrr, X86::VSUBPSrm, 0 }, { X86::VSUBSDrr, X86::VSUBSDrm, 0 }, { X86::VSUBSSrr, X86::VSUBSSrm, 0 }, - { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, TB_ALIGN_16 }, - { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, TB_ALIGN_16 }, - { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 }, - { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 }, - { X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 }, - { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 }, + { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 }, + { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 }, + { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 }, + { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 }, + { X86::VXORPDrr, X86::VXORPDrm, 0 }, + { X86::VXORPSrr, X86::VXORPSrm, 0 }, // AVX 256-bit foldable instructions - { X86::VADDPDYrr, X86::VADDPDYrm, TB_ALIGN_32 }, - { X86::VADDPSYrr, X86::VADDPSYrm, TB_ALIGN_32 }, - { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, TB_ALIGN_32 }, - { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, TB_ALIGN_32 }, - { X86::VANDNPDYrr, X86::VANDNPDYrm, TB_ALIGN_32 }, - { X86::VANDNPSYrr, X86::VANDNPSYrm, TB_ALIGN_32 }, - { X86::VANDPDYrr, X86::VANDPDYrm, TB_ALIGN_32 }, - { 
X86::VANDPSYrr, X86::VANDPSYrm, TB_ALIGN_32 }, - { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, TB_ALIGN_32 }, - { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, TB_ALIGN_32 }, - { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, TB_ALIGN_32 }, - { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, TB_ALIGN_32 }, - { X86::VCMPPDYrri, X86::VCMPPDYrmi, TB_ALIGN_32 }, - { X86::VCMPPSYrri, X86::VCMPPSYrmi, TB_ALIGN_32 }, - { X86::VDIVPDYrr, X86::VDIVPDYrm, TB_ALIGN_32 }, - { X86::VDIVPSYrr, X86::VDIVPSYrm, TB_ALIGN_32 }, - { X86::VHADDPDYrr, X86::VHADDPDYrm, TB_ALIGN_32 }, - { X86::VHADDPSYrr, X86::VHADDPSYrm, TB_ALIGN_32 }, - { X86::VHSUBPDYrr, X86::VHSUBPDYrm, TB_ALIGN_32 }, - { X86::VHSUBPSYrr, X86::VHSUBPSYrm, TB_ALIGN_32 }, - { X86::VINSERTF128rr, X86::VINSERTF128rm, TB_ALIGN_32 }, - { X86::VMAXPDYrr, X86::VMAXPDYrm, TB_ALIGN_32 }, - { X86::VMAXPDYrr_Int, X86::VMAXPDYrm_Int, TB_ALIGN_32 }, - { X86::VMAXPSYrr, X86::VMAXPSYrm, TB_ALIGN_32 }, - { X86::VMAXPSYrr_Int, X86::VMAXPSYrm_Int, TB_ALIGN_32 }, - { X86::VMINPDYrr, X86::VMINPDYrm, TB_ALIGN_32 }, - { X86::VMINPDYrr_Int, X86::VMINPDYrm_Int, TB_ALIGN_32 }, - { X86::VMINPSYrr, X86::VMINPSYrm, TB_ALIGN_32 }, - { X86::VMINPSYrr_Int, X86::VMINPSYrm_Int, TB_ALIGN_32 }, - { X86::VMULPDYrr, X86::VMULPDYrm, TB_ALIGN_32 }, - { X86::VMULPSYrr, X86::VMULPSYrm, TB_ALIGN_32 }, - { X86::VORPDYrr, X86::VORPDYrm, TB_ALIGN_32 }, - { X86::VORPSYrr, X86::VORPSYrm, TB_ALIGN_32 }, - { X86::VPERM2F128rr, X86::VPERM2F128rm, TB_ALIGN_32 }, - { X86::VPERMILPDYrr, X86::VPERMILPDYrm, TB_ALIGN_32 }, - { X86::VPERMILPSYrr, X86::VPERMILPSYrm, TB_ALIGN_32 }, - { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, TB_ALIGN_32 }, - { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, TB_ALIGN_32 }, - { X86::VSUBPDYrr, X86::VSUBPDYrm, TB_ALIGN_32 }, - { X86::VSUBPSYrr, X86::VSUBPSYrm, TB_ALIGN_32 }, - { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, TB_ALIGN_32 }, - { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, TB_ALIGN_32 }, - { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, TB_ALIGN_32 }, - { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, TB_ALIGN_32 }, - { X86::VXORPDYrr, X86::VXORPDYrm, TB_ALIGN_32 }, - { X86::VXORPSYrr, X86::VXORPSYrm, TB_ALIGN_32 }, + { X86::VADDPDYrr, X86::VADDPDYrm, 0 }, + { X86::VADDPSYrr, X86::VADDPSYrm, 0 }, + { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 }, + { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 }, + { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 }, + { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 }, + { X86::VANDPDYrr, X86::VANDPDYrm, 0 }, + { X86::VANDPSYrr, X86::VANDPSYrm, 0 }, + { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 }, + { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 }, + { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 }, + { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 }, + { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 }, + { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 }, + { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 }, + { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 }, + { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 }, + { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 }, + { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 }, + { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 }, + { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 }, + { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 }, + { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 }, + { X86::VMINPDYrr, X86::VMINPDYrm, 0 }, + { X86::VMINPSYrr, X86::VMINPSYrm, 0 }, + { X86::VMULPDYrr, X86::VMULPDYrm, 0 }, + { X86::VMULPSYrr, X86::VMULPSYrm, 0 }, + { X86::VORPDYrr, X86::VORPDYrm, 0 }, + { X86::VORPSYrr, X86::VORPSYrm, 0 }, + { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 }, + { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 }, + { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 }, + { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 }, + { 
X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 }, + { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 }, + { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 }, + { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 }, + { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 }, + { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 }, + { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 }, + { X86::VXORPDYrr, X86::VXORPDYrm, 0 }, + { X86::VXORPSYrr, X86::VXORPSYrm, 0 }, // AVX2 foldable instructions - { X86::VINSERTI128rr, X86::VINSERTI128rm, TB_ALIGN_16 }, - { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, TB_ALIGN_32 }, - { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, TB_ALIGN_32 }, - { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, TB_ALIGN_32 }, - { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, TB_ALIGN_32 }, - { X86::VPADDBYrr, X86::VPADDBYrm, TB_ALIGN_32 }, - { X86::VPADDDYrr, X86::VPADDDYrm, TB_ALIGN_32 }, - { X86::VPADDQYrr, X86::VPADDQYrm, TB_ALIGN_32 }, - { X86::VPADDSBYrr, X86::VPADDSBYrm, TB_ALIGN_32 }, - { X86::VPADDSWYrr, X86::VPADDSWYrm, TB_ALIGN_32 }, - { X86::VPADDUSBYrr, X86::VPADDUSBYrm, TB_ALIGN_32 }, - { X86::VPADDUSWYrr, X86::VPADDUSWYrm, TB_ALIGN_32 }, - { X86::VPADDWYrr, X86::VPADDWYrm, TB_ALIGN_32 }, - { X86::VPALIGNR256rr, X86::VPALIGNR256rm, TB_ALIGN_32 }, - { X86::VPANDNYrr, X86::VPANDNYrm, TB_ALIGN_32 }, - { X86::VPANDYrr, X86::VPANDYrm, TB_ALIGN_32 }, - { X86::VPAVGBYrr, X86::VPAVGBYrm, TB_ALIGN_32 }, - { X86::VPAVGWYrr, X86::VPAVGWYrm, TB_ALIGN_32 }, - { X86::VPBLENDDrri, X86::VPBLENDDrmi, TB_ALIGN_32 }, - { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, TB_ALIGN_32 }, - { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, TB_ALIGN_32 }, - { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, TB_ALIGN_32 }, - { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, TB_ALIGN_32 }, - { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, TB_ALIGN_32 }, - { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, TB_ALIGN_32 }, - { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, TB_ALIGN_32 }, - { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, TB_ALIGN_32 }, - { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, TB_ALIGN_32 }, - { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_32 }, - { X86::VPERM2I128rr, X86::VPERM2I128rm, TB_ALIGN_32 }, - { X86::VPERMDYrr, X86::VPERMDYrm, TB_ALIGN_32 }, - { X86::VPERMPDYri, X86::VPERMPDYmi, TB_ALIGN_32 }, - { X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 }, - { X86::VPERMQYri, X86::VPERMQYmi, TB_ALIGN_32 }, - { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 }, - { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 }, - { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 }, - { X86::VPHSUBDYrr, X86::VPHSUBDYrm, TB_ALIGN_32 }, - { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 }, - { X86::VPHSUBWYrr, X86::VPHSUBWYrm, TB_ALIGN_32 }, - { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 }, - { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 }, - { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 }, - { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_32 }, - { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_32 }, - { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_32 }, - { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_32 }, - { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_32 }, - { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_32 }, - { X86::VPMULHUWYrr, X86::VPMULHUWYrm, TB_ALIGN_32 }, - { X86::VPMULHWYrr, X86::VPMULHWYrm, TB_ALIGN_32 }, - { X86::VPMULLDYrr, X86::VPMULLDYrm, TB_ALIGN_32 }, - { X86::VPMULLWYrr, X86::VPMULLWYrm, TB_ALIGN_32 }, - { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_32 }, - { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_32 }, - { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_32 }, - { X86::VPSHUFBYrr, X86::VPSHUFBYrm, TB_ALIGN_32 }, - { X86::VPSIGNBYrr, 
X86::VPSIGNBYrm, TB_ALIGN_32 }, - { X86::VPSIGNWYrr, X86::VPSIGNWYrm, TB_ALIGN_32 }, - { X86::VPSIGNDYrr, X86::VPSIGNDYrm, TB_ALIGN_32 }, - { X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 }, - { X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 }, - { X86::VPSLLWYrr, X86::VPSLLWYrm, TB_ALIGN_16 }, - { X86::VPSLLVDrr, X86::VPSLLVDrm, TB_ALIGN_16 }, - { X86::VPSLLVDYrr, X86::VPSLLVDYrm, TB_ALIGN_32 }, - { X86::VPSLLVQrr, X86::VPSLLVQrm, TB_ALIGN_16 }, - { X86::VPSLLVQYrr, X86::VPSLLVQYrm, TB_ALIGN_32 }, - { X86::VPSRADYrr, X86::VPSRADYrm, TB_ALIGN_16 }, - { X86::VPSRAWYrr, X86::VPSRAWYrm, TB_ALIGN_16 }, - { X86::VPSRAVDrr, X86::VPSRAVDrm, TB_ALIGN_16 }, - { X86::VPSRAVDYrr, X86::VPSRAVDYrm, TB_ALIGN_32 }, - { X86::VPSRLDYrr, X86::VPSRLDYrm, TB_ALIGN_16 }, - { X86::VPSRLQYrr, X86::VPSRLQYrm, TB_ALIGN_16 }, - { X86::VPSRLWYrr, X86::VPSRLWYrm, TB_ALIGN_16 }, - { X86::VPSRLVDrr, X86::VPSRLVDrm, TB_ALIGN_16 }, - { X86::VPSRLVDYrr, X86::VPSRLVDYrm, TB_ALIGN_32 }, - { X86::VPSRLVQrr, X86::VPSRLVQrm, TB_ALIGN_16 }, - { X86::VPSRLVQYrr, X86::VPSRLVQYrm, TB_ALIGN_32 }, - { X86::VPSUBBYrr, X86::VPSUBBYrm, TB_ALIGN_32 }, - { X86::VPSUBDYrr, X86::VPSUBDYrm, TB_ALIGN_32 }, - { X86::VPSUBSBYrr, X86::VPSUBSBYrm, TB_ALIGN_32 }, - { X86::VPSUBSWYrr, X86::VPSUBSWYrm, TB_ALIGN_32 }, - { X86::VPSUBWYrr, X86::VPSUBWYrm, TB_ALIGN_32 }, - { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, TB_ALIGN_32 }, - { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, TB_ALIGN_32 }, - { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, TB_ALIGN_16 }, - { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, TB_ALIGN_32 }, - { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, TB_ALIGN_32 }, - { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, TB_ALIGN_32 }, - { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, TB_ALIGN_32 }, - { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 }, - { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 }, + { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 }, + { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 }, + { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 }, + { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 }, + { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 }, + { X86::VPADDBYrr, X86::VPADDBYrm, 0 }, + { X86::VPADDDYrr, X86::VPADDDYrm, 0 }, + { X86::VPADDQYrr, X86::VPADDQYrm, 0 }, + { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 }, + { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 }, + { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 }, + { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 }, + { X86::VPADDWYrr, X86::VPADDWYrm, 0 }, + { X86::VPALIGNR256rr, X86::VPALIGNR256rm, 0 }, + { X86::VPANDNYrr, X86::VPANDNYrm, 0 }, + { X86::VPANDYrr, X86::VPANDYrm, 0 }, + { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 }, + { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 }, + { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 }, + { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 }, + { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 }, + { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 }, + { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 }, + { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 }, + { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 }, + { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 }, + { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 }, + { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 }, + { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 }, + { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 }, + { X86::VPERMDYrr, X86::VPERMDYrm, 0 }, + { X86::VPERMPDYri, X86::VPERMPDYmi, 0 }, + { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 }, + { X86::VPERMQYri, X86::VPERMQYmi, 0 }, + { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 }, + { X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 }, + { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 }, + { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 }, + { X86::VPHSUBSWrr256, 
X86::VPHSUBSWrm256, 0 }, + { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 }, + { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, 0 }, + { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 }, + { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 }, + { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 }, + { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 }, + { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 }, + { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 }, + { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 }, + { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 }, + { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 }, + { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 }, + { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 }, + { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 }, + { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 }, + { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 }, + { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 }, + { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, 0 }, + { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 }, + { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 }, + { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 }, + { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 }, + { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 }, + { X86::VPORYrr, X86::VPORYrm, 0 }, + { X86::VPSADBWYrr, X86::VPSADBWYrm, 0 }, + { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 }, + { X86::VPSIGNBYrr, X86::VPSIGNBYrm, 0 }, + { X86::VPSIGNWYrr, X86::VPSIGNWYrm, 0 }, + { X86::VPSIGNDYrr, X86::VPSIGNDYrm, 0 }, + { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 }, + { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 }, + { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 }, + { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 }, + { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 }, + { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 }, + { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 }, + { X86::VPSRADYrr, X86::VPSRADYrm, 0 }, + { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 }, + { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 }, + { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 }, + { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 }, + { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 }, + { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 }, + { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 }, + { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 }, + { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 }, + { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 }, + { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 }, + { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 }, + { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 }, + { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 }, + { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 }, + { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 }, + { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 }, + { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 }, + { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 }, + { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 }, + { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 }, + { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 }, + { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 }, + { X86::VPXORYrr, X86::VPXORYrm, 0 }, // FIXME: add AVX 256-bit foldable instructions // FMA4 foldable patterns @@ -1161,8 +1177,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_32 }, // BMI/BMI2 foldable instructions + { X86::ANDN32rr, X86::ANDN32rm, 0 }, + { X86::ANDN64rr, X86::ANDN64rm, 0 }, { X86::MULX32rr, X86::MULX32rm, 0 }, { X86::MULX64rr, X86::MULX64rm, 0 }, + { X86::PDEP32rr, X86::PDEP32rm, 0 }, + { X86::PDEP64rr, X86::PDEP64rm, 0 }, + { X86::PEXT32rr, X86::PEXT32rm, 0 }, + { X86::PEXT64rr, X86::PEXT64rm, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { @@ -2848,6 +2870,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } // Moving EFLAGS to / from another register requires a push and a pop. + // Notice that we have to adjust the stack if we don't want to clobber the + // first frame index. 
See X86FrameLowering.cpp - colobbersTheStack. if (SrcReg == X86::EFLAGS) { if (X86::GR64RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF64)); @@ -3158,19 +3182,15 @@ inline static bool isDefConvertible(MachineInstr *MI) { case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr: case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm: case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm: - case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r: - case X86::DEC64m: case X86::DEC32m: case X86::DEC16m: case X86::DEC8m: + case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r: case X86::DEC64_32r: case X86::DEC64_16r: - case X86::DEC64_32m: case X86::DEC64_16m: case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri: case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8: case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr: case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm: case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm: - case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r: - case X86::INC64m: case X86::INC32m: case X86::INC16m: case X86::INC8m: + case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r: case X86::INC64_32r: case X86::INC64_16r: - case X86::INC64_32m: case X86::INC64_16m: case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri: case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8: case X86::AND8ri: case X86::AND64rr: case X86::AND32rr: @@ -3186,6 +3206,8 @@ inline static bool isDefConvertible(MachineInstr *MI) { case X86::OR8ri: case X86::OR64rr: case X86::OR32rr: case X86::OR16rr: case X86::OR8rr: case X86::OR64rm: case X86::OR32rm: case X86::OR16rm: case X86::OR8rm: + case X86::ANDN32rr: case X86::ANDN32rm: + case X86::ANDN64rr: case X86::ANDN64rm: return true; } } @@ -3508,43 +3530,44 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, /// to: /// %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef> /// -static bool Expand2AddrUndef(MachineInstr *MI, const MCInstrDesc &Desc) { +static bool Expand2AddrUndef(MachineInstrBuilder &MIB, + const MCInstrDesc &Desc) { assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); - unsigned Reg = MI->getOperand(0).getReg(); - MI->setDesc(Desc); + unsigned Reg = MIB->getOperand(0).getReg(); + MIB->setDesc(Desc); // MachineInstr::addOperand() will insert explicit operands before any // implicit operands. - MachineInstrBuilder(MI).addReg(Reg, RegState::Undef) - .addReg(Reg, RegState::Undef); + MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef); // But we don't trust that. 
- assert(MI->getOperand(1).getReg() == Reg && - MI->getOperand(2).getReg() == Reg && "Misplaced operand"); + assert(MIB->getOperand(1).getReg() == Reg && + MIB->getOperand(2).getReg() == Reg && "Misplaced operand"); return true; } bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); switch (MI->getOpcode()) { case X86::SETB_C8r: - return Expand2AddrUndef(MI, get(X86::SBB8rr)); + return Expand2AddrUndef(MIB, get(X86::SBB8rr)); case X86::SETB_C16r: - return Expand2AddrUndef(MI, get(X86::SBB16rr)); + return Expand2AddrUndef(MIB, get(X86::SBB16rr)); case X86::SETB_C32r: - return Expand2AddrUndef(MI, get(X86::SBB32rr)); + return Expand2AddrUndef(MIB, get(X86::SBB32rr)); case X86::SETB_C64r: - return Expand2AddrUndef(MI, get(X86::SBB64rr)); + return Expand2AddrUndef(MIB, get(X86::SBB64rr)); case X86::V_SET0: case X86::FsFLD0SS: case X86::FsFLD0SD: - return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr)); + return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr)); case X86::AVX_SET0: assert(HasAVX && "AVX not supported"); - return Expand2AddrUndef(MI, get(X86::VXORPSYrr)); + return Expand2AddrUndef(MIB, get(X86::VXORPSYrr)); case X86::V_SETALLONES: - return Expand2AddrUndef(MI, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr)); + return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr)); case X86::AVX2_SETALLONES: - return Expand2AddrUndef(MI, get(X86::VPCMPEQDYrr)); + return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr)); case X86::TEST8ri_NOREX: MI->setDesc(get(X86::TEST8ri)); return true; @@ -3570,9 +3593,10 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, MachineInstr *MI, const TargetInstrInfo &TII) { // Create the base instruction with the memory operand as the first part. + // Omit the implicit operands, something BuildMI can't do. MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), MI->getDebugLoc(), true); - MachineInstrBuilder MIB(NewMI); + MachineInstrBuilder MIB(MF, NewMI); unsigned NumAddrOps = MOs.size(); for (unsigned i = 0; i != NumAddrOps; ++i) MIB.addOperand(MOs[i]); @@ -3596,9 +3620,10 @@ static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode, unsigned OpNo, const SmallVectorImpl<MachineOperand> &MOs, MachineInstr *MI, const TargetInstrInfo &TII) { + // Omit the implicit operands, something BuildMI can't do. MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), MI->getDebugLoc(), true); - MachineInstrBuilder MIB(NewMI); + MachineInstrBuilder MIB(MF, NewMI); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); @@ -3845,8 +3870,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Unless optimizing for size, don't fold to avoid partial // register update stalls - if (!MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize) && + if (!MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && hasPartialRegUpdate(MI->getOpcode())) return 0; @@ -3887,8 +3912,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Unless optimizing for size, don't fold to avoid partial // register update stalls - if (!MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize) && + if (!MF.getFunction()->getAttributes(). 
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && hasPartialRegUpdate(MI->getOpcode())) return 0; @@ -4138,7 +4163,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Emit the data processing instruction. MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true); - MachineInstrBuilder MIB(DataMI); + MachineInstrBuilder MIB(MF, DataMI); if (FoldedStore) MIB.addReg(Reg, RegState::Define); @@ -4644,13 +4669,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::DIVSSrr: case X86::DIVSSrr_Int: case X86::SQRTPDm: - case X86::SQRTPDm_Int: case X86::SQRTPDr: - case X86::SQRTPDr_Int: case X86::SQRTPSm: - case X86::SQRTPSm_Int: case X86::SQRTPSr: - case X86::SQRTPSr_Int: case X86::SQRTSDm: case X86::SQRTSDm_Int: case X86::SQRTSDr: @@ -4669,13 +4690,9 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::VDIVSSrr: case X86::VDIVSSrr_Int: case X86::VSQRTPDm: - case X86::VSQRTPDm_Int: case X86::VSQRTPDr: - case X86::VSQRTPDr_Int: case X86::VSQRTPSm: - case X86::VSQRTPSm_Int: case X86::VSQRTPSr: - case X86::VSQRTPSr_Int: case X86::VSQRTSDm: case X86::VSQRTSDm_Int: case X86::VSQRTSDr: diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 921ec0d84b..ba5dfae137 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -262,9 +262,9 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>; -def X86blsi_flag : SDNode<"X86ISD::BLSI", SDTUnaryArithWithFlags>; -def X86blsmsk_flag : SDNode<"X86ISD::BLSMSK", SDTUnaryArithWithFlags>; -def X86blsr_flag : SDNode<"X86ISD::BLSR", SDTUnaryArithWithFlags>; +def X86blsi : SDNode<"X86ISD::BLSI", SDTIntUnaryOp>; +def X86blsmsk : SDNode<"X86ISD::BLSMSK", SDTIntUnaryOp>; +def X86blsr : SDNode<"X86ISD::BLSR", SDTIntUnaryOp>; def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; @@ -1066,7 +1066,7 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), */ -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 0 in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), @@ -1156,24 +1156,26 @@ def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), // perspective, this is pretty bizarre. Make these instructions disassembly // only for now. 
-def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), - "bt{w}\t{$src2, $src1|$src1, $src2}", -// [(X86bt (loadi16 addr:$src1), GR16:$src2), -// (implicit EFLAGS)] - [], IIC_BT_MR - >, OpSize, TB, Requires<[FastBTMem]>; -def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), - "bt{l}\t{$src2, $src1|$src1, $src2}", -// [(X86bt (loadi32 addr:$src1), GR32:$src2), -// (implicit EFLAGS)] - [], IIC_BT_MR - >, TB, Requires<[FastBTMem]>; -def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "bt{q}\t{$src2, $src1|$src1, $src2}", -// [(X86bt (loadi64 addr:$src1), GR64:$src2), -// (implicit EFLAGS)] - [], IIC_BT_MR - >, TB; +let mayLoad = 1, hasSideEffects = 0 in { + def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), + "bt{w}\t{$src2, $src1|$src1, $src2}", + // [(X86bt (loadi16 addr:$src1), GR16:$src2), + // (implicit EFLAGS)] + [], IIC_BT_MR + >, OpSize, TB, Requires<[FastBTMem]>; + def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), + "bt{l}\t{$src2, $src1|$src1, $src2}", + // [(X86bt (loadi32 addr:$src1), GR32:$src2), + // (implicit EFLAGS)] + [], IIC_BT_MR + >, TB, Requires<[FastBTMem]>; + def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), + "bt{q}\t{$src2, $src1|$src1, $src2}", + // [(X86bt (loadi64 addr:$src1), GR64:$src2), + // (implicit EFLAGS)] + [], IIC_BT_MR + >, TB; +} def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", @@ -1204,7 +1206,7 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), [(set EFLAGS, (X86bt (loadi64 addr:$src1), i64immSExt8:$src2))], IIC_BT_MI>, TB; - +let hasSideEffects = 0 in { def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1212,6 +1214,8 @@ def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; + +let mayLoad = 1, mayStore = 1 in { def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1219,6 +1223,8 @@ def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; +} + def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1226,6 +1232,8 @@ def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; + +let mayLoad = 1, mayStore = 1 in { def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1233,6 +1241,7 @@ def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; +} 
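For context on why the bt/btc/btr/bts memory forms above are handled so conservatively (mayLoad/mayStore, disassembly only): with a register bit index and a memory operand, these instructions treat the operand as the base of a bit string and do not mask the index to the operand width, so the access can land well past the nominal operand. A small AT&T-syntax illustration, with hypothetical operands not taken from the patch:

  movl $100, %eax
  btl  %eax, (%rdi)        # tests bit 100 mod 32 = 4 of the dword at 12(%rdi),
                           # i.e. 4*(100 div 32) bytes beyond the base address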
def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, @@ -1241,6 +1250,8 @@ def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + +let mayLoad = 1, mayStore = 1 in { def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1248,6 +1259,8 @@ def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; +} + def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1255,6 +1268,8 @@ def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; + +let mayLoad = 1, mayStore = 1 in { def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1262,6 +1277,7 @@ def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; +} def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, @@ -1270,6 +1286,8 @@ def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; + +let mayLoad = 1, mayStore = 1 in { def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1277,6 +1295,8 @@ def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; +} + def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1284,6 +1304,8 @@ def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; + +let mayLoad = 1, mayStore = 1 in { def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1291,6 +1313,8 @@ def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bts{q}\t{$src2, 
$src1|$src1, $src2}", [], IIC_BTX_MI>, TB; +} +} // hasSideEffects = 0 } // Defs = [EFLAGS] @@ -1303,34 +1327,34 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag, InstrItinClass itin> { let Constraints = "$val = $dst" in { - def #NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst), - (ins GR8:$val, i8mem:$ptr), - !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"), - [(set - GR8:$dst, - (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))], - itin>; - def #NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst), - (ins GR16:$val, i16mem:$ptr), - !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"), - [(set - GR16:$dst, - (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))], - itin>, OpSize; - def #NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst), - (ins GR32:$val, i32mem:$ptr), - !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"), + def NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst), + (ins GR8:$val, i8mem:$ptr), + !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"), + [(set + GR8:$dst, + (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))], + itin>; + def NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst), + (ins GR16:$val, i16mem:$ptr), + !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"), + [(set + GR16:$dst, + (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))], + itin>, OpSize; + def NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$val, i32mem:$ptr), + !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"), + [(set + GR32:$dst, + (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))], + itin>; + def NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$val, i64mem:$ptr), + !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"), [(set - GR32:$dst, - (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))], + GR64:$dst, + (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))], itin>; - def #NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst), - (ins GR64:$val, i64mem:$ptr), - !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"), - [(set - GR64:$dst, - (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))], - itin>; } } @@ -1509,10 +1533,10 @@ def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), Requires<[In32BitMode]>; // Adjust RPL Field of Segment Selector -def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst), +def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_REG>, Requires<[In32BitMode]>; -def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst), +def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>, Requires<[In32BitMode]>; @@ -1628,26 +1652,26 @@ multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM, PatFrag ld_frag> { def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, EFLAGS, (OpNode RC:$src))]>, T8, VEX_4V; + [(set RC:$dst, (OpNode RC:$src)), (implicit EFLAGS)]>, T8, VEX_4V; def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, EFLAGS, (OpNode (ld_frag addr:$src)))]>, + [(set RC:$dst, (OpNode (ld_frag addr:$src))), (implicit EFLAGS)]>, T8, VEX_4V; } let Predicates = [HasBMI], Defs = [EFLAGS] in { defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, - X86blsr_flag, loadi32>; + X86blsr, loadi32>; defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, 
MRM1m, GR64, i64mem, - X86blsr_flag, loadi64>, VEX_W; + X86blsr, loadi64>, VEX_W; defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, - X86blsmsk_flag, loadi32>; + X86blsmsk, loadi32>; defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, - X86blsmsk_flag, loadi64>, VEX_W; + X86blsmsk, loadi64>, VEX_W; defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, - X86blsi_flag, loadi32>; + X86blsi, loadi32>; defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, - X86blsi_flag, loadi64>, VEX_W; + X86blsi, loadi64>, VEX_W; } multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC, @@ -1915,6 +1939,8 @@ def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>; def : InstAlias<"fdivp", (DIVR_FPrST0 ST1)>; def : InstAlias<"fdivrp", (DIV_FPrST0 ST1)>; def : InstAlias<"fxch", (XCH_F ST1)>; +def : InstAlias<"fcom", (COM_FST0r ST1)>; +def : InstAlias<"fcomp", (COMP_FST0r ST1)>; def : InstAlias<"fcomi", (COM_FIr ST1)>; def : InstAlias<"fcompi", (COM_FIPr ST1)>; def : InstAlias<"fucom", (UCOM_Fr ST1)>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 1912a936ce..89149c65bf 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -203,9 +203,8 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, string OpcodeStr, X86MemOperand x86memop, list<dag> pat_rr, list<dag> pat_rm, - bit Is2Addr = 1, - bit rr_hasSideEffects = 0> { - let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in + bit Is2Addr = 1> { + let isCommutable = 1, hasSideEffects = 0 in def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), @@ -218,27 +217,6 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, pat_rm, IIC_DEFAULT, d>; } -/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class -multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, - string asm, string SSEVer, string FPSizeStr, - X86MemOperand x86memop, PatFrag mem_frag, - Domain d, OpndItins itins, bit Is2Addr = 1> { - def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), - !if(Is2Addr, - !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (!cast<Intrinsic>( - !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, RC:$src2))], IIC_DEFAULT, d>; - def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2), - !if(Is2Addr, - !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (!cast<Intrinsic>( - !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>; -} - //===----------------------------------------------------------------------===// // Non-instruction patterns //===----------------------------------------------------------------------===// @@ -481,7 +459,7 @@ def VMOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64, VEX_LIG; // For the disassembler -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 0 in { def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], @@ -519,7 +497,7 @@ let Constraints = "$src1 = $dst" in { "movsd\t{$src2, $dst|$dst, $src2}">, XD; // For the 
disassembler - let isCodeGenOnly = 1 in { + let isCodeGenOnly = 1, hasSideEffects = 0 in { def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), "movss\t{$src2, $dst|$dst, $src2}", [], @@ -870,7 +848,7 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), IIC_SSE_MOVU_P_MR>, VEX, VEX_L; // For disassembler -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 0 in { def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], @@ -944,7 +922,7 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), IIC_SSE_MOVU_P_MR>; // For disassembler -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 0 in { def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -1132,34 +1110,41 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), // SSE 1 & 2 - Move Low packed FP Instructions //===----------------------------------------------------------------------===// -multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC, - SDNode psnode, SDNode pdnode, string base_opc, - string asm_opr, InstrItinClass itin> { +multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode, + string base_opc, string asm_opr, + InstrItinClass itin> { def PSrm : PI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), !strconcat(base_opc, "s", asm_opr), - [(set RC:$dst, - (psnode RC:$src1, + [(set VR128:$dst, + (psnode VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], itin, SSEPackedSingle>, TB; def PDrm : PI<opc, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f64mem:$src2), + (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), !strconcat(base_opc, "d", asm_opr), - [(set RC:$dst, (v2f64 (pdnode RC:$src1, + [(set VR128:$dst, (v2f64 (pdnode VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], itin, SSEPackedDouble>, TB, OpSize; + } -let AddedComplexity = 20 in { - defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp", - "\t{$src2, $src1, $dst|$dst, $src1, $src2}", - IIC_SSE_MOV_LH>, VEX_4V; +multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode, + string base_opc, InstrItinClass itin> { + defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}", + itin>, VEX_4V; + +let Constraints = "$src1 = $dst" in + defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc, + "\t{$src2, $dst|$dst, $src2}", + itin>; } -let Constraints = "$src1 = $dst", AddedComplexity = 20 in { - defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp", - "\t{$src2, $dst|$dst, $src2}", - IIC_SSE_MOV_LH>; + +let AddedComplexity = 20 in { + defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp", + IIC_SSE_MOV_LH>; } def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -1257,14 +1242,8 @@ let Predicates = [UseSSE2] in { //===----------------------------------------------------------------------===// let AddedComplexity = 20 in { - defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp", - "\t{$src2, $src1, $dst|$dst, $src1, $src2}", - IIC_SSE_MOV_LH>, VEX_4V; -} -let Constraints = "$src1 = $dst", AddedComplexity = 20 in { - defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp", - "\t{$src2, 
$dst|$dst, $src2}", - IIC_SSE_MOV_LH>; + defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp", + IIC_SSE_MOV_LH>; } // v2f64 extract element 1 is always custom lowered to unpack high to low @@ -1457,7 +1436,7 @@ defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, - "cvttss2si{q}\t{$src, $dst|$dst, $src}", + "cvttss2si\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, @@ -1465,26 +1444,43 @@ defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, - "cvttsd2si{q}\t{$src, $dst|$dst, $src}", + "cvttsd2si\t{$src, $dst|$dst, $src}", SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; +def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", + (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0>; +def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", + (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0>; +def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}", + (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0>; +def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}", + (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0>; +def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}", + (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0>; +def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}", + (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>; +def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", + (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>; +def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", + (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>; + // The assembler can recognize rr 64-bit instructions by seeing a rxx // register, but the same isn't true when only using memory operands, // provide other assembly "l" and "q" forms to address this explicitly // where appropriate to do so. 
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, +defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}">, XS, VEX_4V, VEX_LIG; defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS, VEX_4V, VEX_W, VEX_LIG; -defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, +defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD, VEX_4V, VEX_LIG; defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, VEX_4V, VEX_W, VEX_LIG; -def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", - (VCVTSI2SDrr FR64:$dst, FR64:$src1, GR32:$src)>; -def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", +def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", + (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>; +def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>; let Predicates = [HasAVX] in { @@ -1511,27 +1507,49 @@ defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_32>, XS; defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, - "cvttss2si{q}\t{$src, $dst|$dst, $src}", + "cvttss2si\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_64>, XS, REX_W; defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, "cvttsd2si\t{$src, $dst|$dst, $src}", SSE_CVT_SD2SI>, XD; defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, - "cvttsd2si{q}\t{$src, $dst|$dst, $src}", + "cvttsd2si\t{$src, $dst|$dst, $src}", SSE_CVT_SD2SI>, XD, REX_W; defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, - "cvtsi2ss\t{$src, $dst|$dst, $src}", + "cvtsi2ss{l}\t{$src, $dst|$dst, $src}", SSE_CVT_Scalar>, XS; defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, "cvtsi2ss{q}\t{$src, $dst|$dst, $src}", SSE_CVT_Scalar>, XS, REX_W; defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, - "cvtsi2sd\t{$src, $dst|$dst, $src}", + "cvtsi2sd{l}\t{$src, $dst|$dst, $src}", SSE_CVT_Scalar>, XD; defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, "cvtsi2sd{q}\t{$src, $dst|$dst, $src}", SSE_CVT_Scalar>, XD, REX_W; +def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}", + (CVTTSS2SIrr GR32:$dst, FR32:$src), 0>; +def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}", + (CVTTSS2SIrm GR32:$dst, f32mem:$src), 0>; +def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", + (CVTTSD2SIrr GR32:$dst, FR64:$src), 0>; +def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", + (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0>; +def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", + (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0>; +def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", + (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>; +def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", + (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0>; +def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", + (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>; + +def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}", + (CVTSI2SSrm FR64:$dst, i32mem:$src)>; +def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", + (CVTSI2SDrm FR64:$dst, i32mem:$src)>; + // Conversion Instructions Intrinsics - Match intrinsics which expect MM // and/or XMM operand(s). 
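On the suffix changes in the conversion instructions above: with a general-purpose register source the assembler can infer the conversion size from the register name, but a memory source carries no size, which is what the explicit "l"/"q" spellings and the added aliases address. A brief AT&T-syntax sketch, with hypothetical operands not taken from the patch:

  cvtsi2ss  %eax, %xmm0      # 32-bit source implied by %eax
  cvtsi2ss  %rax, %xmm0      # 64-bit source implied by %rax
  cvtsi2ssl (%rdi), %xmm0    # memory source gives no size hint; 'l' selects the 32-bit form
  cvtsi2ssq (%rdi), %xmm0    # 'q' selects the 64-bit form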
@@ -1566,27 +1584,27 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, } defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, - int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si{l}", + int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, - int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si{q}", + int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - sdmem, sse_load_f64, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD; + sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, - sdmem, sse_load_f64, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; + sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W; defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", + int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", SSE_CVT_Scalar, 0>, XS, VEX_4V; defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", SSE_CVT_Scalar, 0>, XS, VEX_4V, VEX_W; defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", + int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}", SSE_CVT_Scalar, 0>, XD, VEX_4V; defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", @@ -1596,13 +1614,13 @@ defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, let Constraints = "$src1 = $dst" in { defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse_cvtsi2ss, i32mem, loadi32, - "cvtsi2ss", SSE_CVT_Scalar>, XS; + "cvtsi2ss{l}", SSE_CVT_Scalar>, XS; defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W; defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse2_cvtsi2sd, i32mem, loadi32, - "cvtsi2sd", SSE_CVT_Scalar>, XD; + "cvtsi2sd{l}", SSE_CVT_Scalar>, XD; defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W; @@ -1616,40 +1634,40 @@ defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, SSE_CVT_SS2SI_32>, XS, VEX; defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse_cvttss2si64, ssmem, sse_load_f32, - "cvttss2si{q}", SSE_CVT_SS2SI_64>, + "cvttss2si", SSE_CVT_SS2SI_64>, XS, VEX, VEX_W; defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, sdmem, sse_load_f64, "cvttsd2si", SSE_CVT_SD2SI>, XD, VEX; defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, - "cvttsd2si{q}", SSE_CVT_SD2SI>, + "cvttsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_W; defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS; defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse_cvttss2si64, ssmem, sse_load_f32, - "cvttss2si{q}", SSE_CVT_SS2SI_64>, XS, REX_W; + "cvttss2si", SSE_CVT_SS2SI_64>, XS, REX_W; defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, sdmem, sse_load_f64, "cvttsd2si", SSE_CVT_SD2SI>, XD; defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, sdmem, 
sse_load_f64, - "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; + "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W; defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, - ssmem, sse_load_f32, "cvtss2si{l}", + ssmem, sse_load_f32, "cvtss2si", SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, - ssmem, sse_load_f32, "cvtss2si{q}", + ssmem, sse_load_f32, "cvtss2si", SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, - ssmem, sse_load_f32, "cvtss2si{l}", + ssmem, sse_load_f32, "cvtss2si", SSE_CVT_SS2SI_32>, XS; defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, - ssmem, sse_load_f32, "cvtss2si{q}", + ssmem, sse_load_f32, "cvtss2si", SSE_CVT_SS2SI_64>, XS, REX_W; defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, @@ -1666,6 +1684,40 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, SSEPackedSingle, SSE_CVT_PS>, TB, Requires<[UseSSE2]>; +def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", + (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>; +def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", + (VCVTSS2SIrm GR32:$dst, ssmem:$src), 0>; +def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}", + (VCVTSD2SIrr GR32:$dst, VR128:$src), 0>; +def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}", + (VCVTSD2SIrm GR32:$dst, sdmem:$src), 0>; +def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}", + (VCVTSS2SI64rr GR64:$dst, VR128:$src), 0>; +def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}", + (VCVTSS2SI64rm GR64:$dst, ssmem:$src), 0>; +def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", + (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>; +def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}", + (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>; + +def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", + (CVTSS2SIrr GR32:$dst, VR128:$src), 0>; +def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}", + (CVTSS2SIrm GR32:$dst, ssmem:$src), 0>; +def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}", + (CVTSD2SIrr GR32:$dst, VR128:$src), 0>; +def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}", + (CVTSD2SIrm GR32:$dst, sdmem:$src), 0>; +def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}", + (CVTSS2SI64rr GR64:$dst, VR128:$src), 0>; +def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}", + (CVTSS2SI64rm GR64:$dst, ssmem:$src), 0>; +def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", + (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>; +def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}", + (CVTSD2SI64rm GR64:$dst, sdmem:$src)>; + /// SSE 2 Only // Convert scalar double to scalar single @@ -2657,10 +2709,8 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions /// PDI_binop_rm - Simple SSE2 binary operator. 
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop, - OpndItins itins, - bit IsCommutable = 0, - bit Is2Addr = 1> { + X86MemOperand x86memop, OpndItins itins, + bit IsCommutable, bit Is2Addr> { let isCommutable = IsCommutable in def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), @@ -2679,40 +2729,30 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, } } // ExeDomain = SSEPackedInt -// These are ordered here for pattern ordering requirements with the fp versions +multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode, + ValueType OpVT128, ValueType OpVT256, + OpndItins itins, bit IsCommutable = 0> { +let Predicates = [HasAVX] in + defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128, + VR128, memopv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V; -let Predicates = [HasAVX] in { -defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; -defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; -defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; -defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128, + memopv2i64, i128mem, itins, IsCommutable, 1>; + +let Predicates = [HasAVX2] in + defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, + OpVT256, VR256, memopv4i64, i256mem, itins, + IsCommutable, 0>, VEX_4V, VEX_L; } -let Constraints = "$src1 = $dst" in { -defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 1>; -defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 1>; -defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 1>; -defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64, - i128mem, SSE_BIT_ITINS_P, 0>; -} // Constraints = "$src1 = $dst" +// These are ordered here for pattern ordering requirements with the fp versions -let Predicates = [HasAVX2] in { -defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64, - i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64, - i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64, - i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64, - i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V, VEX_L; -} +defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, SSE_BIT_ITINS_P, 1>; +defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, SSE_BIT_ITINS_P, 1>; +defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, SSE_BIT_ITINS_P, 1>; +defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64, + SSE_BIT_ITINS_P, 0>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Logical Instructions @@ -2757,6 +2797,20 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in /// multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, SDNode OpNode> { + defm V#NAME#PSY : 
sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, + !strconcat(OpcodeStr, "ps"), f256mem, + [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], + [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), + (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L; + + defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, + !strconcat(OpcodeStr, "pd"), f256mem, + [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), + (bc_v4i64 (v4f64 VR256:$src2))))], + [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), + (memopv4i64 addr:$src2)))], 0>, + TB, OpSize, VEX_4V, VEX_L; + // In AVX no need to add a pattern for 128-bit logical rr ps, because they // are all promoted to v2i64, and the patterns are covered by the int // version. This is needed in SSE only, because v2i64 isn't supported on @@ -2764,7 +2818,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, [], [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V; + (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V; defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f128mem, @@ -2773,6 +2827,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), (memopv2i64 addr:$src2)))], 0>, TB, OpSize, VEX_4V; + let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, @@ -2789,31 +2844,6 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, } } -/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms -/// -multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr, - SDNode OpNode> { - defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, - !strconcat(OpcodeStr, "ps"), f256mem, - [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], - [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), - (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L; - - defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, - !strconcat(OpcodeStr, "pd"), f256mem, - [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), - (bc_v4i64 (v4f64 VR256:$src2))))], - [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), - (memopv4i64 addr:$src2)))], 0>, - TB, OpSize, VEX_4V, VEX_L; -} - -// AVX 256-bit packed logical ops forms -defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>; -defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>; -defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>; -defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>; - defm AND : sse12_fp_packed_logical<0x54, "and", and>; defm OR : sse12_fp_packed_logical<0x56, "or", or>; defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; @@ -2848,26 +2878,32 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, itins.d, Is2Addr>, XD; } -multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, - SizeItins itins, - bit Is2Addr = 1> { +multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, + SDNode OpNode, SizeItins itins> { +let Predicates = [HasAVX] in { + defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, + VR128, v4f32, f128mem, memopv4f32, + SSEPackedSingle, itins.s, 0>, TB, VEX_4V; + defm V#NAME#PD : sse12_fp_packed<opc, 
!strconcat(OpcodeStr, "pd"), OpNode, + VR128, v2f64, f128mem, memopv2f64, + SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V; + + defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), + OpNode, VR256, v8f32, f256mem, memopv8f32, + SSEPackedSingle, itins.s, 0>, TB, VEX_4V, VEX_L; + defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), + OpNode, VR256, v4f64, f256mem, memopv4f64, + SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V, VEX_L; +} + +let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, - v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>, - TB; + v4f32, f128mem, memopv4f32, SSEPackedSingle, + itins.s, 1>, TB; defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, - v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>, - TB, OpSize; + v2f64, f128mem, memopv2f64, SSEPackedDouble, + itins.d, 1>, TB, OpSize; } - -multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr, - SDNode OpNode, - SizeItins itins> { - defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256, - v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>, - TB, VEX_L; - defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256, - v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>, - TB, OpSize, VEX_L; } multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, @@ -2881,116 +2917,69 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, itins.d, Is2Addr>, XD; } -multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr, - SizeItins itins, - bit Is2Addr = 1> { - defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128, - !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32, - SSEPackedSingle, itins.s, Is2Addr>, - TB; - - defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128, - !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64, - SSEPackedDouble, itins.d, Is2Addr>, - TB, OpSize; +// Binary Arithmetic instructions +defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>; +defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>; +let isCommutable = 0 in { + defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>; + defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>; + defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>; + defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>; } -multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr, - SizeItins itins> { - defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256, - !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32, - SSEPackedSingle, itins.s, 0>, TB, VEX_L; - - defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256, - !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64, - SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_L; +let isCodeGenOnly = 1 in { + defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>; + defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>; } -// Binary Arithmetic instructions defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; -defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x58, "add", fadd, SSE_ALU_ITINS_P>, - VEX_4V; defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 
0>, basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>, VEX_4V, VEX_LIG; -defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x59, "mul", fmul, SSE_MUL_ITINS_P>, - VEX_4V; let isCommutable = 0 in { defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; - defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, - VEX_4V; defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>, VEX_4V, VEX_LIG; - defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x5E, "div", fdiv, SSE_DIV_ITINS_P>, - VEX_4V; defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; - defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>, - basic_sse12_fp_binop_p_y_int<0x5F, "max", SSE_ALU_ITINS_P>, - VEX_4V; defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; - defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y_int<0x5D, "min", SSE_ALU_ITINS_P>, - basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>, - VEX_4V; } let Constraints = "$src1 = $dst" in { defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>; defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>, - basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>, basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>; let isCommutable = 0 in { defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>; defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>, - basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>, basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>; defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>, - basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>; + basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>; defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>, - basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>; + basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>; } } let isCodeGenOnly = 1 in { defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; - defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V; defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, 
SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; - defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V; let Constraints = "$src1 = $dst" in { - defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>; - defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>; + defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>; + defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>; } } @@ -3021,6 +3010,26 @@ def SSE_RCPS : OpndItins< /// sse1_fp_unop_s - SSE1 unops in scalar form. multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, Intrinsic F32Int, OpndItins itins> { +let Predicates = [HasAVX], hasSideEffects = 0 in { + def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), + (ins FR32:$src1, FR32:$src2), + !strconcat("v", OpcodeStr, + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + let mayLoad = 1 in { + def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), + (ins FR32:$src1,f32mem:$src2), + !strconcat("v", OpcodeStr, + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, ssmem:$src2), + !strconcat("v", OpcodeStr, + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + } +} + def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode FR32:$src))]>; @@ -3040,49 +3049,115 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>; } -/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form. -multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { - def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), - !strconcat(OpcodeStr, - "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; +/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. 
+multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode, + OpndItins itins> { +let Predicates = [HasAVX], hasSideEffects = 0 in { + def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), + (ins FR32:$src1, FR32:$src2), + !strconcat("v", OpcodeStr, + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; let mayLoad = 1 in { - def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), - !strconcat(OpcodeStr, - "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, ssmem:$src2), - !strconcat(OpcodeStr, - "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), + (ins FR32:$src1,f32mem:$src2), + !strconcat("v", OpcodeStr, + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, ssmem:$src2), + !strconcat("v", OpcodeStr, + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + } +} + + def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set FR32:$dst, (OpNode FR32:$src))]>; + // For scalar unary operations, fold a load into the operation + // only in OptForSize mode. It eliminates an instruction, but it also + // eliminates a whole-register clobber (the load), so it introduces a + // partial register update condition. + def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, + Requires<[UseSSE1, OptForSize]>; + let Constraints = "$src1 = $dst" in { + def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), + [], itins.rr>; + let mayLoad = 1, hasSideEffects = 0 in + def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, ssmem:$src2), + !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), + [], itins.rm>; } } /// sse1_fp_unop_p - SSE1 unops in packed form. 
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { +let Predicates = [HasAVX] in { + def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat("v", OpcodeStr, + "ps\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], + itins.rr>, VEX; + def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + !strconcat("v", OpcodeStr, + "ps\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], + itins.rm>, VEX; + def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat("v", OpcodeStr, + "ps\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))], + itins.rr>, VEX, VEX_L; + def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat("v", OpcodeStr, + "ps\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))], + itins.rm>, VEX, VEX_L; +} + def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>; + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>; def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>; } -/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form. -multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { - def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))], - itins.rr>, VEX_L; - def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))], - itins.rm>, VEX_L; -} - /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr, - Intrinsic V4F32Int, OpndItins itins> { + Intrinsic V4F32Int, Intrinsic V8F32Int, + OpndItins itins> { +let Predicates = [HasAVX] in { + def V#NAME#PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat("v", OpcodeStr, + "ps\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (V4F32Int VR128:$src))], + itins.rr>, VEX; + def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + !strconcat("v", OpcodeStr, + "ps\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], + itins.rm>, VEX; + def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat("v", OpcodeStr, + "ps\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (V8F32Int VR256:$src))], + itins.rr>, VEX, VEX_L; + def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), + (ins f256mem:$src), + !strconcat("v", OpcodeStr, + "ps\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))], + itins.rm>, VEX, VEX_L; +} + def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], @@ -3093,22 +3168,29 @@ multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr, itins.rm>; } -/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms. 
-multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr, - Intrinsic V4F32Int, OpndItins itins> { - def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (V4F32Int VR256:$src))], - itins.rr>, VEX_L; - def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))], - itins.rm>, VEX_L; -} - /// sse2_fp_unop_s - SSE2 unops in scalar form. multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, Intrinsic F64Int, OpndItins itins> { +let Predicates = [HasAVX], hasSideEffects = 0 in { + def V#NAME#SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), + (ins FR64:$src1, FR64:$src2), + !strconcat("v", OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + let mayLoad = 1 in { + def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), + (ins FR64:$src1,f64mem:$src2), + !strconcat("v", OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, sdmem:$src2), + !strconcat("v", OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_LIG; + } +} + def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>; @@ -3125,26 +3207,32 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>; } -/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. -let hasSideEffects = 0 in -multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { - def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), - !strconcat(OpcodeStr, - "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - let mayLoad = 1 in { - def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), - !strconcat(OpcodeStr, - "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, sdmem:$src2), - !strconcat(OpcodeStr, - "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; - } -} - /// sse2_fp_unop_p - SSE2 unops in vector forms. 
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { +let Predicates = [HasAVX] in { + def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat("v", OpcodeStr, + "pd\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], + itins.rr>, VEX; + def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + !strconcat("v", OpcodeStr, + "pd\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], + itins.rm>, VEX; + def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat("v", OpcodeStr, + "pd\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))], + itins.rr>, VEX, VEX_L; + def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat("v", OpcodeStr, + "pd\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))], + itins.rm>, VEX, VEX_L; +} + def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>; @@ -3153,82 +3241,24 @@ multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>; } -/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms. -multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { - def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))], - itins.rr>, VEX_L; - def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))], - itins.rm>, VEX_L; -} - -/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms. -multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr, - Intrinsic V2F64Int, OpndItins itins> { - def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (V2F64Int VR128:$src))], - itins.rr>; - def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))], - itins.rm>; -} - -/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms. -multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr, - Intrinsic V2F64Int, OpndItins itins> { - def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (V2F64Int VR256:$src))], - itins.rr>, VEX_L; - def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))], - itins.rm>, VEX_L; -} +// Square root. +defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, + SSE_SQRTS>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, + sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, + SSE_SQRTS>, + sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>; -let Predicates = [HasAVX] in { - // Square root. 
- defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">, - sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG; - - defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>, - sse2_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>, - sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>, - sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>, - sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps, - SSE_SQRTP>, - sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd, - SSE_SQRTP>, - sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256, - SSE_SQRTP>, - sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256, - SSE_SQRTP>, - VEX; - - // Reciprocal approximations. Note that these typically require refinement - // in order to obtain suitable precision. - defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG; - defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>, - sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>, - sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256, - SSE_SQRTP>, - sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps, - SSE_SQRTP>, VEX; - - defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG; - defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp, SSE_RCPP>, - sse1_fp_unop_p_y<0x53, "vrcp", X86frcp, SSE_RCPP>, - sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256, - SSE_RCPP>, - sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps, - SSE_RCPP>, VEX; -} +// Reciprocal approximations. Note that these typically require refinement +// in order to obtain suitable precision. +defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>, + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>, + sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, + int_x86_avx_rsqrt_ps_256, SSE_SQRTP>; +defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>, + sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, + sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, + int_x86_avx_rcp_ps_256, SSE_RCPP>; def : Pat<(f32 (fsqrt FR32:$src)), (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>; @@ -3283,59 +3313,11 @@ let Predicates = [HasAVX] in { (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; } -// Square root. -defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, - SSE_SQRTS>, - sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>, - sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps, SSE_SQRTS>, - sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, - SSE_SQRTS>, - sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>, - sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>; - -/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. -multiclass sse1_fp_unop_rw<bits<8> opc, string OpcodeStr, SDNode OpNode, - Intrinsic F32Int, OpndItins itins> { - def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]>; - // For scalar unary operations, fold a load into the operation - // only in OptForSize mode. It eliminates an instruction, but it also - // eliminates a whole-register clobber (the load), so it introduces a - // partial register update condition. 
- def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), - !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[UseSSE1, OptForSize]>; - let Constraints = "$src1 = $dst" in { - def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rr>; - def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, ssmem:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rm>; - } -} - // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. -defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss, - SSE_SQRTS>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>, - sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, - SSE_SQRTS>; let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), (RSQRTSSr_Int VR128:$src, VR128:$src)>; -} - -defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss, - SSE_RCPS>, - sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>, - sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>; -let Predicates = [UseSSE1] in { def : Pat<(int_x86_sse_rcp_ss VR128:$src), (RCPSSr_Int VR128:$src, VR128:$src)>; } @@ -3508,7 +3490,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), } // For Disassembler -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 0 in { def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, @@ -3571,7 +3553,7 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; // For Disassembler -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, hasSideEffects = 0 in { def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -3650,6 +3632,24 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, itins.rm>; } +multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128, + Intrinsic IntId256, OpndItins itins, + bit IsCommutable = 0> { +let Predicates = [HasAVX] in + defm V#NAME : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128, + VR128, memopv2i64, i128mem, itins, + IsCommutable, 0>, VEX_4V; + +let Constraints = "$src1 = $dst" in + defm NAME : PDI_binop_rm_int<opc, OpcodeStr, IntId128, VR128, memopv2i64, + i128mem, itins, IsCommutable, 1>; + +let Predicates = [HasAVX2] in + defm V#NAME#Y : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256, + VR256, memopv4i64, i256mem, itins, + IsCommutable, 0>, VEX_4V, VEX_L; +} + multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, string OpcodeStr, SDNode OpNode, SDNode OpNode2, RegisterClass RC, @@ -3679,7 +3679,7 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>; } -/// PDI_binop_rm - Simple SSE2 binary operator with different src and dst types +/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, ValueType DstVT, ValueType SrcVT, RegisterClass RC, PatFrag memop_frag, X86MemOperand x86memop, @@ -3702,249 +3702,75 @@ multiclass PDI_binop_rm2<bits<8> opc, string 
OpcodeStr, SDNode OpNode, } } // ExeDomain = SSEPackedInt -// 128-bit Integer Arithmetic +defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8, + SSE_INTALU_ITINS_P, 1>; +defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16, + SSE_INTALU_ITINS_P, 1>; +defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32, + SSE_INTALU_ITINS_P, 1>; +defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, + SSE_INTALUQ_ITINS_P, 1>; +defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, + SSE_INTMUL_ITINS_P, 1>; +defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8, + SSE_INTALU_ITINS_P, 0>; +defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16, + SSE_INTALU_ITINS_P, 0>; +defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32, + SSE_INTALU_ITINS_P, 0>; +defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64, + SSE_INTALUQ_ITINS_P, 0>; +defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8, + SSE_INTALU_ITINS_P, 0>; +defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16, + SSE_INTALU_ITINS_P, 0>; +defm PMINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8, + SSE_INTALU_ITINS_P, 1>; +defm PMINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16, + SSE_INTALU_ITINS_P, 1>; +defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8, + SSE_INTALU_ITINS_P, 1>; +defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16, + SSE_INTALU_ITINS_P, 1>; -let Predicates = [HasAVX] in { -defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1, 0 /*3addr*/>, - VEX_4V; -defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPADDQ : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64, - i128mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V; -defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64, - i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; -defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPSUBQ : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64, - i128mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V; +// Intrinsic forms +defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b, + int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>; +defm PSUBSW : PDI_binop_all_int<0xE9, "psubsw" , int_x86_sse2_psubs_w, + int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>; +defm PADDSB : PDI_binop_all_int<0xEC, "paddsb" , int_x86_sse2_padds_b, + int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>; +defm PADDSW : PDI_binop_all_int<0xED, "paddsw" , int_x86_sse2_padds_w, + int_x86_avx2_padds_w, SSE_INTALU_ITINS_P, 1>; +defm PADDUSB : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b, + int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>; +defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w, + int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>; +defm PMULHUW : PDI_binop_all_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, + int_x86_avx2_pmulhu_w, SSE_INTMUL_ITINS_P, 1>; +defm PMULHW : PDI_binop_all_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, + 
int_x86_avx2_pmulh_w, SSE_INTMUL_ITINS_P, 1>; +defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, + int_x86_avx2_pmadd_wd, SSE_PMADD, 1>; +defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b, + int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>; +defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w, + int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>; +defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw, + int_x86_avx2_psad_bw, SSE_INTALU_ITINS_P, 1>; + +let Predicates = [HasAVX] in defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; - -// Intrinsic forms -defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, - VR128, memopv2i64, i128mem, - SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; -defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, - VR128, memopv2i64, i128mem, - SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; -defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, - VR128, memopv2i64, i128mem, - SSE_PMADD, 1, 0>, VEX_4V; -defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; -} - -let Predicates = [HasAVX2] in { -defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", 
add, v8i32, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64, - i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64, - i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64, - i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64, - i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V, VEX_L; +let Predicates = [HasAVX2] in defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32, VR256, memopv4i64, i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; - -// Intrinsic forms -defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w, - VR256, memopv4i64, i256mem, - SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMULHWY : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w, - VR256, memopv4i64, i256mem, - SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd, - VR256, memopv4i64, i256mem, - SSE_PMADD, 1, 0>, VEX_4V, VEX_L; -defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, 
VEX_4V, VEX_L; -defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; -} - -let Constraints = "$src1 = $dst" in { -defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1>; -defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1>; -defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P, 1>; -defm PADDQ : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64, - i128mem, SSE_INTALUQ_ITINS_P, 1>; -defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64, - i128mem, SSE_INTMUL_ITINS_P, 1>; -defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P>; -defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P>; -defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64, - i128mem, SSE_INTALU_ITINS_P>; -defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64, - i128mem, SSE_INTALUQ_ITINS_P>; +let Constraints = "$src1 = $dst" in defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128, memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>; -// Intrinsic forms -defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; -defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; -defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; -defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; -defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, - VR128, memopv2i64, i128mem, - SSE_INTMUL_ITINS_P, 1>; -defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, - VR128, memopv2i64, i128mem, - SSE_INTMUL_ITINS_P, 1>; -defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, - VR128, memopv2i64, i128mem, - SSE_PMADD, 1>; -defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; -defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", 
int_x86_sse2_psad_bw, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; - -} // Constraints = "$src1 = $dst" - //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Logical Instructions //===---------------------------------------------------------------------===// @@ -4126,186 +3952,106 @@ let Predicates = [UseSSE2] in { // SSE2 - Packed Integer Comparison Instructions //===---------------------------------------------------------------------===// -let Predicates = [HasAVX] in { - defm VPCMPEQB : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v16i8, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; - defm VPCMPEQW : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v8i16, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; - defm VPCMPEQD : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v4i32, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; - defm VPCMPGTB : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v16i8, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; - defm VPCMPGTW : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v8i16, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; - defm VPCMPGTD : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v4i32, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -} - -let Predicates = [HasAVX2] in { - defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; - defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; - defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; - defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; - defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; - defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -} - -let Constraints = "$src1 = $dst" in { - defm PCMPEQB : PDI_binop_rm<0x74, "pcmpeqb", X86pcmpeq, v16i8, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; - defm PCMPEQW : PDI_binop_rm<0x75, "pcmpeqw", X86pcmpeq, v8i16, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; - defm PCMPEQD : PDI_binop_rm<0x76, "pcmpeqd", X86pcmpeq, v4i32, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 1>; - defm PCMPGTB : PDI_binop_rm<0x64, "pcmpgtb", X86pcmpgt, v16i8, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; - defm PCMPGTW : PDI_binop_rm<0x65, "pcmpgtw", X86pcmpgt, v8i16, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; - defm PCMPGTD : PDI_binop_rm<0x66, "pcmpgtd", X86pcmpgt, v4i32, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; -} // Constraints = "$src1 = $dst" +defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8, + SSE_INTALU_ITINS_P, 1>; +defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, + SSE_INTALU_ITINS_P, 1>; +defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, + SSE_INTALU_ITINS_P, 1>; +defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, + SSE_INTALU_ITINS_P, 0>; +defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, + SSE_INTALU_ITINS_P, 0>; +defm PCMPGTD : 
PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, + SSE_INTALU_ITINS_P, 0>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Pack Instructions //===---------------------------------------------------------------------===// -let Predicates = [HasAVX] in { -defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; -} - -let Predicates = [HasAVX2] in { -defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb, - VR256, memopv4i64, i256mem, - SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; -} - -let Constraints = "$src1 = $dst" in { -defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; -defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; -defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128, - VR128, memopv2i64, i128mem, - SSE_INTALU_ITINS_P>; -} // Constraints = "$src1 = $dst" +defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128, + int_x86_avx2_packsswb, SSE_INTALU_ITINS_P, 0>; +defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, + int_x86_avx2_packssdw, SSE_INTALU_ITINS_P, 0>; +defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128, + int_x86_avx2_packuswb, SSE_INTALU_ITINS_P, 0>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Shuffle Instructions //===---------------------------------------------------------------------===// let ExeDomain = SSEPackedInt in { -multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, SDNode OpNode> { -def ri : Ii8<0x70, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (vt (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>; -def mi : Ii8<0x70, MRMSrcMem, - (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, - (vt (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], - IIC_SSE_PSHUF>; -} - -multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, SDNode OpNode> { -def Yri : Ii8<0x70, MRMSrcReg, - (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>; -def Ymi : Ii8<0x70, MRMSrcMem, - (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, - (vt (OpNode (bitconvert (memopv4i64 addr:$src1)), - (i8 imm:$src2))))]>; -} -} // ExeDomain = SSEPackedInt - +multiclass 
sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256, + SDNode OpNode> { let Predicates = [HasAVX] in { - let AddedComplexity = 5 in - defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, X86PShufd>, TB, OpSize, VEX; - - // SSE2 with ImmT == Imm8 and XS prefix. - defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, X86PShufhw>, XS, VEX; - - // SSE2 with ImmT == Imm8 and XD prefix. - defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, X86PShuflw>, XD, VEX; - - def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), - (VPSHUFDmi addr:$src1, imm:$imm)>; - def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (VPSHUFDri VR128:$src1, imm:$imm)>; + def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, i8imm:$src2), + !strconcat("v", OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, + (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], + IIC_SSE_PSHUF>, VEX; + def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), + (ins i128mem:$src1, i8imm:$src2), + !strconcat("v", OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, + (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX; } let Predicates = [HasAVX2] in { - defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, - TB, OpSize, VEX,VEX_L; - defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, - XS, VEX, VEX_L; - defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, - XD, VEX, VEX_L; + def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, i8imm:$src2), + !strconcat("v", OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, + (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], + IIC_SSE_PSHUF>, VEX, VEX_L; + def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), + (ins i256mem:$src1, i8imm:$src2), + !strconcat("v", OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, + (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L; } let Predicates = [UseSSE2] in { - let AddedComplexity = 5 in - defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize; + def ri : Ii8<0x70, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, + (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], + IIC_SSE_PSHUF>; + def mi : Ii8<0x70, MRMSrcMem, + (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, + (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), + (i8 imm:$src2))))], IIC_SSE_PSHUF>; +} +} +} // ExeDomain = SSEPackedInt - // SSE2 with ImmT == Imm8 and XS prefix. - defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, X86PShufhw>, XS; +defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, TB, OpSize; +defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS; +defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD; - // SSE2 with ImmT == Imm8 and XD prefix. 
- defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, X86PShuflw>, XD; +let Predicates = [HasAVX] in { + def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), + (VPSHUFDmi addr:$src1, imm:$imm)>; + def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (VPSHUFDri VR128:$src1, imm:$imm)>; +} - def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), - (PSHUFDmi addr:$src1, imm:$imm)>; - def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), - (PSHUFDri VR128:$src1, imm:$imm)>; +let Predicates = [UseSSE2] in { + def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), + (PSHUFDmi addr:$src1, imm:$imm)>; + def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), + (PSHUFDri VR128:$src1, imm:$imm)>; } //===---------------------------------------------------------------------===// @@ -5844,6 +5590,55 @@ defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq", defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; +let Predicates = [HasAVX2] in { + def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>; + def : Pat<(v8i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDYrr VR128:$src)>; + def : Pat<(v4i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQYrr VR128:$src)>; + + def : Pat<(v8i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>; + def : Pat<(v4i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQYrr VR128:$src)>; + + def : Pat<(v4i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>; + + def : Pat<(v16i16 (X86vsext (v32i8 VR256:$src))), + (VPMOVSXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v8i32 (X86vsext (v32i8 VR256:$src))), + (VPMOVSXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v4i64 (X86vsext (v32i8 VR256:$src))), + (VPMOVSXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v8i32 (X86vsext (v16i16 VR256:$src))), + (VPMOVSXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v4i64 (X86vsext (v16i16 VR256:$src))), + (VPMOVSXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v4i64 (X86vsext (v8i32 VR256:$src))), + (VPMOVSXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v8i32 (X86vsmovl (v8i16 (bitconvert (v2i64 (load addr:$src)))))), + (VPMOVSXWDYrm addr:$src)>; + def : Pat<(v4i64 (X86vsmovl (v4i32 (bitconvert (v2i64 (load addr:$src)))))), + (VPMOVSXDQYrm addr:$src)>; + + def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64 + (scalar_to_vector (loadi64 addr:$src))))))), + (VPMOVSXBDYrm addr:$src)>; + def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64 + (scalar_to_vector (loadf64 addr:$src))))))), + (VPMOVSXBDYrm addr:$src)>; + + def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64 + (scalar_to_vector (loadi64 addr:$src))))))), + (VPMOVSXWQYrm addr:$src)>; + def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64 + (scalar_to_vector (loadf64 addr:$src))))))), + (VPMOVSXWQYrm addr:$src)>; + + def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32 + (scalar_to_vector (loadi32 addr:$src))))))), + (VPMOVSXBQYrm addr:$src)>; +} + let Predicates = [HasAVX] in { // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbq @@ -5858,6 +5653,15 @@ let Predicates = [HasAVX] in { } let Predicates = [UseSSE41] in { + def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>; + def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (PMOVSXBDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (PMOVSXBQrr VR128:$src)>; + + def : 
Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (PMOVSXWQrr VR128:$src)>; + + def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; + // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbq (bitconvert (v4i32 (X86vzmovl @@ -5868,6 +5672,34 @@ let Predicates = [UseSSE41] in { (bitconvert (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), (PMOVZXBQrm addr:$src)>; + + def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64 + (scalar_to_vector (loadi64 addr:$src))))))), + (PMOVSXWDrm addr:$src)>; + def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64 + (scalar_to_vector (loadf64 addr:$src))))))), + (PMOVSXWDrm addr:$src)>; + def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32 + (scalar_to_vector (loadi32 addr:$src))))))), + (PMOVSXBDrm addr:$src)>; + def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32 + (scalar_to_vector (loadi32 addr:$src))))))), + (PMOVSXWQrm addr:$src)>; + def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32 + (scalar_to_vector (extloadi32i16 addr:$src))))))), + (PMOVSXBQrm addr:$src)>; + def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64 + (scalar_to_vector (loadi64 addr:$src))))))), + (PMOVSXDQrm addr:$src)>; + def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64 + (scalar_to_vector (loadf64 addr:$src))))))), + (PMOVSXDQrm addr:$src)>; + def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64 + (scalar_to_vector (loadi64 addr:$src))))))), + (PMOVSXBWrm addr:$src)>; + def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64 + (scalar_to_vector (loadf64 addr:$src))))))), + (PMOVSXBWrm addr:$src)>; } let Predicates = [HasAVX2] in { @@ -5928,6 +5760,44 @@ let Predicates = [HasAVX] in { (VPMOVZXDQrm addr:$src)>; def : Pat<(v2i64 (X86vzext (v4i32 (bitconvert (v2i64 (X86vzload addr:$src)))))), (VPMOVZXDQrm addr:$src)>; + + def : Pat<(v8i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>; + def : Pat<(v4i32 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBQrr VR128:$src)>; + + def : Pat<(v4i32 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsext (v8i16 VR128:$src))), (VPMOVSXWQrr VR128:$src)>; + + def : Pat<(v2i64 (X86vsext (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>; + + def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2i64 + (scalar_to_vector (loadi64 addr:$src))))))), + (VPMOVSXWDrm addr:$src)>; + def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2i64 + (scalar_to_vector (loadi64 addr:$src))))))), + (VPMOVSXDQrm addr:$src)>; + def : Pat<(v4i32 (X86vsext (v8i16 (bitconvert (v2f64 + (scalar_to_vector (loadf64 addr:$src))))))), + (VPMOVSXWDrm addr:$src)>; + def : Pat<(v2i64 (X86vsext (v4i32 (bitconvert (v2f64 + (scalar_to_vector (loadf64 addr:$src))))))), + (VPMOVSXDQrm addr:$src)>; + def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2i64 + (scalar_to_vector (loadi64 addr:$src))))))), + (VPMOVSXBWrm addr:$src)>; + def : Pat<(v8i16 (X86vsext (v16i8 (bitconvert (v2f64 + (scalar_to_vector (loadf64 addr:$src))))))), + (VPMOVSXBWrm addr:$src)>; + + def : Pat<(v4i32 (X86vsext (v16i8 (bitconvert (v4i32 + (scalar_to_vector (loadi32 addr:$src))))))), + (VPMOVSXBDrm addr:$src)>; + def : Pat<(v2i64 (X86vsext (v8i16 (bitconvert (v4i32 + (scalar_to_vector (loadi32 addr:$src))))))), + (VPMOVSXWQrm addr:$src)>; + def : Pat<(v2i64 (X86vsext (v16i8 (bitconvert (v4i32 + (scalar_to_vector (extloadi32i16 addr:$src))))))), + (VPMOVSXBQrm addr:$src)>; } let 
Predicates = [UseSSE41] in { @@ -6267,6 +6137,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, Intrinsic F64Int, bit Is2Addr = 1> { let ExeDomain = GenericDomain in { // Operation, reg. + let hasSideEffects = 0 in def SSr : SS4AIi8<opcss, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3), !if(Is2Addr, @@ -6300,6 +6171,7 @@ let ExeDomain = GenericDomain in { OpSize; // Operation, reg. + let hasSideEffects = 0 in def SDr : SS4AIi8<opcsd, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3), !if(Is2Addr, @@ -6621,67 +6493,6 @@ multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr, (bitconvert (memopv4i64 addr:$src2))))]>, OpSize; } -let Predicates = [HasAVX] in { - let isCommutable = 0 in - defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, - 0>, VEX_4V; - defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb, - 0>, VEX_4V; - defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd, - 0>, VEX_4V; - defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud, - 0>, VEX_4V; - defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw, - 0>, VEX_4V; - defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb, - 0>, VEX_4V; - defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd, - 0>, VEX_4V; - defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud, - 0>, VEX_4V; - defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw, - 0>, VEX_4V; - defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, - 0>, VEX_4V; -} - -let Predicates = [HasAVX2] in { - let isCommutable = 0 in - defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", - int_x86_avx2_packusdw>, VEX_4V, VEX_L; - defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb", - int_x86_avx2_pmins_b>, VEX_4V, VEX_L; - defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd", - int_x86_avx2_pmins_d>, VEX_4V, VEX_L; - defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud", - int_x86_avx2_pminu_d>, VEX_4V, VEX_L; - defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw", - int_x86_avx2_pminu_w>, VEX_4V, VEX_L; - defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb", - int_x86_avx2_pmaxs_b>, VEX_4V, VEX_L; - defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd", - int_x86_avx2_pmaxs_d>, VEX_4V, VEX_L; - defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud", - int_x86_avx2_pmaxu_d>, VEX_4V, VEX_L; - defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw", - int_x86_avx2_pmaxu_w>, VEX_4V, VEX_L; - defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq", - int_x86_avx2_pmul_dq>, VEX_4V, VEX_L; -} - -let Constraints = "$src1 = $dst" in { - let isCommutable = 0 in - defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; - defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>; - defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>; - defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>; - defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>; - defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>; - defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>; - defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>; - defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>; - defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; -} /// SS48I_binop_rm - Simple SSE41 binary operator. 
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, @@ -6705,6 +6516,76 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, } let Predicates = [HasAVX] in { + let isCommutable = 0 in + defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, + 0>, VEX_4V; + defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128, + memopv2i64, i128mem, 0>, VEX_4V; + defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, + 0>, VEX_4V; +} + +let Predicates = [HasAVX2] in { + let isCommutable = 0 in + defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", + int_x86_avx2_packusdw>, VEX_4V, VEX_L; + defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256, + memopv4i64, i256mem, 0>, VEX_4V, VEX_L; + defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq", + int_x86_avx2_pmul_dq>, VEX_4V, VEX_L; +} + +let Constraints = "$src1 = $dst" in { + let isCommutable = 0 in + defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; + defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128, + memopv2i64, i128mem>; + defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128, + memopv2i64, i128mem>; + defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128, + memopv2i64, i128mem>; + defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128, + memopv2i64, i128mem>; + defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128, + memopv2i64, i128mem>; + defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128, + memopv2i64, i128mem>; + defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128, + memopv2i64, i128mem>; + defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128, + memopv2i64, i128mem>; + defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; +} + +let Predicates = [HasAVX] in { defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, memopv2i64, i128mem, 0>, 
VEX_4V; defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128, diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index 893488c159..1185941d34 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -51,6 +51,7 @@ def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), // NOTE: We don't include patterns for shifts of a register by one, because // 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one). +let hasSideEffects = 0 in { def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1), "shl{b}\t$dst", [], IIC_SR>; def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1), @@ -59,8 +60,9 @@ def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1), "shl{l}\t$dst", [], IIC_SR>; def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), "shl{q}\t$dst", [], IIC_SR>; +} // hasSideEffects = 0 } // isConvertibleToThreeAddress = 1 -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst" // FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern @@ -333,6 +335,7 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), // Rotate instructions //===----------------------------------------------------------------------===// +let hasSideEffects = 0 in { let Constraints = "$src1 = $dst" in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), "rcl{b}\t$dst", [], IIC_SR>; @@ -455,6 +458,7 @@ def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; } +} // hasSideEffects = 0 let Constraints = "$src1 = $dst" in { // FIXME: provide shorter instructions when imm8 == 1 diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 4b528f6153..066830c845 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -16,7 +16,7 @@ #include "X86Relocations.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h"//TODO(dschuff):don't forget to remove these #include "llvm/Support/Disassembler.h" diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h index 7b0f3434d0..f916327378 100644 --- a/lib/Target/X86/X86JITInfo.h +++ b/lib/Target/X86/X86JITInfo.h @@ -15,7 +15,7 @@ #define X86JITINFO_H #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Target/TargetJITInfo.h" namespace llvm { diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 9b6d1d4c13..8528002f6d 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -17,6 +17,7 @@ #include "X86COFFMachineModuleInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -26,7 +27,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/Mangler.h" -#include "llvm/Type.h" using namespace llvm; namespace { diff --git a/lib/Target/X86/X86NaClRewriteFinalPass.cpp b/lib/Target/X86/X86NaClRewriteFinalPass.cpp index e850dcaf9e..0305a336e2 100644 --- a/lib/Target/X86/X86NaClRewriteFinalPass.cpp +++ b/lib/Target/X86/X86NaClRewriteFinalPass.cpp @@ -24,7 +24,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" 
#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" using namespace llvm; diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp new file mode 100644 index 0000000000..83e75ea994 --- /dev/null +++ b/lib/Target/X86/X86PadShortFunction.cpp @@ -0,0 +1,212 @@ +//===-------- X86PadShortFunction.cpp - pad short functions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass which will pad short functions to prevent +// a stall if a function returns before the return address is ready. This +// is needed for some Intel Atom processors. +// +//===----------------------------------------------------------------------===// + +#include <algorithm> + +#define DEBUG_TYPE "x86-pad-short-functions" +#include "X86.h" +#include "X86InstrInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +STATISTIC(NumBBsPadded, "Number of basic blocks padded"); + +namespace { + struct VisitedBBInfo { + // HasReturn - Whether the BB contains a return instruction + bool HasReturn; + + // Cycles - Number of cycles until return if HasReturn is true, otherwise + // number of cycles until end of the BB + unsigned int Cycles; + + VisitedBBInfo() : HasReturn(false), Cycles(0) {} + VisitedBBInfo(bool HasReturn, unsigned int Cycles) + : HasReturn(HasReturn), Cycles(Cycles) {} + }; + + struct PadShortFunc : public MachineFunctionPass { + static char ID; + PadShortFunc() : MachineFunctionPass(ID) + , Threshold(4), TM(0), TII(0) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "X86 Atom pad short functions"; + } + + private: + void findReturns(MachineBasicBlock *MBB, + unsigned int Cycles = 0); + + bool cyclesUntilReturn(MachineBasicBlock *MBB, + unsigned int &Cycles); + + void addPadding(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &MBBI, + unsigned int NOOPsToAdd); + + const unsigned int Threshold; + + // ReturnBBs - Maps basic blocks that return to the minimum number of + // cycles until the return, starting from the entry block. + DenseMap<MachineBasicBlock*, unsigned int> ReturnBBs; + + // VisitedBBs - Cache of previously visited BBs. + DenseMap<MachineBasicBlock*, VisitedBBInfo> VisitedBBs; + + const TargetMachine *TM; + const TargetInstrInfo *TII; + }; + + char PadShortFunc::ID = 0; +} + +FunctionPass *llvm::createX86PadShortFunctions() { + return new PadShortFunc(); +} + +/// runOnMachineFunction - Loop over all of the basic blocks, inserting +/// NOOP instructions before early exits. 
+bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { + const AttributeSet &FnAttrs = MF.getFunction()->getAttributes(); + if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize) || + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::MinSize)) { + return false; + } + + TM = &MF.getTarget(); + TII = TM->getInstrInfo(); + + // Search through basic blocks and mark the ones that have early returns + ReturnBBs.clear(); + VisitedBBs.clear(); + findReturns(MF.begin()); + + bool MadeChange = false; + + MachineBasicBlock *MBB; + unsigned int Cycles = 0; + + // Pad the identified basic blocks with NOOPs + for (DenseMap<MachineBasicBlock*, unsigned int>::iterator I = ReturnBBs.begin(); + I != ReturnBBs.end(); ++I) { + MBB = I->first; + Cycles = I->second; + + if (Cycles < Threshold) { + // BB ends in a return. Skip over any DBG_VALUE instructions + // trailing the terminator. + assert(MBB->size() > 0 && + "Basic block should contain at least a RET but is empty"); + MachineBasicBlock::iterator ReturnLoc = --MBB->end(); + + while (ReturnLoc->isDebugValue()) + --ReturnLoc; + assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() && + "Basic block does not end with RET"); + + addPadding(MBB, ReturnLoc, Threshold - Cycles); + NumBBsPadded++; + MadeChange = true; + } + } + + return MadeChange; +} + +/// findReturn - Starting at MBB, follow control flow and add all +/// basic blocks that contain a return to ReturnBBs. +void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) { + // If this BB has a return, note how many cycles it takes to get there. + bool hasReturn = cyclesUntilReturn(MBB, Cycles); + if (Cycles >= Threshold) + return; + + if (hasReturn) { + ReturnBBs[MBB] = std::max(ReturnBBs[MBB], Cycles); + return; + } + + // Follow branches in BB and look for returns + for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(); + I != MBB->succ_end(); ++I) { + if (*I == MBB) + continue; + findReturns(*I, Cycles); + } +} + +/// cyclesUntilReturn - return true if the MBB has a return instruction, +/// and return false otherwise. +/// Cycles will be incremented by the number of cycles taken to reach the +/// return or the end of the BB, whichever occurs first. +bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB, + unsigned int &Cycles) { + // Return cached result if BB was previously visited + DenseMap<MachineBasicBlock*, VisitedBBInfo>::iterator it + = VisitedBBs.find(MBB); + if (it != VisitedBBs.end()) { + VisitedBBInfo BBInfo = it->second; + Cycles += BBInfo.Cycles; + return BBInfo.HasReturn; + } + + unsigned int CyclesToEnd = 0; + + for (MachineBasicBlock::iterator MBBI = MBB->begin(); + MBBI != MBB->end(); ++MBBI) { + MachineInstr *MI = MBBI; + // Mark basic blocks with a return instruction. Calls to other + // functions do not count because the called function will be padded, + // if necessary. 
+ if (MI->isReturn() && !MI->isCall()) { + VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd); + Cycles += CyclesToEnd; + return true; + } + + CyclesToEnd += TII->getInstrLatency(TM->getInstrItineraryData(), MI); + } + + VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd); + Cycles += CyclesToEnd; + return false; +} + +/// addPadding - Add the given number of NOOP instructions to the function +/// just prior to the return at MBBI +void PadShortFunc::addPadding(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &MBBI, + unsigned int NOOPsToAdd) { + DebugLoc DL = MBBI->getDebugLoc(); + + while (NOOPsToAdd-- > 0) { + BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP)); + BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP)); + } +} diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 1b16d6d8f7..f32bd664e1 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -28,8 +28,9 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -37,7 +38,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" #define GET_REGINFO_TARGET_DESC #include "X86GenRegisterInfo.inc" @@ -61,10 +61,12 @@ extern cl::opt<bool> FlagRestrictR15; X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) - : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() - ? X86::RIP : X86::EIP, + : X86GenRegisterInfo((tm.getSubtarget<X86Subtarget>().is64Bit() + ? X86::RIP : X86::EIP), X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), false), - X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true)), + X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true), + (tm.getSubtarget<X86Subtarget>().is64Bit() + ? X86::RIP : X86::EIP)), TM(tm), TII(tii) { X86_MC::InitLLVM2SEHRegisterMapping(this); @@ -441,7 +443,8 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->getFnAttributes().hasAttribute(Attributes::StackAlignment)); + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)); // If we've requested that we force align the stack do so now. if (ForceStackAlign) diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index c14407f9ac..d99d085298 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -470,12 +470,17 @@ def IIC_NOP : InstrItinClass; // latencies. Since these latencies are not used for pipeline hazards, // they do not need to be exact. // +// ILPWindow=10 is an arbitrary threshold that approximates cycles of +// latency hidden by instruction buffers. The actual value is not very +// important but should be zero for inorder and nonzero for OOO processors. +// // The GenericModel contains no instruciton itineraries. 
def GenericModel : SchedMachineModel { let IssueWidth = 4; let MinLatency = 0; let LoadLatency = 4; let HighLatency = 10; + let ILPWindow = 10; } include "X86ScheduleAtom.td" diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 87102614cc..1e5f2d6c9a 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -525,6 +525,7 @@ def AtomModel : SchedMachineModel { // OperandCycles may be used for expected latency. let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles. let HighLatency = 30;// Expected, may be overriden by OperandCycles. + let ILPWindow = 0; // Always try to hide expected latency. let Itineraries = AtomItineraries; } diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 8c7e09c324..7814ca41cf 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -14,7 +14,7 @@ #define DEBUG_TYPE "x86-selectiondag-info" #include "X86TargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/DerivedTypes.h" using namespace llvm; X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) : diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 52175bcd6c..b517019e85 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -14,7 +14,7 @@ #define DEBUG_TYPE "subtarget" #include "X86Subtarget.h" #include "X86InstrInfo.h" -#include "llvm/GlobalValue.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Host.h" @@ -358,6 +358,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , UseLeaForSP(false) , HasSlowDivide(false) , PostRAScheduler(false) + , PadShortFunctions(false) , stackAlignment(4) // FIXME: this is a known good value for Yonah. How about others? , MaxInlineSizeThreshold(128) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 5e5485ea49..b5e3ec6270 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -15,7 +15,7 @@ #define X86SUBTARGET_H #include "llvm/ADT/Triple.h" -#include "llvm/CallingConv.h" +#include "llvm/IR/CallingConv.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -146,6 +146,10 @@ protected: /// PostRAScheduler - True if using post-register-allocation scheduler. bool PostRAScheduler; + /// PadShortFunctions - True if the short functions should be padded to prevent + /// a stall when returning too early. + bool PadShortFunctions; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. 
unsigned stackAlignment; @@ -234,6 +238,7 @@ public: bool hasCmpxchg16b() const { return HasCmpxchg16b; } bool useLeaForSP() const { return UseLeaForSP; } bool hasSlowDivide() const { return HasSlowDivide; } + bool padShortFunctions() const { return PadShortFunctions; } bool isAtom() const { return X86ProcFamily == IntelAtom; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 38a4fb12ce..c293bce4f5 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -48,10 +48,9 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-" "n8:16:32-S128"), InstrInfo(*this), - TSInfo(*this), TLInfo(*this), - JITInfo(*this), - STTI(&TLInfo), VTTI(&TLInfo) { + TSInfo(*this), + JITInfo(*this) { } void X86_64TargetMachine::anchor() { } @@ -68,10 +67,9 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" "n8:16:32:64-S128"), InstrInfo(*this), - TSInfo(*this), TLInfo(*this), - JITInfo(*this), - STTI(&TLInfo), VTTI(&TLInfo){ + TSInfo(*this), + JITInfo(*this) { } /// X86TargetMachine ctor - Create an X86 target. @@ -126,6 +124,19 @@ X86EarlyIfConv("x86-early-ifcvt", cl::desc("Enable early if-conversion on X86")); //===----------------------------------------------------------------------===// +// X86 Analysis Pass Setup +//===----------------------------------------------------------------------===// + +void X86TargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our X86 pass. This + // allows the X86 pass to delegate to the target independent layer when + // appropriate. + PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createX86TargetTransformInfoPass(this)); +} + + +//===----------------------------------------------------------------------===// // Pass Pipeline Configuration //===----------------------------------------------------------------------===// @@ -196,6 +207,12 @@ bool X86PassConfig::addPreEmitPass() { ShouldPrint = true; } + if (getOptLevel() != CodeGenOpt::None && + getX86Subtarget().padShortFunctions()) { + addPass(createX86PadShortFunctions()); + ShouldPrint = true; + } + // @LOCALMOD-START if (getX86Subtarget().isTargetNaCl()) { addPass(createX86NaClRewritePass()); diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 792f721e76..174d391831 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -21,10 +21,9 @@ #include "X86JITInfo.h" #include "X86SelectionDAGInfo.h" #include "X86Subtarget.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -65,6 +64,9 @@ public: return &InstrItins; } + /// \brief Register X86 analysis passes with a pass manager. + virtual void addAnalysisPasses(PassManagerBase &PM); + // Set up the pass pipeline. 
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); @@ -78,11 +80,9 @@ class X86_32TargetMachine : public X86TargetMachine { virtual void anchor(); const DataLayout DL; // Calculates type size & alignment X86InstrInfo InstrInfo; - X86SelectionDAGInfo TSInfo; X86TargetLowering TLInfo; + X86SelectionDAGInfo TSInfo; X86JITInfo JITInfo; - ScalarTargetTransformImpl STTI; - X86VectorTargetTransformInfo VTTI; public: X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -101,12 +101,6 @@ public: virtual X86JITInfo *getJITInfo() { return &JITInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } }; /// X86_64TargetMachine - X86 64-bit target machine. @@ -115,11 +109,9 @@ class X86_64TargetMachine : public X86TargetMachine { virtual void anchor(); const DataLayout DL; // Calculates type size & alignment X86InstrInfo InstrInfo; - X86SelectionDAGInfo TSInfo; X86TargetLowering TLInfo; + X86SelectionDAGInfo TSInfo; X86JITInfo JITInfo; - X86ScalarTargetTransformImpl STTI; - X86VectorTargetTransformInfo VTTI; public: X86_64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -138,12 +130,6 @@ public: virtual X86JITInfo *getJITInfo() { return &JITInfo; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 4a1788a85e..3347449cb7 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -8,17 +8,13 @@ //===----------------------------------------------------------------------===// #include "X86TargetObjectFile.h" -#include "X86TargetMachine.h" #include "X86Subtarget.h" // @LOCALMOD -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/Dwarf.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/Mangler.h" + using namespace llvm; using namespace dwarf; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp new file mode 100644 index 0000000000..675c896d70 --- /dev/null +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -0,0 +1,383 @@ +//===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// X86 target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. 
+/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86tti" +#include "X86.h" +#include "X86TargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +// Declare the pass initialization routine locally as target-specific passes +// don't havve a target-wide initialization entry point, and so we rely on the +// pass constructor initialization. +namespace llvm { +void initializeX86TTIPass(PassRegistry &); +} + +namespace { + +class X86TTI : public ImmutablePass, public TargetTransformInfo { + const X86TargetMachine *TM; + const X86Subtarget *ST; + const X86TargetLowering *TLI; + + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + +public: + X86TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + X86TTI(const X86TargetMachine *TM) + : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), + TLI(TM->getTargetLowering()) { + initializeX86TTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + pushTTIStack(this); + } + + virtual void finalizePass() { + popTTIStack(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + TargetTransformInfo::getAnalysisUsage(AU); + } + + /// Pass identification. + static char ID; + + /// Provide necessary pointer adjustments for the two base classes. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + /// \name Scalar TTI Implementations + /// @{ + virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getRegisterBitWidth(bool Vector) const; + virtual unsigned getMaximumUnrollFactor() const; + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; + virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) const; + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const; + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const; + virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const; + + /// @} +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti", + "X86 Target Transform Info", true, true, false) +char X86TTI::ID = 0; + +ImmutablePass * +llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) { + return new X86TTI(TM); +} + + +//===----------------------------------------------------------------------===// +// +// X86 cost model. +// +//===----------------------------------------------------------------------===// + +namespace { +struct X86CostTblEntry { + int ISD; + MVT Type; + unsigned Cost; +}; +} + +static int +FindInTable(const X86CostTblEntry *Tbl, unsigned len, int ISD, MVT Ty) { + for (unsigned int i = 0; i < len; ++i) + if (Tbl[i].ISD == ISD && Tbl[i].Type == Ty) + return i; + + // Could not find an entry. 
+ return -1; +} + +namespace { +struct X86TypeConversionCostTblEntry { + int ISD; + MVT Dst; + MVT Src; + unsigned Cost; +}; +} + +static int +FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len, + int ISD, MVT Dst, MVT Src) { + for (unsigned int i = 0; i < len; ++i) + if (Tbl[i].ISD == ISD && Tbl[i].Src == Src && Tbl[i].Dst == Dst) + return i; + + // Could not find an entry. + return -1; +} + +X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { + assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); + // TODO: Currently the __builtin_popcount() implementation using SSE3 + // instructions is inefficient. Once the problem is fixed, we should + // call ST->hasSSE3() instead of ST->hasSSE4(). + return ST->hasSSE41() ? PSK_FastHardware : PSK_Software; +} + +unsigned X86TTI::getNumberOfRegisters(bool Vector) const { + if (Vector && !ST->hasSSE1()) + return 0; + + if (ST->is64Bit()) + return 16; + return 8; +} + +unsigned X86TTI::getRegisterBitWidth(bool Vector) const { + if (Vector) { + if (ST->hasAVX()) return 256; + if (ST->hasSSE1()) return 128; + return 0; + } + + if (ST->is64Bit()) + return 64; + return 32; + +} + +unsigned X86TTI::getMaximumUnrollFactor() const { + if (ST->isAtom()) + return 1; + + // Sandybridge and Haswell have multiple execution ports and pipelined + // vector units. + if (ST->hasAVX()) + return 4; + + return 2; +} + +unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { + // Legalize the type. + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + static const X86CostTblEntry AVX1CostTable[] = { + // We don't have to scalarize unsupported ops. We can issue two half-sized + // operations and we only need to extract the upper YMM half. + // Two ops + 1 extract + 1 insert = 4. + { ISD::MUL, MVT::v8i32, 4 }, + { ISD::SUB, MVT::v8i32, 4 }, + { ISD::ADD, MVT::v8i32, 4 }, + { ISD::MUL, MVT::v4i64, 4 }, + { ISD::SUB, MVT::v4i64, 4 }, + { ISD::ADD, MVT::v4i64, 4 }, + }; + + // Look for AVX1 lowering tricks. + if (ST->hasAVX()) { + int Idx = FindInTable(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, + LT.second); + if (Idx != -1) + return LT.first * AVX1CostTable[Idx].Cost; + } + // Fallback to the default implementation. + return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty); +} + +unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) const { + // We only estimate the cost of reverse shuffles. + if (Kind != SK_Reverse) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + unsigned Cost = 1; + if (LT.second.getSizeInBits() > 128) + Cost = 3; // Extract + insert + copy. + + // Multiple by the number of parts. 
+ return Cost * LT.first; +} + +unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + EVT SrcTy = TLI->getValueType(Src); + EVT DstTy = TLI->getValueType(Dst); + + if (!SrcTy.isSimple() || !DstTy.isSimple()) + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); + + static const X86TypeConversionCostTblEntry AVXConversionTbl[] = { + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, + { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, + }; + + if (ST->hasAVX()) { + int Idx = FindInConvertTable(AVXConversionTbl, + array_lengthof(AVXConversionTbl), + ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); + if (Idx != -1) + return AVXConversionTbl[Idx].Cost; + } + + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); +} + +unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + // Legalize the type. + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); + + MVT MTy = LT.second; + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + static const X86CostTblEntry SSE42CostTbl[] = { + { ISD::SETCC, MVT::v2f64, 1 }, + { ISD::SETCC, MVT::v4f32, 1 }, + { ISD::SETCC, MVT::v2i64, 1 }, + { ISD::SETCC, MVT::v4i32, 1 }, + { ISD::SETCC, MVT::v8i16, 1 }, + { ISD::SETCC, MVT::v16i8, 1 }, + }; + + static const X86CostTblEntry AVX1CostTbl[] = { + { ISD::SETCC, MVT::v4f64, 1 }, + { ISD::SETCC, MVT::v8f32, 1 }, + // AVX1 does not support 8-wide integer compare. + { ISD::SETCC, MVT::v4i64, 4 }, + { ISD::SETCC, MVT::v8i32, 4 }, + { ISD::SETCC, MVT::v16i16, 4 }, + { ISD::SETCC, MVT::v32i8, 4 }, + }; + + static const X86CostTblEntry AVX2CostTbl[] = { + { ISD::SETCC, MVT::v4i64, 1 }, + { ISD::SETCC, MVT::v8i32, 1 }, + { ISD::SETCC, MVT::v16i16, 1 }, + { ISD::SETCC, MVT::v32i8, 1 }, + }; + + if (ST->hasAVX2()) { + int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * AVX2CostTbl[Idx].Cost; + } + + if (ST->hasAVX()) { + int Idx = FindInTable(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * AVX1CostTbl[Idx].Cost; + } + + if (ST->hasSSE42()) { + int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); + if (Idx != -1) + return LT.first * SSE42CostTbl[Idx].Cost; + } + + return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); +} + +unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + assert(Val->isVectorTy() && "This must be a vector type"); + + if (Index != -1U) { + // Legalize the type. + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val); + + // This type is legalized to a scalar type. + if (!LT.second.isVector()) + return 0; + + // The type may be split. Normalize the index to the new type. 
+ unsigned Width = LT.second.getVectorNumElements(); + Index = Index % Width; + + // Floating point scalars are already located in index #0. + if (Val->getScalarType()->isFloatingPointTy() && Index == 0) + return 0; + } + + return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); +} + +unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const { + // Legalize the type. + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && + "Invalid Opcode"); + + // Each load/store unit costs 1. + unsigned Cost = LT.first * 1; + + // On Sandybridge 256bit load/stores are double pumped + // (but not on Haswell). + if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2()) + Cost*=2; + + return Cost; +} diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt index ca94f03a64..099ad390d2 100644 --- a/lib/Target/XCore/CMakeLists.txt +++ b/lib/Target/XCore/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_TARGET_DEFINITIONS XCore.td) tablegen(LLVM XCoreGenRegisterInfo.inc -gen-register-info) tablegen(LLVM XCoreGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM XCoreGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM XCoreGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM XCoreGenDAGISel.inc -gen-dag-isel) tablegen(LLVM XCoreGenCallingConv.inc -gen-callingconv) @@ -15,6 +16,7 @@ add_llvm_target(XCoreCodeGen XCoreISelDAGToDAG.cpp XCoreISelLowering.cpp XCoreMachineFunctionInfo.cpp + XCoreMCInstLower.cpp XCoreRegisterInfo.cpp XCoreSubtarget.cpp XCoreTargetMachine.cpp @@ -24,5 +26,7 @@ add_llvm_target(XCoreCodeGen add_dependencies(LLVMXCoreCodeGen intrinsics_gen) +add_subdirectory(Disassembler) +add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/XCore/Disassembler/CMakeLists.txt b/lib/Target/XCore/Disassembler/CMakeLists.txt new file mode 100644 index 0000000000..cdc5d993b8 --- /dev/null +++ b/lib/Target/XCore/Disassembler/CMakeLists.txt @@ -0,0 +1,5 @@ +add_llvm_library(LLVMXCoreDisassembler + XCoreDisassembler.cpp + ) + +add_dependencies(LLVMXCoreDisassembler XCoreCommonTableGen) diff --git a/lib/Target/XCore/Disassembler/LLVMBuild.txt b/lib/Target/XCore/Disassembler/LLVMBuild.txt new file mode 100644 index 0000000000..028de2cb34 --- /dev/null +++ b/lib/Target/XCore/Disassembler/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/XCore/Disassembler/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = XCoreDisassembler +parent = XCore +required_libraries = MC Support XCoreInfo +add_to_library_groups = XCore diff --git a/lib/Target/XCore/Disassembler/Makefile b/lib/Target/XCore/Disassembler/Makefile new file mode 100644 index 0000000000..4caffdd1da --- /dev/null +++ b/lib/Target/XCore/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/XCore/Disassembler/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMXCoreDisassembler + +# Hack: we need to include 'main' XCore target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp new file mode 100644 index 0000000000..094f18ceee --- /dev/null +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -0,0 +1,324 @@ +//===- XCoreDisassembler.cpp - Disassembler for XCore -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file is part of the XCore Disassembler. +/// +//===----------------------------------------------------------------------===// + +#include "XCore.h" +#include "XCoreRegisterInfo.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +namespace { + +/// \brief A disassembler class for XCore. +class XCoreDisassembler : public MCDisassembler { + const MCRegisterInfo *RegInfo; +public: + XCoreDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info) : + MCDisassembler(STI), RegInfo(Info) {} + + /// \brief See MCDisassembler. + virtual DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; + + const MCRegisterInfo *getRegInfo() const { return RegInfo; } +}; +} + +static bool readInstruction16(const MemoryObject ®ion, + uint64_t address, + uint64_t &size, + uint16_t &insn) { + uint8_t Bytes[4]; + + // We want to read exactly 2 Bytes of data. + if (region.readBytes(address, 2, Bytes, NULL) == -1) { + size = 0; + return false; + } + // Encoded as a little-endian 16-bit word in the stream. + insn = (Bytes[0] << 0) | (Bytes[1] << 8); + return true; +} + +static bool readInstruction32(const MemoryObject ®ion, + uint64_t address, + uint64_t &size, + uint32_t &insn) { + uint8_t Bytes[4]; + + // We want to read exactly 4 Bytes of data. + if (region.readBytes(address, 4, Bytes, NULL) == -1) { + size = 0; + return false; + } + // Encoded as a little-endian 32-bit word in the stream. 
+ insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) | + (Bytes[3] << 24); + return true; +} + +static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { + const XCoreDisassembler *Dis = static_cast<const XCoreDisassembler*>(D); + return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo); +} + +static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); + +static DecodeStatus Decode2RInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeR2RInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus Decode2RSrcDstInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeRUSInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeRUSBitpInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeRUSSrcDstBitpInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeL2RInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLR2RInstruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +#include "XCoreGenDisassemblerTables.inc" + +static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) +{ + if (RegNo > 11) + return MCDisassembler::Fail; + unsigned Reg = getReg(Decoder, XCore::GRRegsRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + if (Val > 11) + return MCDisassembler::Fail; + static unsigned Values[] = { + 32 /*bpw*/, 1, 2, 3, 4, 5, 6, 7, 8, 16, 24, 32 + }; + Inst.addOperand(MCOperand::CreateImm(Values[Val])); + return MCDisassembler::Success; +} + +static DecodeStatus +Decode2OpInstruction(unsigned Insn, unsigned &Op1, unsigned &Op2) { + unsigned Combined = fieldFromInstruction(Insn, 6, 5) + + fieldFromInstruction(Insn, 5, 1) * 5 - 27; + if (Combined >= 9) + return MCDisassembler::Fail; + + unsigned Op1High = Combined % 3; + unsigned Op2High = Combined / 3; + Op1 = (Op1High << 2) | fieldFromInstruction(Insn, 2, 2); + Op2 = (Op2High << 2) | fieldFromInstruction(Insn, 0, 2); + return MCDisassembler::Success; +} + +static DecodeStatus +Decode2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2; + DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + } + return S; +} + +static DecodeStatus +DecodeR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2; + DecodeStatus S = Decode2OpInstruction(Insn, Op2, Op1); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + } + return S; +} + +static DecodeStatus +Decode2RSrcDstInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void 
*Decoder) { + unsigned Op1, Op2; + DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + } + return S; +} + +static DecodeStatus +DecodeRUSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2; + DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + Inst.addOperand(MCOperand::CreateImm(Op2)); + } + return S; +} + +static DecodeStatus +DecodeRUSBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2; + DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeBitpOperand(Inst, Op2, Address, Decoder); + } + return S; +} + +static DecodeStatus +DecodeRUSSrcDstBitpInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2; + DecodeStatus S = Decode2OpInstruction(Insn, Op1, Op2); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeBitpOperand(Inst, Op2, Address, Decoder); + } + return S; +} + +static DecodeStatus +DecodeL2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2; + DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16), + Op1, Op2); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + } + return S; +} + +static DecodeStatus +DecodeLR2RInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, + const void *Decoder) { + unsigned Op1, Op2; + DecodeStatus S = Decode2OpInstruction(fieldFromInstruction(Insn, 0, 16), + Op1, Op2); + if (S == MCDisassembler::Success) { + DecodeGRRegsRegisterClass(Inst, Op2, Address, Decoder); + DecodeGRRegsRegisterClass(Inst, Op1, Address, Decoder); + } + return S; +} + +MCDisassembler::DecodeStatus +XCoreDisassembler::getInstruction(MCInst &instr, + uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &vStream, + raw_ostream &cStream) const { + uint16_t insn16; + + if (!readInstruction16(Region, Address, Size, insn16)) { + return Fail; + } + + // Calling the auto-generated decoder function. + DecodeStatus Result = decodeInstruction(DecoderTable16, instr, insn16, + Address, this, STI); + if (Result != Fail) { + Size = 2; + return Result; + } + + uint32_t insn32; + + if (!readInstruction32(Region, Address, Size, insn32)) { + return Fail; + } + + // Calling the auto-generated decoder function. + Result = decodeInstruction(DecoderTable32, instr, insn32, Address, this, STI); + if (Result != Fail) { + Size = 4; + return Result; + } + + return Fail; +} + +namespace llvm { + extern Target TheXCoreTarget; +} + +static MCDisassembler *createXCoreDisassembler(const Target &T, + const MCSubtargetInfo &STI) { + return new XCoreDisassembler(STI, T.createMCRegInfo("")); +} + +extern "C" void LLVMInitializeXCoreDisassembler() { + // Register the disassembler. 
+ TargetRegistry::RegisterMCDisassembler(TheXCoreTarget, + createXCoreDisassembler); +} diff --git a/lib/Target/XCore/InstPrinter/CMakeLists.txt b/lib/Target/XCore/InstPrinter/CMakeLists.txt new file mode 100644 index 0000000000..930e733cd7 --- /dev/null +++ b/lib/Target/XCore/InstPrinter/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMXCoreAsmPrinter + XCoreInstPrinter.cpp + ) + +add_dependencies(LLVMXCoreAsmPrinter XCoreCommonTableGen) diff --git a/lib/Target/XCore/InstPrinter/LLVMBuild.txt b/lib/Target/XCore/InstPrinter/LLVMBuild.txt new file mode 100644 index 0000000000..8750bc7ace --- /dev/null +++ b/lib/Target/XCore/InstPrinter/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/XCore/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = XCoreAsmPrinter +parent = XCore +required_libraries = MC Support +add_to_library_groups = XCore diff --git a/lib/Target/XCore/InstPrinter/Makefile b/lib/Target/XCore/InstPrinter/Makefile new file mode 100644 index 0000000000..1c1c61299c --- /dev/null +++ b/lib/Target/XCore/InstPrinter/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/XCore/AsmPrinter/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMXCoreAsmPrinter + +# Hack: we need to include 'main' xcore target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp new file mode 100644 index 0000000000..1592351c38 --- /dev/null +++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp @@ -0,0 +1,97 @@ +//===-- XCoreInstPrinter.cpp - Convert XCore MCInst to assembly syntax ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an XCore MCInst to a .s file. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "XCoreInstPrinter.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#include "XCoreGenAsmWriter.inc" + +void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << StringRef(getRegisterName(RegNo)).lower(); +} + +void XCoreInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + printInstruction(MI, O); + printAnnotation(O, Annot); +} + +void XCoreInstPrinter:: +printInlineJT(const MCInst *MI, int opNum, raw_ostream &O) { + report_fatal_error("can't handle InlineJT"); +} + +void XCoreInstPrinter:: +printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O) { + report_fatal_error("can't handle InlineJT32"); +} + +static void printExpr(const MCExpr *Expr, raw_ostream &OS) { + int Offset = 0; + const MCSymbolRefExpr *SRE; + + if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) { + SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS()); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS()); + assert(SRE && CE && "Binary expression must be sym+const."); + Offset = CE->getValue(); + } else { + SRE = dyn_cast<MCSymbolRefExpr>(Expr); + assert(SRE && "Unexpected MCExpr type."); + } + assert(SRE->getKind() == MCSymbolRefExpr::VK_None); + + OS << SRE->getSymbol(); + + if (Offset) { + if (Offset > 0) + OS << '+'; + OS << Offset; + } +} + +void XCoreInstPrinter:: +printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + printRegName(O, Op.getReg()); + return; + } + + if (Op.isImm()) { + O << Op.getImm(); + return; + } + + assert(Op.isExpr() && "unknown operand kind in printOperand"); + printExpr(Op.getExpr(), O); +} + +void XCoreInstPrinter:: +printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) { + printOperand(MI, opNum, O); + + if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0) + return; + + O << "+"; + printOperand(MI, opNum+1, O); +} diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h new file mode 100644 index 0000000000..772c515b5c --- /dev/null +++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h @@ -0,0 +1,44 @@ +//== XCoreInstPrinter.h - Convert XCore MCInst to assembly syntax -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains the declaration of the XCoreInstPrinter class, +/// which is used to print XCore MCInst to a .s file. +/// +//===----------------------------------------------------------------------===// + +#ifndef XCOREINSTPRINTER_H +#define XCOREINSTPRINTER_H +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + +class TargetMachine; + +class XCoreInstPrinter : public MCInstPrinter { +public: + XCoreInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + // Autogenerated by tblgen. 
+ void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); +private: + void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O); + void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O); +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt index 53b4a9e3f5..59e64ad085 100644 --- a/lib/Target/XCore/LLVMBuild.txt +++ b/lib/Target/XCore/LLVMBuild.txt @@ -16,13 +16,14 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = MCTargetDesc TargetInfo +subdirectories = Disassembler InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup name = XCore parent = Target has_asmprinter = 1 +has_disassembler = 1 [component_1] type = Library diff --git a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt index a80c939b43..8213f9e428 100644 --- a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = XCoreDesc parent = XCore -required_libraries = MC XCoreInfo +required_libraries = MC XCoreAsmPrinter XCoreInfo add_to_library_groups = XCore diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index bbfdd4356f..b5b072dcbd 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "XCoreMCTargetDesc.h" +#include "InstPrinter/XCoreInstPrinter.h" #include "XCoreMCAsmInfo.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" @@ -69,6 +70,15 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } +static MCInstPrinter *createXCoreMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + return new XCoreInstPrinter(MAI, MII, MRI); +} + // Force static initialization. extern "C" void LLVMInitializeXCoreTargetMC() { // Register the MC asm info. @@ -87,4 +97,8 @@ extern "C" void LLVMInitializeXCoreTargetMC() { // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget, createXCoreMCSubtargetInfo); + + // Register the MCInstPrinter + TargetRegistry::RegisterMCInstPrinter(TheXCoreTarget, + createXCoreMCInstPrinter); } diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile index b823c4ed37..92ddc88608 100644 --- a/lib/Target/XCore/Makefile +++ b/lib/Target/XCore/Makefile @@ -14,10 +14,10 @@ TARGET = XCore # Make sure that tblgen is run, first thing. 
BUILT_SOURCES = XCoreGenRegisterInfo.inc XCoreGenInstrInfo.inc \ XCoreGenAsmWriter.inc \ - XCoreGenDAGISel.inc XCoreGenCallingConv.inc \ - XCoreGenSubtargetInfo.inc + XCoreGenDAGISel.inc XCoreGenCallingConv.inc \ + XCoreGenDisassemblerTables.inc XCoreGenSubtargetInfo.inc -DIRS = TargetInfo MCTargetDesc +DIRS = Disassembler InstPrinter TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp index 9a0971d1e4..00e34e04fb 100644 --- a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp +++ b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "XCore.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/XCore/XCore.td b/lib/Target/XCore/XCore.td index 04a1dd5e95..e9a6d88fd6 100644 --- a/lib/Target/XCore/XCore.td +++ b/lib/Target/XCore/XCore.td @@ -41,7 +41,13 @@ def : Proc<"xs1b-generic", []>; // Declare the target which we are implementing //===----------------------------------------------------------------------===// +def XCoreAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + def XCore : Target { // Pull in Instruction Info: let InstructionSet = XCoreInstrInfo; + let AssemblyWriters = [XCoreAsmWriter]; } diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index 6760641efe..0d146ba4d9 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -14,7 +14,9 @@ #define DEBUG_TYPE "asm-printer" #include "XCore.h" +#include "InstPrinter/XCoreInstPrinter.h" #include "XCoreInstrInfo.h" +#include "XCoreMCInstLower.h" #include "XCoreSubtarget.h" #include "XCoreTargetMachine.h" #include "llvm/ADT/SmallString.h" @@ -25,14 +27,15 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -52,16 +55,17 @@ static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional, namespace { class XCoreAsmPrinter : public AsmPrinter { const XCoreSubtarget &Subtarget; + XCoreMCInstLower MCInstLowering; void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); public: explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){} + : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()), + MCInstLowering(*this) {} virtual const char *getPassName() const { return "XCore Assembly Printer"; } - void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O); void printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O, const std::string &directive = ".jmptable"); void printInlineJT32(const MachineInstr *MI, int opNum, raw_ostream &O) { @@ -75,18 +79,14 @@ namespace { void emitArrayBound(MCSymbol 
*Sym, const GlobalVariable *GV); virtual void EmitGlobalVariable(const GlobalVariable *GV); - void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd. - static const char *getRegisterName(unsigned RegNo); - void EmitFunctionEntryLabel(); void EmitInstruction(const MachineInstr *MI); + void EmitFunctionBodyStart(); void EmitFunctionBodyEnd(); virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; }; } // end of anonymous namespace -#include "XCoreGenAsmWriter.inc" - void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) { assert(((GV->hasExternalLinkage() || GV->hasWeakLinkage()) || @@ -171,12 +171,16 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // The ABI requires that unsigned scalar types smaller than 32 bits // are padded to 32 bits. if (Size < 4) - OutStreamer.EmitZeros(4 - Size, 0); + OutStreamer.EmitZeros(4 - Size); // Mark the end of the global OutStreamer.EmitRawText("\t.cc_bottom " + Twine(GVSym->getName()) + ".data"); } +void XCoreAsmPrinter::EmitFunctionBodyStart() { + MCInstLowering.Initialize(Mang, &MF->getContext()); +} + /// EmitFunctionBodyEnd - Targets can override this to emit stuff after /// the last basic block in the function. void XCoreAsmPrinter::EmitFunctionBodyEnd() { @@ -192,17 +196,6 @@ void XCoreAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitLabel(CurrentFnSym); } -void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - printOperand(MI, opNum, O); - - if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0) - return; - - O << "+"; - printOperand(MI, opNum+1, O); -} - void XCoreAsmPrinter:: printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O, const std::string &directive) { @@ -225,7 +218,7 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, const MachineOperand &MO = MI->getOperand(opNum); switch (MO.getType()) { case MachineOperand::MO_Register: - O << getRegisterName(MO.getReg()); + O << XCoreInstPrinter::getRegisterName(MO.getReg()); break; case MachineOperand::MO_Immediate: O << MO.getImm(); @@ -270,7 +263,7 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); } -printOperand(MI, OpNo, O); + printOperand(MI, OpNo, O); return false; } @@ -317,15 +310,30 @@ void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case XCore::ADD_2rus: if (MI->getOperand(2).getImm() == 0) { - O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", " - << getRegisterName(MI->getOperand(1).getReg()); + O << "\tmov " + << XCoreInstPrinter::getRegisterName(MI->getOperand(0).getReg()) << ", " + << XCoreInstPrinter::getRegisterName(MI->getOperand(1).getReg()); OutStreamer.EmitRawText(O.str()); return; } break; + case XCore::BR_JT: + case XCore::BR_JT32: + O << "\tbru " + << XCoreInstPrinter::getRegisterName(MI->getOperand(1).getReg()) << '\n'; + if (MI->getOpcode() == XCore::BR_JT) + printInlineJT(MI, 0, O); + else + printInlineJT32(MI, 0, O); + O << '\n'; + OutStreamer.EmitRawText(O.str()); + return; } - printInstruction(MI, O); - OutStreamer.EmitRawText(O.str()); + + MCInst TmpInst; + MCInstLowering.Lower(MI, TmpInst); + + OutStreamer.EmitInstruction(TmpInst); } // Force static initialization. 
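For context on the XCoreAsmPrinter change above: EmitInstruction now builds an MCInst via the new XCoreMCInstLower member (the class is added to the build in this commit, but its body is not shown in this hunk). A minimal, hypothetical sketch of the usual MachineInstr-to-MCInst lowering pattern — covering only register and immediate operands, with an invented helper name — would look roughly like:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/MC/MCInst.h"

using namespace llvm;

// Hypothetical helper, not part of this patch: lower the simple
// (register/immediate) operands of a MachineInstr into an MCInst,
// mirroring what a target MCInstLower class typically does.
static void lowerSimpleOperands(const MachineInstr *MI, MCInst &OutMI) {
  OutMI.setOpcode(MI->getOpcode());
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    switch (MO.getType()) {
    case MachineOperand::MO_Register:
      // Implicit register operands are not printed, so skip them.
      if (MO.isImplicit())
        break;
      OutMI.addOperand(MCOperand::CreateReg(MO.getReg()));
      break;
    case MachineOperand::MO_Immediate:
      OutMI.addOperand(MCOperand::CreateImm(MO.getImm()));
      break;
    default:
      // Symbolic operands (globals, external symbols, jump-table and
      // constant-pool indices) need MCSymbolRefExpr-based lowering,
      // which is where the real target-specific work lives.
      break;
    }
  }
}

The actual XCoreMCInstLower added by this commit also handles those symbolic operand kinds; the sketch only illustrates why EmitInstruction can hand the lowered MCInst straight to OutStreamer.EmitInstruction(TmpInst) instead of the old printInstruction/getRegisterName raw-text path.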
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 9257226f80..bb9c77a32f 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -22,8 +22,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/DataLayout.h" -#include "llvm/Function.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetOptions.h" @@ -100,11 +100,8 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const { bool FP = hasFP(MF); const AttributeSet &PAL = MF.getFunction()->getAttributes(); - for (unsigned I = 0, E = PAL.getNumAttrs(); I != E; ++I) - if (PAL.getAttributesAtIndex(I).hasAttribute(Attributes::Nest)) { - loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII); - break; - } + if (PAL.hasAttrSomewhere(Attribute::Nest)) + loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII); // Work out frame sizes. int FrameSize = MFI->getStackSize(); diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 459664bf58..472ce6305c 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -13,18 +13,18 @@ #include "XCore.h" #include "XCoreTargetMachine.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 391660a095..6e894acedb 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -19,7 +19,6 @@ #include "XCoreSubtarget.h" #include "XCoreTargetMachine.h" #include "XCoreTargetObjectFile.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -28,11 +27,12 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td index 1963a70fb3..44ac45c72f 100644 --- a/lib/Target/XCore/XCoreInstrFormats.td +++ b/lib/Target/XCore/XCoreInstrFormats.td @@ -10,7 +10,7 @@ //===----------------------------------------------------------------------===// 
// Instruction format superclass //===----------------------------------------------------------------------===// -class InstXCore<dag outs, dag ins, string asmstr, list<dag> pattern> +class InstXCore<int sz, dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction { field bits<32> Inst; @@ -19,102 +19,143 @@ class InstXCore<dag outs, dag ins, string asmstr, list<dag> pattern> dag InOperandList = ins; let AsmString = asmstr; let Pattern = pattern; + let Size = sz; + field bits<32> SoftFail = 0; } // XCore pseudo instructions format class PseudoInstXCore<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern>; + : InstXCore<0, outs, ins, asmstr, pattern> { + let isPseudo = 1; +} //===----------------------------------------------------------------------===// // Instruction formats //===----------------------------------------------------------------------===// class _F3R<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; + : InstXCore<2, outs, ins, asmstr, pattern> { } class _FL3R<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; + : InstXCore<4, outs, ins, asmstr, pattern> { } class _F2RUS<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; + : InstXCore<2, outs, ins, asmstr, pattern> { } class _FL2RUS<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; + : InstXCore<4, outs, ins, asmstr, pattern> { } class _FRU6<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; + : InstXCore<2, outs, ins, asmstr, pattern> { } class _FLRU6<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; + : InstXCore<4, outs, ins, asmstr, pattern> { } class _FU6<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; + : InstXCore<2, outs, ins, asmstr, pattern> { } class _FLU6<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; + : InstXCore<4, outs, ins, asmstr, pattern> { } class _FU10<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; + : InstXCore<2, outs, ins, asmstr, pattern> { } class _FLU10<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; + : InstXCore<4, outs, ins, asmstr, pattern> { +} + +class _F2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstXCore<2, outs, ins, asmstr, pattern> { + let Inst{15-11} = opc{5-1}; + let Inst{4} = opc{0}; + let DecoderMethod = "Decode2RInstruction"; +} + +// 2R with first operand as both a source and a destination. 
+class _F2RSrcDst<bits<6> opc, dag outs, dag ins, string asmstr, + list<dag> pattern> : _F2R<opc, outs, ins, asmstr, pattern> { + let DecoderMethod = "Decode2RSrcDstInstruction"; +} + +// Same as 2R with last two operands swapped +class _FR2R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> + : _F2R<opc, outs, ins, asmstr, pattern> { + let DecoderMethod = "DecodeR2RInstruction"; } -class _F2R<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; +class _FRUS<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstXCore<2, outs, ins, asmstr, pattern> { + let Inst{15-11} = opc{5-1}; + let Inst{4} = opc{0}; + let DecoderMethod = "DecodeRUSInstruction"; } -class _FRUS<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; +// RUS with bitp operand +class _FRUSBitp<bits<6> opc, dag outs, dag ins, string asmstr, + list<dag> pattern> + : _FRUS<opc, outs, ins, asmstr, pattern> { + let DecoderMethod = "DecodeRUSBitpInstruction"; } -class _FL2R<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; +// RUS with first operand as both a source and a destination and a bitp second +// operand +class _FRUSSrcDstBitp<bits<6> opc, dag outs, dag ins, string asmstr, + list<dag> pattern> + : _FRUS<opc, outs, ins, asmstr, pattern> { + let DecoderMethod = "DecodeRUSSrcDstBitpInstruction"; } -class _F1R<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; +class _FL2R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstXCore<4, outs, ins, asmstr, pattern> { + let Inst{31-27} = opc{9-5}; + let Inst{26-20} = 0b1111110; + let Inst{19-16} = opc{4-1}; + + let Inst{15-11} = 0b11111; + let Inst{4} = opc{0}; + let DecoderMethod = "DecodeL2RInstruction"; +} + +// Same as L2R with last two operands swapped +class _FLR2R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern> + : _FL2R<opc, outs, ins, asmstr, pattern> { + let DecoderMethod = "DecodeLR2RInstruction"; +} + +class _F1R<bits<6> opc, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstXCore<2, outs, ins, asmstr, pattern> { + bits<4> a; + + let Inst{15-11} = opc{5-1}; + let Inst{10-5} = 0b111111; + let Inst{4} = opc{0}; + let Inst{3-0} = a; } -class _F0R<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; +class _F0R<bits<10> opc, dag outs, dag ins, string asmstr, list<dag> pattern> + : InstXCore<2, outs, ins, asmstr, pattern> { + let Inst{15-11} = opc{9-5}; + let Inst{10-5} = 0b111111; + let Inst{4-0} = opc{4-0}; } class _L4R<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; + : InstXCore<4, outs, ins, asmstr, pattern> { } class _L5R<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; + : InstXCore<4, outs, ins, asmstr, pattern> { } class _L6R<dag outs, dag ins, string asmstr, list<dag> pattern> - : InstXCore<outs, ins, asmstr, pattern> { - let Inst{31-0} = 0; + : InstXCore<4, outs, ins, asmstr, pattern> { } diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 3e7666bdb9..95b076fdb4 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -344,10 +344,9 @@ 
multiclass FU10_LU10_np<string OpcStr> { // Two operand short -class F2R_np<string OpcStr> : _F2R< - (outs GRRegs:$dst), (ins GRRegs:$b), - !strconcat(OpcStr, " $dst, $b"), - []>; +class F2R_np<bits<6> opc, string OpcStr> : + _F2R<opc, (outs GRRegs:$dst), (ins GRRegs:$b), + !strconcat(OpcStr, " $dst, $b"), []>; // Two operand long @@ -357,23 +356,23 @@ class F2R_np<string OpcStr> : _F2R< let Defs = [SP], Uses = [SP] in { def ADJCALLSTACKDOWN : PseudoInstXCore<(outs), (ins i32imm:$amt), - "${:comment} ADJCALLSTACKDOWN $amt", + "# ADJCALLSTACKDOWN $amt", [(callseq_start timm:$amt)]>; def ADJCALLSTACKUP : PseudoInstXCore<(outs), (ins i32imm:$amt1, i32imm:$amt2), - "${:comment} ADJCALLSTACKUP $amt1", + "# ADJCALLSTACKUP $amt1", [(callseq_end timm:$amt1, timm:$amt2)]>; } def LDWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr), - "${:comment} LDWFI $dst, $addr", + "# LDWFI $dst, $addr", [(set GRRegs:$dst, (load ADDRspii:$addr))]>; def LDAWFI : PseudoInstXCore<(outs GRRegs:$dst), (ins MEMii:$addr), - "${:comment} LDAWFI $dst, $addr", + "# LDAWFI $dst, $addr", [(set GRRegs:$dst, ADDRspii:$addr)]>; def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr), - "${:comment} STWFI $src, $addr", + "# STWFI $src, $addr", [(store GRRegs:$src, ADDRspii:$addr)]>; // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after @@ -381,7 +380,7 @@ def STWFI : PseudoInstXCore<(outs), (ins GRRegs:$src, MEMii:$addr), let usesCustomInserter = 1 in { def SELECT_CC : PseudoInstXCore<(outs GRRegs:$dst), (ins GRRegs:$cond, GRRegs:$T, GRRegs:$F), - "${:comment} SELECT_CC PSEUDO!", + "# SELECT_CC PSEUDO!", [(set GRRegs:$dst, (select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>; } @@ -753,210 +752,210 @@ def BL_lu10 : _FLU10< // Two operand short // TODO eet, eef, tsetmr -def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), - "not $dst, $b", - [(set GRRegs:$dst, (not GRRegs:$b))]>; +def NOT : _F2R<0b100010, (outs GRRegs:$dst), (ins GRRegs:$b), + "not $dst, $b", [(set GRRegs:$dst, (not GRRegs:$b))]>; -def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), - "neg $dst, $b", - [(set GRRegs:$dst, (ineg GRRegs:$b))]>; +def NEG : _F2R<0b100100, (outs GRRegs:$dst), (ins GRRegs:$b), + "neg $dst, $b", [(set GRRegs:$dst, (ineg GRRegs:$b))]>; let Constraints = "$src1 = $dst" in { -def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), - "sext $dst, $src2", - [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1, - immBitp:$src2))]>; - -def SEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2), - "sext $dst, $src2", - [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1, - GRRegs:$src2))]>; - -def ZEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), - "zext $dst, $src2", - [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1, - immBitp:$src2))]>; - -def ZEXT_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2), - "zext $dst, $src2", - [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1, - GRRegs:$src2))]>; - -def ANDNOT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2), - "andnot $dst, $src2", - [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>; +def SEXT_rus : + _FRUSSrcDstBitp<0b001101, (outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), + "sext $dst, $src2", + [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1, + immBitp:$src2))]>; + +def SEXT_2r : + _F2RSrcDst<0b001100, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2), + "sext $dst, $src2", + [(set GRRegs:$dst, (int_xcore_sext GRRegs:$src1, GRRegs:$src2))]>; + +def ZEXT_rus : + 
_FRUSSrcDstBitp<0b010001, (outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), + "zext $dst, $src2", + [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1, + immBitp:$src2))]>; + +def ZEXT_2r : + _F2RSrcDst<0b010000, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2), + "zext $dst, $src2", + [(set GRRegs:$dst, (int_xcore_zext GRRegs:$src1, GRRegs:$src2))]>; + +def ANDNOT_2r : + _F2RSrcDst<0b001010, (outs GRRegs:$dst), (ins GRRegs:$src1, GRRegs:$src2), + "andnot $dst, $src2", + [(set GRRegs:$dst, (and GRRegs:$src1, (not GRRegs:$src2)))]>; } let isReMaterializable = 1, neverHasSideEffects = 1 in -def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size), - "mkmsk $dst, $size", - []>; +def MKMSK_rus : _FRUSBitp<0b101001, (outs GRRegs:$dst), (ins i32imm:$size), + "mkmsk $dst, $size", []>; -def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size), - "mkmsk $dst, $size", - [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), -1))]>; +def MKMSK_2r : _F2R<0b101000, (outs GRRegs:$dst), (ins GRRegs:$size), + "mkmsk $dst, $size", + [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), -1))]>; -def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type), - "getr $dst, $type", - [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>; +def GETR_rus : _FRUS<0b100000, (outs GRRegs:$dst), (ins i32imm:$type), + "getr $dst, $type", + [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>; -def GETTS_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), - "getts $dst, res[$r]", - [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>; +def GETTS_2r : _F2R<0b001110, (outs GRRegs:$dst), (ins GRRegs:$r), + "getts $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>; -def SETPT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), - "setpt res[$r], $val", - [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>; +def SETPT_2r : _FR2R<0b001111, (outs), (ins GRRegs:$r, GRRegs:$val), + "setpt res[$r], $val", + [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>; -def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), - "outct res[$r], $val", - [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>; +def OUTCT_2r : _F2R<0b010010, (outs), (ins GRRegs:$r, GRRegs:$val), + "outct res[$r], $val", + [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>; -def OUTCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val), - "outct res[$r], $val", - [(int_xcore_outct GRRegs:$r, immUs:$val)]>; +def OUTCT_rus : _FRUS<0b010011, (outs), (ins GRRegs:$r, i32imm:$val), + "outct res[$r], $val", + [(int_xcore_outct GRRegs:$r, immUs:$val)]>; -def OUTT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), - "outt res[$r], $val", - [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>; +def OUTT_2r : _FR2R<0b000011, (outs), (ins GRRegs:$r, GRRegs:$val), + "outt res[$r], $val", + [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>; -def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), - "out res[$r], $val", - [(int_xcore_out GRRegs:$r, GRRegs:$val)]>; +def OUT_2r : _FR2R<0b101010, (outs), (ins GRRegs:$r, GRRegs:$val), + "out res[$r], $val", + [(int_xcore_out GRRegs:$r, GRRegs:$val)]>; let Constraints = "$src = $dst" in -def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src), - "outshr res[$r], $src", - [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, - GRRegs:$src))]>; +def OUTSHR_2r : + _F2RSrcDst<0b101011, (outs GRRegs:$dst), (ins GRRegs:$src, GRRegs:$r), + "outshr res[$r], $src", + [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>; -def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), - "inct $dst, res[$r]", - [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>; +def INCT_2r : _F2R<0b100001, 
(outs GRRegs:$dst), (ins GRRegs:$r), + "inct $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>; -def INT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), - "int $dst, res[$r]", - [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>; +def INT_2r : _F2R<0b100011, (outs GRRegs:$dst), (ins GRRegs:$r), + "int $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>; -def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), +def IN_2r : _F2R<0b101100, (outs GRRegs:$dst), (ins GRRegs:$r), "in $dst, res[$r]", [(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>; let Constraints = "$src = $dst" in -def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src), - "inshr $dst, res[$r]", - [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, - GRRegs:$src))]>; +def INSHR_2r : + _F2RSrcDst<0b101101, (outs GRRegs:$dst), (ins GRRegs:$src, GRRegs:$r), + "inshr $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>; -def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), - "chkct res[$r], $val", - [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>; +def CHKCT_2r : _F2R<0b110010, (outs), (ins GRRegs:$r, GRRegs:$val), + "chkct res[$r], $val", + [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>; -def CHKCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val), - "chkct res[$r], $val", - [(int_xcore_chkct GRRegs:$r, immUs:$val)]>; +def CHKCT_rus : _FRUSBitp<0b110011, (outs), (ins GRRegs:$r, i32imm:$val), + "chkct res[$r], $val", + [(int_xcore_chkct GRRegs:$r, immUs:$val)]>; -def TESTCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src), +def TESTCT_2r : _F2R<0b101111, (outs GRRegs:$dst), (ins GRRegs:$src), "testct $dst, res[$src]", [(set GRRegs:$dst, (int_xcore_testct GRRegs:$src))]>; -def TESTWCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$src), +def TESTWCT_2r : _F2R<0b110001, (outs GRRegs:$dst), (ins GRRegs:$src), "testwct $dst, res[$src]", [(set GRRegs:$dst, (int_xcore_testwct GRRegs:$src))]>; -def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), - "setd res[$r], $val", - [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>; +def SETD_2r : _FR2R<0b000101, (outs), (ins GRRegs:$r, GRRegs:$val), + "setd res[$r], $val", + [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>; -def GETST_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), +def SETPSC_l2r : _FR2R<0b110000, (outs), (ins GRRegs:$src1, GRRegs:$src2), + "setpsc res[$src1], $src2", + [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>; + +def GETST_2r : _F2R<0b000001, (outs GRRegs:$dst), (ins GRRegs:$r), "getst $dst, res[$r]", [(set GRRegs:$dst, (int_xcore_getst GRRegs:$r))]>; -def INITSP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src), +def INITSP_2r : _F2R<0b000100, (outs), (ins GRRegs:$src, GRRegs:$t), "init t[$t]:sp, $src", [(int_xcore_initsp GRRegs:$t, GRRegs:$src)]>; -def INITPC_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src), +def INITPC_2r : _F2R<0b000000, (outs), (ins GRRegs:$src, GRRegs:$t), "init t[$t]:pc, $src", [(int_xcore_initpc GRRegs:$t, GRRegs:$src)]>; -def INITCP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src), +def INITCP_2r : _F2R<0b000110, (outs), (ins GRRegs:$src, GRRegs:$t), "init t[$t]:cp, $src", [(int_xcore_initcp GRRegs:$t, GRRegs:$src)]>; -def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src), +def INITDP_2r : _F2R<0b000010, (outs), (ins GRRegs:$src, GRRegs:$t), "init t[$t]:dp, $src", [(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>; +def PEEK_2r : _F2R<0b101110, (outs GRRegs:$dst), (ins GRRegs:$src), + "peek $dst, res[$src]", + [(set GRRegs:$dst, (int_xcore_peek GRRegs:$src))]>; + +def ENDIN_2r : _F2R<0b100101, (outs GRRegs:$dst), 
(ins GRRegs:$src), + "endin $dst, res[$src]", + [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>; + // Two operand long // getd, testlcl -def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), - "bitrev $dst, $src", - [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>; +def BITREV_l2r : _FL2R<0b0000011000, (outs GRRegs:$dst), (ins GRRegs:$src), + "bitrev $dst, $src", + [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>; -def BYTEREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), - "byterev $dst, $src", - [(set GRRegs:$dst, (bswap GRRegs:$src))]>; +def BYTEREV_l2r : _FL2R<0b0000011001, (outs GRRegs:$dst), (ins GRRegs:$src), + "byterev $dst, $src", + [(set GRRegs:$dst, (bswap GRRegs:$src))]>; -def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), - "clz $dst, $src", - [(set GRRegs:$dst, (ctlz GRRegs:$src))]>; +def CLZ_l2r : _FL2R<0b000111000, (outs GRRegs:$dst), (ins GRRegs:$src), + "clz $dst, $src", + [(set GRRegs:$dst, (ctlz GRRegs:$src))]>; -def SETC_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val), - "setc res[$r], $val", - [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>; +def SETC_l2r : _FL2R<0b0010111001, (outs), (ins GRRegs:$r, GRRegs:$val), + "setc res[$r], $val", + [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>; -def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val), - "settw res[$r], $val", - [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>; +def SETTW_l2r : _FLR2R<0b0010011001, (outs), (ins GRRegs:$r, GRRegs:$val), + "settw res[$r], $val", + [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>; -def GETPS_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), - "get $dst, ps[$src]", - [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>; +def GETPS_l2r : _FL2R<0b0001011001, (outs GRRegs:$dst), (ins GRRegs:$src), + "get $dst, ps[$src]", + [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>; -def SETPS_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2), - "set ps[$src1], $src2", - [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>; +def SETPS_l2r : _FLR2R<0b0001111000, (outs), (ins GRRegs:$src1, GRRegs:$src2), + "set ps[$src1], $src2", + [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>; -def INITLR_l2r : _FL2R<(outs), (ins GRRegs:$t, GRRegs:$src), +def INITLR_l2r : _FL2R<0b0001011000, (outs), (ins GRRegs:$src, GRRegs:$t), "init t[$t]:lr, $src", [(int_xcore_initlr GRRegs:$t, GRRegs:$src)]>; -def SETCLK_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2), - "setclk res[$src1], $src2", - [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>; - -def SETRDY_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2), - "setrdy res[$src1], $src2", - [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>; - -def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2), - "setpsc res[$src1], $src2", - [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>; - -def PEEK_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), - "peek $dst, res[$src]", - [(set GRRegs:$dst, (int_xcore_peek GRRegs:$src))]>; +def SETCLK_l2r : _FLR2R<0b0000111001, (outs), (ins GRRegs:$src1, GRRegs:$src2), + "setclk res[$src1], $src2", + [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>; -def ENDIN_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), - "endin $dst, res[$src]", - [(set GRRegs:$dst, (int_xcore_endin GRRegs:$src))]>; +def SETRDY_l2r : _FLR2R<0b0010111000, (outs), (ins GRRegs:$src1, GRRegs:$src2), + "setrdy res[$src1], $src2", + [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>; // One operand short // TODO edu, eeu, waitet, waitef, tstart, clrtp // setdp, setcp, setev, kcall // dgetreg -def MSYNC_1r : _F1R<(outs), (ins GRRegs:$i), - 
"msync res[$i]", - [(int_xcore_msync GRRegs:$i)]>; -def MJOIN_1r : _F1R<(outs), (ins GRRegs:$i), - "mjoin res[$i]", - [(int_xcore_mjoin GRRegs:$i)]>; +def MSYNC_1r : _F1R<0b000111, (outs), (ins GRRegs:$a), + "msync res[$a]", + [(int_xcore_msync GRRegs:$a)]>; +def MJOIN_1r : _F1R<0b000101, (outs), (ins GRRegs:$a), + "mjoin res[$a]", + [(int_xcore_mjoin GRRegs:$a)]>; let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in -def BAU_1r : _F1R<(outs), (ins GRRegs:$addr), - "bau $addr", - [(brind GRRegs:$addr)]>; +def BAU_1r : _F1R<0b001001, (outs), (ins GRRegs:$a), + "bau $a", + [(brind GRRegs:$a)]>; let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in def BR_JT : PseudoInstXCore<(outs), (ins InlineJT:$t, GRRegs:$i), @@ -969,80 +968,80 @@ def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i), [(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>; let Defs=[SP], neverHasSideEffects=1 in -def SETSP_1r : _F1R<(outs), (ins GRRegs:$src), - "set sp, $src", +def SETSP_1r : _F1R<0b001011, (outs), (ins GRRegs:$a), + "set sp, $a", []>; let hasCtrlDep = 1 in -def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src), - "ecallt $src", +def ECALLT_1r : _F1R<0b010011, (outs), (ins GRRegs:$a), + "ecallt $a", []>; let hasCtrlDep = 1 in -def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src), - "ecallf $src", +def ECALLF_1r : _F1R<0b010010, (outs), (ins GRRegs:$a), + "ecallf $a", []>; let isCall=1, // All calls clobber the link register and the non-callee-saved registers: Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in { -def BLA_1r : _F1R<(outs), (ins GRRegs:$addr), - "bla $addr", - [(XCoreBranchLink GRRegs:$addr)]>; +def BLA_1r : _F1R<0b001000, (outs), (ins GRRegs:$a), + "bla $a", + [(XCoreBranchLink GRRegs:$a)]>; } -def SYNCR_1r : _F1R<(outs), (ins GRRegs:$r), - "syncr res[$r]", - [(int_xcore_syncr GRRegs:$r)]>; +def SYNCR_1r : _F1R<0b100001, (outs), (ins GRRegs:$a), + "syncr res[$a]", + [(int_xcore_syncr GRRegs:$a)]>; -def FREER_1r : _F1R<(outs), (ins GRRegs:$r), - "freer res[$r]", - [(int_xcore_freer GRRegs:$r)]>; +def FREER_1r : _F1R<0b000100, (outs), (ins GRRegs:$a), + "freer res[$a]", + [(int_xcore_freer GRRegs:$a)]>; let Uses=[R11] in { -def SETV_1r : _F1R<(outs), (ins GRRegs:$r), - "setv res[$r], r11", - [(int_xcore_setv GRRegs:$r, R11)]>; +def SETV_1r : _F1R<0b010001, (outs), (ins GRRegs:$a), + "setv res[$a], r11", + [(int_xcore_setv GRRegs:$a, R11)]>; -def SETEV_1r : _F1R<(outs), (ins GRRegs:$r), - "setev res[$r], r11", - [(int_xcore_setev GRRegs:$r, R11)]>; +def SETEV_1r : _F1R<0b001111, (outs), (ins GRRegs:$a), + "setev res[$a], r11", + [(int_xcore_setev GRRegs:$a, R11)]>; } -def EEU_1r : _F1R<(outs), (ins GRRegs:$r), - "eeu res[$r]", - [(int_xcore_eeu GRRegs:$r)]>; +def EEU_1r : _F1R<0b000001, (outs), (ins GRRegs:$a), + "eeu res[$a]", + [(int_xcore_eeu GRRegs:$a)]>; // Zero operand short // TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, // stet, getkep, getksp, setkep, getid, kret, dcall, dret, // dentsp, drestsp -def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>; +def CLRE_0R : _F0R<0b0000001101, (outs), (ins), "clre", [(int_xcore_clre)]>; let Defs = [R11] in { -def GETID_0R : _F0R<(outs), (ins), +def GETID_0R : _F0R<0b0001001110, (outs), (ins), "get r11, id", [(set R11, (int_xcore_getid))]>; -def GETED_0R : _F0R<(outs), (ins), +def GETED_0R : _F0R<0b0000111110, (outs), (ins), "get r11, ed", [(set R11, (int_xcore_geted))]>; -def GETET_0R : _F0R<(outs), (ins), +def GETET_0R : _F0R<0b0000111111, (outs), (ins), "get r11, et", [(set R11, (int_xcore_getet))]>; } -def 
SSYNC_0r : _F0R<(outs), (ins), +def SSYNC_0r : _F0R<0b0000001110, (outs), (ins), "ssync", [(int_xcore_ssync)]>; let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1, hasSideEffects = 1 in -def WAITEU_0R : _F0R<(outs), (ins), - "waiteu", - [(brind (int_xcore_waitevent))]>; +def WAITEU_0R : _F0R<0b0000001100, (outs), (ins), + "waiteu", + [(brind (int_xcore_waitevent))]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/lib/Target/XCore/XCoreMCInstLower.cpp b/lib/Target/XCore/XCoreMCInstLower.cpp new file mode 100644 index 0000000000..f96eda9fcb --- /dev/null +++ b/lib/Target/XCore/XCoreMCInstLower.cpp @@ -0,0 +1,117 @@ +//===-- XCoreMCInstLower.cpp - Convert XCore MachineInstr to MCInst -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains code to lower XCore MachineInstrs to their +/// corresponding MCInst records. +/// +//===----------------------------------------------------------------------===// +#include "XCoreMCInstLower.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +XCoreMCInstLower::XCoreMCInstLower(class AsmPrinter &asmprinter) +: Printer(asmprinter) {} + +void XCoreMCInstLower::Initialize(Mangler *M, MCContext *C) { + Mang = M; + Ctx = C; +} + +MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO, + MachineOperandType MOTy, + unsigned Offset) const { + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + const MCSymbol *Symbol; + + switch (MOTy) { + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + break; + case MachineOperand::MO_GlobalAddress: + Symbol = Mang->getSymbol(MO.getGlobal()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_BlockAddress: + Symbol = Printer.GetBlockAddressSymbol(MO.getBlockAddress()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_ExternalSymbol: + Symbol = Printer.GetExternalSymbolSymbol(MO.getSymbolName()); + Offset += MO.getOffset(); + break; + case MachineOperand::MO_JumpTableIndex: + Symbol = Printer.GetJTISymbol(MO.getIndex()); + break; + case MachineOperand::MO_ConstantPoolIndex: + Symbol = Printer.GetCPISymbol(MO.getIndex()); + Offset += MO.getOffset(); + break; + default: + llvm_unreachable("<unknown operand type>"); + } + + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx); + + if (!Offset) + return MCOperand::CreateExpr(MCSym); + + // Assume offset is never negative. + assert(Offset > 0); + + const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx); + const MCBinaryExpr *Add = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx); + return MCOperand::CreateExpr(Add); +} + +MCOperand XCoreMCInstLower::LowerOperand(const MachineOperand &MO, + unsigned offset) const { + MachineOperandType MOTy = MO.getType(); + + switch (MOTy) { + default: llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. 
+ if (MO.isImplicit()) break; + return MCOperand::CreateReg(MO.getReg()); + case MachineOperand::MO_Immediate: + return MCOperand::CreateImm(MO.getImm() + offset); + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + return LowerSymbolOperand(MO, MOTy, offset); + case MachineOperand::MO_RegisterMask: + break; + } + + return MCOperand(); +} + +void XCoreMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCOp = LowerOperand(MO); + + if (MCOp.isValid()) + OutMI.addOperand(MCOp); + } +} diff --git a/lib/Target/XCore/XCoreMCInstLower.h b/lib/Target/XCore/XCoreMCInstLower.h new file mode 100644 index 0000000000..28e702bb98 --- /dev/null +++ b/lib/Target/XCore/XCoreMCInstLower.h @@ -0,0 +1,42 @@ +//===-- XCoreMCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef XCOREMCINSTLOWER_H +#define XCOREMCINSTLOWER_H +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + class MCContext; + class MCInst; + class MCOperand; + class MachineInstr; + class MachineFunction; + class Mangler; + class AsmPrinter; + +/// \brief This class is used to lower an MachineInstr into an MCInst. +class LLVM_LIBRARY_VISIBILITY XCoreMCInstLower { + typedef MachineOperand::MachineOperandType MachineOperandType; + MCContext *Ctx; + Mangler *Mang; + AsmPrinter &Printer; +public: + XCoreMCInstLower(class AsmPrinter &asmprinter); + void Initialize(Mangler *mang, MCContext *C); + void Lower(const MachineInstr *MI, MCInst &OutMI) const; + MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const; + +private: + MCOperand LowerSymbolOperand(const MachineOperand &MO, + MachineOperandType MOTy, unsigned Offset) const; +}; +} + +#endif diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index d8517d7b4e..e637d9a5be 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -22,7 +22,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -30,7 +31,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" #define GET_REGINFO_TARGET_DESC #include "XCoreGenRegisterInfo.inc" diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td index 9edfda1f50..4c771e9700 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.td +++ b/lib/Target/XCore/XCoreRegisterInfo.td @@ -45,10 +45,10 @@ def LR : Ri<15, "lr">, DwarfRegNum<[15]>; def GRRegs : RegisterClass<"XCore", [i32], 32, // Return values and arguments (add R0, R1, R2, R3, - // Not preserved across procedure calls - R11, 
// Callee save - R4, R5, R6, R7, R8, R9, R10)>; + R4, R5, R6, R7, R8, R9, R10, + // Not preserved across procedure calls + R11)>; // Reserved def RRegs : RegisterClass<"XCore", [i32], 32, (add CP, DP, SP, LR)> { diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index b130fb9daa..28c3d12c05 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -13,7 +13,7 @@ #include "XCoreTargetMachine.h" #include "XCore.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/PassManager.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -32,7 +32,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, InstrInfo(), FrameLowering(Subtarget), TLInfo(*this), - TSInfo(*this), STTI(&TLInfo), VTTI(&TLInfo) { + TSInfo(*this) { } namespace { diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 42bfcb4a03..eb9a1aa420 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -19,9 +19,8 @@ #include "XCoreInstrInfo.h" #include "XCoreSelectionDAGInfo.h" #include "XCoreSubtarget.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -32,8 +31,6 @@ class XCoreTargetMachine : public LLVMTargetMachine { XCoreFrameLowering FrameLowering; XCoreTargetLowering TLInfo; XCoreSelectionDAGInfo TSInfo; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: XCoreTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -56,12 +53,6 @@ public: virtual const TargetRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual const DataLayout *getDataLayout() const { return &DL; } // Pass Pipeline Configuration |
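Note on the new XCore instruction format classes: the _F1R class above fixes a 16-bit layout with Inst{15-11} = opc{5-1}, Inst{10-5} = 0b111111, Inst{4} = opc{0} and Inst{3-0} = a (the single register operand). A minimal stand-alone sketch of that packing, assuming a made-up helper name encode1R and using the BAU_1r opcode 0b001001 from the table above (the register number is arbitrary):

// Illustrative only: mirrors the _F1R bit assignments from XCoreInstrFormats.td.
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint16_t encode1R(uint8_t opc6, uint8_t a) {
  assert(opc6 < 64 && a < 16);
  uint16_t Inst = 0;
  Inst |= uint16_t(opc6 >> 1) << 11; // Inst{15-11} = opc{5-1}
  Inst |= uint16_t(0x3F) << 5;       // Inst{10-5}  = 0b111111
  Inst |= uint16_t(opc6 & 1) << 4;   // Inst{4}     = opc{0}
  Inst |= uint16_t(a & 0xF);         // Inst{3-0}   = a
  return Inst;
}

int main() {
  std::printf("bau r3 -> 0x%04x\n", encode1R(0b001001, 3)); // prints 0x27f3
  return 0;
}

The 2R, RUS and L2R classes follow the same idea: the opcode bits are scattered across the fixed fields shown in each class (Inst{15-11} and Inst{4} for the short forms, plus the Inst{31-16} prefix for the long forms), and operand extraction is left to the DecoderMethod hook each class names; the 0R forms simply encode the whole 10-bit opcode directly.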
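The new XCoreMCInstLower follows the usual MachineInstr-to-MCInst lowering pattern: Lower() copies the opcode and converts each MachineOperand, handling registers and immediates inline and routing the symbolic operand kinds through LowerSymbolOperand. A hedged sketch of how an AsmPrinter would typically drive it; the surrounding XCoreAsmPrinter class and its MCInstLowering member are assumed here and are not part of this diff:

// Hypothetical caller; only the XCoreMCInstLower call below is declared in the
// new XCoreMCInstLower.h, everything else is assumed context.
void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst);     // MachineInstr -> MCInst
  OutStreamer.EmitInstruction(TmpInst);  // emit via the MC streamer
}

MCInstLowering.Initialize(Mang, &OutContext) would need to run once (e.g. in runOnMachineFunction) before any lowering, since LowerSymbolOperand relies on the Mangler and MCContext recorded there.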