diff options
Diffstat (limited to 'lib/Target')
231 files changed, 14838 insertions, 10228 deletions
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index cca6d12e16..dc41f2f605 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -349,59 +349,6 @@ AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF, return TopOfFrameOffset - FrameRegPos; } -/// Estimate and return the size of the frame. -static unsigned estimateStackSize(MachineFunction &MF) { - // FIXME: Make generic? Really consider after upstreaming. This code is now - // shared between PEI, ARM *and* here. - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - unsigned MaxAlign = MFI->getMaxAlignment(); - int Offset = 0; - - // This code is very, very similar to PEI::calculateFrameObjectOffsets(). - // It really should be refactored to share code. Until then, changes - // should keep in mind that there's tight coupling between the two. - - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -MFI->getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) - continue; - Offset += MFI->getObjectSize(i); - unsigned Align = MFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - - MaxAlign = std::max(Align, MaxAlign); - } - - if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF)) - Offset += MFI->getMaxCallFrameSize(); - - // Round up the size to a multiple of the alignment. If the function has - // any calls or alloca's, align to the target's StackAlignment value to - // ensure that the callee's frame or the alloca data is suitably aligned; - // otherwise, for leaf functions, align to the TransientStackAlignment - // value. - unsigned StackAlign; - if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); - else - StackAlign = TFI->getTransientStackAlignment(); - - // If the frame pointer is eliminated, all frame offsets will be relative to - // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (unsigned)Offset; -} - void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { @@ -422,7 +369,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // callee-save register for this purpose or allocate an extra spill slot. bool BigStack = - (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)) + (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)) || MFI->hasVarSizedObjects() // Access will be from X29: messes things up || (MFI->adjustsStack() && !hasReservedCallFrame(MF)); @@ -449,7 +396,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // helpfully places it near either SP or FP for us to avoid // infinitely-regression during scavenging. const TargetRegisterClass *RC = &AArch64::GPR64RegClass; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); } diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 7b93463244..cf3a2c3707 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -618,11 +618,11 @@ void llvm::emitRegUpdate(MachineBasicBlock &MBB, int64_t NumBytes, MachineInstr::MIFlag MIFlags) { if (NumBytes == 0 && DstReg == SrcReg) return; - else if (abs(NumBytes) & ~0xffffff) { + else if (abs64(NumBytes) & ~0xffffff) { // Generically, we have to materialize the offset into a temporary register // and subtract it. There are a couple of ways this could be done, for now // we'll use a movz/movk or movn/movk sequence. - uint64_t Bits = static_cast<uint64_t>(abs(NumBytes)); + uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes)); BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg) .addImm(0xffff & Bits).addImm(0) .setMIFlags(MIFlags); @@ -673,7 +673,7 @@ void llvm::emitRegUpdate(MachineBasicBlock &MBB, } else { LowOp = AArch64::SUBxxi_lsl0_s; HighOp = AArch64::SUBxxi_lsl12_s; - NumBytes = abs(NumBytes); + NumBytes = abs64(NumBytes); } // If we're here, at the very least a move needs to be produced, which just diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index c1695dacb4..69bb80a485 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -160,44 +160,53 @@ private: SMLoc StartLoc, EndLoc; + struct ImmWithLSLOp { + const MCExpr *Val; + unsigned ShiftAmount; + bool ImplicitAmount; + }; + + struct CondCodeOp { + A64CC::CondCodes Code; + }; + + struct FPImmOp { + double Val; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct RegOp { + unsigned RegNum; + }; + + struct ShiftExtendOp { + A64SE::ShiftExtSpecifiers ShiftType; + unsigned Amount; + bool ImplicitAmount; + }; + + struct SysRegOp { + const char *Data; + unsigned Length; + }; + + struct TokOp { + const char *Data; + unsigned Length; + }; + union { - struct { - const MCExpr *Val; - unsigned ShiftAmount; - bool ImplicitAmount; - } ImmWithLSL; - - struct { - A64CC::CondCodes Code; - } CondCode; - - struct { - double Val; - } FPImm; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned RegNum; - } Reg; - - struct { - A64SE::ShiftExtSpecifiers ShiftType; - unsigned Amount; - bool ImplicitAmount; - } ShiftExtend; - - struct { - const char *Data; - unsigned Length; - } SysReg; - - struct { - const char *Data; - unsigned Length; - } Tok; + struct ImmWithLSLOp ImmWithLSL; + struct CondCodeOp CondCode; + struct FPImmOp FPImm; + struct ImmOp Imm; + struct RegOp Reg; + struct ShiftExtendOp ShiftExtend; + struct SysRegOp SysReg; + struct TokOp Tok; }; AArch64Operand(KindTy K, SMLoc S, SMLoc E) diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index ab9bba1836..c6690a96c7 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -194,7 +194,17 @@ const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = { {"rvbar_el3", RVBAR_EL3}, {"isr_el1", ISR_EL1}, {"cntpct_el0", CNTPCT_EL0}, - {"cntvct_el0", CNTVCT_EL0} + {"cntvct_el0", CNTVCT_EL0}, + + // GICv3 registers + {"icc_iar1_el1", ICC_IAR1_EL1}, + {"icc_iar0_el1", ICC_IAR0_EL1}, + {"icc_hppir1_el1", ICC_HPPIR1_EL1}, + {"icc_hppir0_el1", ICC_HPPIR0_EL1}, + {"icc_rpr_el1", ICC_RPR_EL1}, + {"ich_vtr_el2", ICH_VTR_EL2}, + {"ich_eisr_el2", ICH_EISR_EL2}, + {"ich_elsr_el2", ICH_ELSR_EL2} }; A64SysReg::MRSMapper::MRSMapper() { @@ -205,7 +215,15 @@ A64SysReg::MRSMapper::MRSMapper() { const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = { {"dbgdtrtx_el0", DBGDTRTX_EL0}, {"oslar_el1", OSLAR_EL1}, - {"pmswinc_el0", PMSWINC_EL0} + {"pmswinc_el0", PMSWINC_EL0}, + + // GICv3 registers + {"icc_eoir1_el1", ICC_EOIR1_EL1}, + {"icc_eoir0_el1", ICC_EOIR0_EL1}, + {"icc_dir_el1", ICC_DIR_EL1}, + {"icc_sgi1r_el1", ICC_SGI1R_EL1}, + {"icc_asgi1r_el1", ICC_ASGI1R_EL1}, + {"icc_sgi0r_el1", ICC_SGI0R_EL1} }; A64SysReg::MSRMapper::MSRMapper() { @@ -467,6 +485,56 @@ const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = { {"pmevtyper28_el0", PMEVTYPER28_EL0}, {"pmevtyper29_el0", PMEVTYPER29_EL0}, {"pmevtyper30_el0", PMEVTYPER30_EL0}, + + // GICv3 registers + {"icc_bpr1_el1", ICC_BPR1_EL1}, + {"icc_bpr0_el1", ICC_BPR0_EL1}, + {"icc_pmr_el1", ICC_PMR_EL1}, + {"icc_ctlr_el1", ICC_CTLR_EL1}, + {"icc_ctlr_el3", ICC_CTLR_EL3}, + {"icc_sre_el1", ICC_SRE_EL1}, + {"icc_sre_el2", ICC_SRE_EL2}, + {"icc_sre_el3", ICC_SRE_EL3}, + {"icc_igrpen0_el1", ICC_IGRPEN0_EL1}, + {"icc_igrpen1_el1", ICC_IGRPEN1_EL1}, + {"icc_igrpen1_el3", ICC_IGRPEN1_EL3}, + {"icc_seien_el1", ICC_SEIEN_EL1}, + {"icc_ap0r0_el1", ICC_AP0R0_EL1}, + {"icc_ap0r1_el1", ICC_AP0R1_EL1}, + {"icc_ap0r2_el1", ICC_AP0R2_EL1}, + {"icc_ap0r3_el1", ICC_AP0R3_EL1}, + {"icc_ap1r0_el1", ICC_AP1R0_EL1}, + {"icc_ap1r1_el1", ICC_AP1R1_EL1}, + {"icc_ap1r2_el1", ICC_AP1R2_EL1}, + {"icc_ap1r3_el1", ICC_AP1R3_EL1}, + {"ich_ap0r0_el2", ICH_AP0R0_EL2}, + {"ich_ap0r1_el2", ICH_AP0R1_EL2}, + {"ich_ap0r2_el2", ICH_AP0R2_EL2}, + {"ich_ap0r3_el2", ICH_AP0R3_EL2}, + {"ich_ap1r0_el2", ICH_AP1R0_EL2}, + {"ich_ap1r1_el2", ICH_AP1R1_EL2}, + {"ich_ap1r2_el2", ICH_AP1R2_EL2}, + {"ich_ap1r3_el2", ICH_AP1R3_EL2}, + {"ich_hcr_el2", ICH_HCR_EL2}, + {"ich_misr_el2", ICH_MISR_EL2}, + {"ich_vmcr_el2", ICH_VMCR_EL2}, + {"ich_vseir_el2", ICH_VSEIR_EL2}, + {"ich_lr0_el2", ICH_LR0_EL2}, + {"ich_lr1_el2", ICH_LR1_EL2}, + {"ich_lr2_el2", ICH_LR2_EL2}, + {"ich_lr3_el2", ICH_LR3_EL2}, + {"ich_lr4_el2", ICH_LR4_EL2}, + {"ich_lr5_el2", ICH_LR5_EL2}, + {"ich_lr6_el2", ICH_LR6_EL2}, + {"ich_lr7_el2", ICH_LR7_EL2}, + {"ich_lr8_el2", ICH_LR8_EL2}, + {"ich_lr9_el2", ICH_LR9_EL2}, + {"ich_lr10_el2", ICH_LR10_EL2}, + {"ich_lr11_el2", ICH_LR11_EL2}, + {"ich_lr12_el2", ICH_LR12_EL2}, + {"ich_lr13_el2", ICH_LR13_EL2}, + {"ich_lr14_el2", ICH_LR14_EL2}, + {"ich_lr15_el2", ICH_LR15_EL2} }; uint32_t diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 5eebf44431..c9b6e23de3 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -354,13 +354,31 @@ namespace A64SysReg { RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001 ISR_EL1 = 0xc608, // 11 000 1100 0001 000 CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001 - CNTVCT_EL0 = 0xdf02 // 11 011 1110 0000 010 + CNTVCT_EL0 = 0xdf02, // 11 011 1110 0000 010 + + // GICv3 registers + ICC_IAR1_EL1 = 0xc660, // 11 000 1100 1100 000 + ICC_IAR0_EL1 = 0xc640, // 11 000 1100 1000 000 + ICC_HPPIR1_EL1 = 0xc662, // 11 000 1100 1100 010 + ICC_HPPIR0_EL1 = 0xc642, // 11 000 1100 1000 010 + ICC_RPR_EL1 = 0xc65b, // 11 000 1100 1011 011 + ICH_VTR_EL2 = 0xe659, // 11 100 1100 1011 001 + ICH_EISR_EL2 = 0xe65b, // 11 100 1100 1011 011 + ICH_ELSR_EL2 = 0xe65d // 11 100 1100 1011 101 }; enum SysRegWOValues { DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000 OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100 - PMSWINC_EL0 = 0xdce4 // 11 011 1001 1100 100 + PMSWINC_EL0 = 0xdce4, // 11 011 1001 1100 100 + + // GICv3 registers + ICC_EOIR1_EL1 = 0xc661, // 11 000 1100 1100 001 + ICC_EOIR0_EL1 = 0xc641, // 11 000 1100 1000 001 + ICC_DIR_EL1 = 0xc659, // 11 000 1100 1011 001 + ICC_SGI1R_EL1 = 0xc65d, // 11 000 1100 1011 101 + ICC_ASGI1R_EL1 = 0xc65e, // 11 000 1100 1011 110 + ICC_SGI0R_EL1 = 0xc65f // 11 000 1100 1011 111 }; enum SysRegValues { @@ -616,7 +634,57 @@ namespace A64SysReg { PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011 PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100 PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101 - PMEVTYPER30_EL0 = 0xdf7e // 11 011 1110 1111 110 + PMEVTYPER30_EL0 = 0xdf7e, // 11 011 1110 1111 110 + + // GICv3 registers + ICC_BPR1_EL1 = 0xc663, // 11 000 1100 1100 011 + ICC_BPR0_EL1 = 0xc643, // 11 000 1100 1000 011 + ICC_PMR_EL1 = 0xc230, // 11 000 0100 0110 000 + ICC_CTLR_EL1 = 0xc664, // 11 000 1100 1100 100 + ICC_CTLR_EL3 = 0xf664, // 11 110 1100 1100 100 + ICC_SRE_EL1 = 0xc665, // 11 000 1100 1100 101 + ICC_SRE_EL2 = 0xe64d, // 11 100 1100 1001 101 + ICC_SRE_EL3 = 0xf665, // 11 110 1100 1100 101 + ICC_IGRPEN0_EL1 = 0xc666, // 11 000 1100 1100 110 + ICC_IGRPEN1_EL1 = 0xc667, // 11 000 1100 1100 111 + ICC_IGRPEN1_EL3 = 0xf667, // 11 110 1100 1100 111 + ICC_SEIEN_EL1 = 0xc668, // 11 000 1100 1101 000 + ICC_AP0R0_EL1 = 0xc644, // 11 000 1100 1000 100 + ICC_AP0R1_EL1 = 0xc645, // 11 000 1100 1000 101 + ICC_AP0R2_EL1 = 0xc646, // 11 000 1100 1000 110 + ICC_AP0R3_EL1 = 0xc647, // 11 000 1100 1000 111 + ICC_AP1R0_EL1 = 0xc648, // 11 000 1100 1001 000 + ICC_AP1R1_EL1 = 0xc649, // 11 000 1100 1001 001 + ICC_AP1R2_EL1 = 0xc64a, // 11 000 1100 1001 010 + ICC_AP1R3_EL1 = 0xc64b, // 11 000 1100 1001 011 + ICH_AP0R0_EL2 = 0xe640, // 11 100 1100 1000 000 + ICH_AP0R1_EL2 = 0xe641, // 11 100 1100 1000 001 + ICH_AP0R2_EL2 = 0xe642, // 11 100 1100 1000 010 + ICH_AP0R3_EL2 = 0xe643, // 11 100 1100 1000 011 + ICH_AP1R0_EL2 = 0xe648, // 11 100 1100 1001 000 + ICH_AP1R1_EL2 = 0xe649, // 11 100 1100 1001 001 + ICH_AP1R2_EL2 = 0xe64a, // 11 100 1100 1001 010 + ICH_AP1R3_EL2 = 0xe64b, // 11 100 1100 1001 011 + ICH_HCR_EL2 = 0xe658, // 11 100 1100 1011 000 + ICH_MISR_EL2 = 0xe65a, // 11 100 1100 1011 010 + ICH_VMCR_EL2 = 0xe65f, // 11 100 1100 1011 111 + ICH_VSEIR_EL2 = 0xe64c, // 11 100 1100 1001 100 + ICH_LR0_EL2 = 0xe660, // 11 100 1100 1100 000 + ICH_LR1_EL2 = 0xe661, // 11 100 1100 1100 001 + ICH_LR2_EL2 = 0xe662, // 11 100 1100 1100 010 + ICH_LR3_EL2 = 0xe663, // 11 100 1100 1100 011 + ICH_LR4_EL2 = 0xe664, // 11 100 1100 1100 100 + ICH_LR5_EL2 = 0xe665, // 11 100 1100 1100 101 + ICH_LR6_EL2 = 0xe666, // 11 100 1100 1100 110 + ICH_LR7_EL2 = 0xe667, // 11 100 1100 1100 111 + ICH_LR8_EL2 = 0xe668, // 11 100 1100 1101 000 + ICH_LR9_EL2 = 0xe669, // 11 100 1100 1101 001 + ICH_LR10_EL2 = 0xe66a, // 11 100 1100 1101 010 + ICH_LR11_EL2 = 0xe66b, // 11 100 1100 1101 011 + ICH_LR12_EL2 = 0xe66c, // 11 100 1100 1101 100 + ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101 + ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110 + ICH_LR15_EL2 = 0xe66f // 11 100 1100 1101 111 }; // Note that these do not inherit from NamedImmMapper. This class is diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp new file mode 100644 index 0000000000..f0d4dbe2bf --- /dev/null +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -0,0 +1,704 @@ +//=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The Cortex-A15 processor employs a tracking scheme in its register renaming +// in order to process each instruction's micro-ops speculatively and +// out-of-order with appropriate forwarding. The ARM architecture allows VFP +// instructions to read and write 32-bit S-registers. Each S-register +// corresponds to one half (upper or lower) of an overlaid 64-bit D-register. +// +// There are several instruction patterns which can be used to provide this +// capability which can provide higher performance than other, potentially more +// direct patterns, specifically around when one micro-op reads a D-register +// operand that has recently been written as one or more S-register results. +// +// This file defines a pre-regalloc pass which looks for SPR producers which +// are going to be used by a DPR (or QPR) consumers and creates the more +// optimized access pattern. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "a15-sd-optimizer" +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMSubtarget.h" +#include "ARMISelLowering.h" +#include "ARMTargetMachine.h" + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <set> + +using namespace llvm; + +namespace { + struct A15SDOptimizer : public MachineFunctionPass { + static char ID; + A15SDOptimizer() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM A15 S->D optimizer"; + } + + private: + const ARMBaseInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + + bool runOnInstruction(MachineInstr *MI); + + // + // Instruction builder helpers + // + unsigned createDupLane(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg, unsigned Lane, + bool QPR=false); + + unsigned createExtractSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned DReg, unsigned Lane, + const TargetRegisterClass *TRC); + + unsigned createVExt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Ssub0, unsigned Ssub1); + + unsigned createRegSequence(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg1, unsigned Reg2); + + unsigned createInsertSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, unsigned DReg, unsigned Lane, + unsigned ToInsert); + + unsigned createImplicitDef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL); + + // + // Various property checkers + // + bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC); + bool hasPartialWrite(MachineInstr *MI); + SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI); + unsigned getDPRLaneFromSPR(unsigned SReg); + + // + // Methods used for getting the definitions of partial registers + // + + MachineInstr *elideCopies(MachineInstr *MI); + void elideCopiesAndPHIs(MachineInstr *MI, + SmallVectorImpl<MachineInstr*> &Outs); + + // + // Pattern optimization methods + // + unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg); + unsigned optimizeSDPattern(MachineInstr *MI); + unsigned getPrefSPRLane(unsigned SReg); + + // + // Sanitizing method - used to make sure if don't leave dead code around. + // + void eraseInstrWithNoUses(MachineInstr *MI); + + // + // A map used to track the changes done by this pass. + // + std::map<MachineInstr*, unsigned> Replacements; + std::set<MachineInstr *> DeadInstr; + }; + char A15SDOptimizer::ID = 0; +} // end anonymous namespace + +// Returns true if this is a use of a SPR register. +bool A15SDOptimizer::usesRegClass(MachineOperand &MO, + const TargetRegisterClass *TRC) { + if (!MO.isReg()) + return false; + unsigned Reg = MO.getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return MRI->getRegClass(Reg)->hasSuperClassEq(TRC); + else + return TRC->contains(Reg); +} + +unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) { + unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, + &ARM::DPRRegClass); + if (DReg != ARM::NoRegister) return ARM::ssub_1; + return ARM::ssub_0; +} + +// Get the subreg type that is most likely to be coalesced +// for an SPR register that will be used in VDUP32d pseudo. +unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { + if (!TRI->isVirtualRegister(SReg)) + return getDPRLaneFromSPR(SReg); + + MachineInstr *MI = MRI->getVRegDef(SReg); + if (!MI) return ARM::ssub_0; + MachineOperand *MO = MI->findRegisterDefOperand(SReg); + + assert(MO->isReg() && "Non register operand found!"); + if (!MO) return ARM::ssub_0; + + if (MI->isCopy() && usesRegClass(MI->getOperand(1), + &ARM::SPRRegClass)) { + SReg = MI->getOperand(1).getReg(); + } + + if (TargetRegisterInfo::isVirtualRegister(SReg)) { + if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1; + return ARM::ssub_0; + } + return getDPRLaneFromSPR(SReg); +} + +// MI is known to be dead. Figure out what instructions +// are also made dead by this and mark them for removal. +void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { + SmallVector<MachineInstr *, 8> Front; + DeadInstr.insert(MI); + + DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n"); + Front.push_back(MI); + + while (Front.size() != 0) { + MI = Front.back(); + Front.pop_back(); + + // MI is already known to be dead. We need to see + // if other instructions can also be removed. + for (unsigned int i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + if ((!MO.isReg()) || (!MO.isUse())) + continue; + unsigned Reg = MO.getReg(); + if (!TRI->isVirtualRegister(Reg)) + continue; + MachineOperand *Op = MI->findRegisterDefOperand(Reg); + + if (!Op) + continue; + + MachineInstr *Def = Op->getParent(); + + // We don't need to do anything if we have already marked + // this instruction as being dead. + if (DeadInstr.find(Def) != DeadInstr.end()) + continue; + + // Check if all the uses of this instruction are marked as + // dead. If so, we can also mark this instruction as being + // dead. + bool IsDead = true; + for (unsigned int j = 0; j < Def->getNumOperands(); ++j) { + MachineOperand &MODef = Def->getOperand(j); + if ((!MODef.isReg()) || (!MODef.isDef())) + continue; + unsigned DefReg = MODef.getReg(); + if (!TRI->isVirtualRegister(DefReg)) { + IsDead = false; + break; + } + for (MachineRegisterInfo::use_iterator II = MRI->use_begin(Reg), + EE = MRI->use_end(); + II != EE; ++II) { + // We don't care about self references. + if (&*II == Def) + continue; + if (DeadInstr.find(&*II) == DeadInstr.end()) { + IsDead = false; + break; + } + } + } + + if (!IsDead) continue; + + DEBUG(dbgs() << "Deleting instruction " << *Def << "\n"); + DeadInstr.insert(Def); + } + } +} + +// Creates the more optimized patterns and generally does all the code +// transformations in this pass. +unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { + if (MI->isCopy()) { + return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg()); + } + + if (MI->isInsertSubreg()) { + unsigned DPRReg = MI->getOperand(1).getReg(); + unsigned SPRReg = MI->getOperand(2).getReg(); + + if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) { + MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg()); + + if (DPRMI && SPRMI) { + // See if the first operand of this insert_subreg is IMPLICIT_DEF + MachineInstr *ECDef = elideCopies(DPRMI); + if (ECDef != 0 && ECDef->isImplicitDef()) { + // Another corner case - if we're inserting something that is purely + // a subreg copy of a DPR, just use that DPR. + + MachineInstr *EC = elideCopies(SPRMI); + // Is it a subreg copy of ssub_0? + if (EC && EC->isCopy() && + EC->getOperand(1).getSubReg() == ARM::ssub_0) { + DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI); + + // Find the thing we're subreg copying out of - is it of the same + // regclass as DPRMI? (i.e. a DPR or QPR). + unsigned FullReg = SPRMI->getOperand(1).getReg(); + const TargetRegisterClass *TRC = + MRI->getRegClass(MI->getOperand(1).getReg()); + if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) { + DEBUG(dbgs() << "Subreg copy is compatible - returning "); + DEBUG(dbgs() << PrintReg(FullReg) << "\n"); + eraseInstrWithNoUses(MI); + return FullReg; + } + } + + return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg()); + } + } + } + return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); + } + + if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), + &ARM::SPRRegClass)) { + // See if all bar one of the operands are IMPLICIT_DEF and insert the + // optimizer pattern accordingly. + unsigned NumImplicit = 0, NumTotal = 0; + unsigned NonImplicitReg = ~0U; + + for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) { + if (!MI->getOperand(I).isReg()) + continue; + ++NumTotal; + unsigned OpReg = MI->getOperand(I).getReg(); + + if (!TRI->isVirtualRegister(OpReg)) + break; + + MachineInstr *Def = MRI->getVRegDef(OpReg); + if (!Def) + break; + if (Def->isImplicitDef()) + ++NumImplicit; + else + NonImplicitReg = MI->getOperand(I).getReg(); + } + + if (NumImplicit == NumTotal - 1) + return optimizeAllLanesPattern(MI, NonImplicitReg); + else + return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); + } + + assert(0 && "Unhandled update pattern!"); + return 0; +} + +// Return true if this MachineInstr inserts a scalar (SPR) value into +// a D or Q register. +bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) { + // The only way we can do a partial register update is through a COPY, + // INSERT_SUBREG or REG_SEQUENCE. + if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) + return true; + + if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2), + &ARM::SPRRegClass)) + return true; + + if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) + return true; + + return false; +} + +// Looks through full copies to get the instruction that defines the input +// operand for MI. +MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { + if (!MI->isFullCopy()) + return MI; + if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + return NULL; + MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg()); + if (!Def) + return NULL; + return elideCopies(Def); +} + +// Look through full copies and PHIs to get the set of non-copy MachineInstrs +// that can produce MI. +void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, + SmallVectorImpl<MachineInstr*> &Outs) { + // Looking through PHIs may create loops so we need to track what + // instructions we have visited before. + std::set<MachineInstr *> Reached; + SmallVector<MachineInstr *, 8> Front; + Front.push_back(MI); + while (Front.size() != 0) { + MI = Front.back(); + Front.pop_back(); + + // If we have already explored this MachineInstr, ignore it. + if (Reached.find(MI) != Reached.end()) + continue; + Reached.insert(MI); + if (MI->isPHI()) { + for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { + unsigned Reg = MI->getOperand(I).getReg(); + if (!TRI->isVirtualRegister(Reg)) { + continue; + } + MachineInstr *NewMI = MRI->getVRegDef(Reg); + if (!NewMI) + continue; + Front.push_back(NewMI); + } + } else if (MI->isFullCopy()) { + if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + continue; + MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + if (!NewMI) + continue; + Front.push_back(NewMI); + } else { + DEBUG(dbgs() << "Found partial copy" << *MI <<"\n"); + Outs.push_back(MI); + } + } +} + +// Return the DPR virtual registers that are read by this machine instruction +// (if any). +SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) { + if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() || + MI->isKill()) + return SmallVector<unsigned, 8>(); + + SmallVector<unsigned, 8> Defs; + for (unsigned i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + + if (!MO.isReg() || !MO.isUse()) + continue; + if (!usesRegClass(MO, &ARM::DPRRegClass) && + !usesRegClass(MO, &ARM::QPRRegClass)) + continue; + + Defs.push_back(MO.getReg()); + } + return Defs; +} + +// Creates a DPR register from an SPR one by using a VDUP. +unsigned +A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg, unsigned Lane, bool QPR) { + unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : + &ARM::DPRRegClass); + AddDefaultPred(BuildMI(MBB, + InsertBefore, + DL, + TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), + Out) + .addReg(Reg) + .addImm(Lane)); + + return Out; +} + +// Creates a SPR register from a DPR by copying the value in lane 0. +unsigned +A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned DReg, unsigned Lane, + const TargetRegisterClass *TRC) { + unsigned Out = MRI->createVirtualRegister(TRC); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::COPY), Out) + .addReg(DReg, 0, Lane); + + return Out; +} + +// Takes two SPR registers and creates a DPR by using a REG_SEQUENCE. +unsigned +A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg1, unsigned Reg2) { + unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::REG_SEQUENCE), Out) + .addReg(Reg1) + .addImm(ARM::dsub_0) + .addReg(Reg2) + .addImm(ARM::dsub_1); + return Out; +} + +// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1) +// and merges them into one DPR register. +unsigned +A15SDOptimizer::createVExt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Ssub0, unsigned Ssub1) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + AddDefaultPred(BuildMI(MBB, + InsertBefore, + DL, + TII->get(ARM::VEXTd32), Out) + .addReg(Ssub0) + .addReg(Ssub1) + .addImm(1)); + return Out; +} + +unsigned +A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, unsigned DReg, unsigned Lane, + unsigned ToInsert) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::INSERT_SUBREG), Out) + .addReg(DReg) + .addReg(ToInsert) + .addImm(Lane); + + return Out; +} + +unsigned +A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::IMPLICIT_DEF), Out); + return Out; +} + +// This function inserts instructions in order to optimize interactions between +// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all +// lanes, and the using VEXT instructions to recompose the result. +unsigned +A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) { + MachineBasicBlock::iterator InsertPt(MI); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock &MBB = *MI->getParent(); + InsertPt++; + unsigned Out; + + if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) { + unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg, + ARM::dsub_0, &ARM::DPRRegClass); + unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg, + ARM::dsub_1, &ARM::DPRRegClass); + + unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0); + unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1); + Out = createVExt(MBB, InsertPt, DL, Out1, Out2); + + unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0); + unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1); + Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4); + + Out = createRegSequence(MBB, InsertPt, DL, Out, Out2); + + } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) { + unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0); + unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1); + Out = createVExt(MBB, InsertPt, DL, Out1, Out2); + + } else { + assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) && + "Found unexpected regclass!"); + + unsigned PrefLane = getPrefSPRLane(Reg); + unsigned Lane; + switch (PrefLane) { + case ARM::ssub_0: Lane = 0; break; + case ARM::ssub_1: Lane = 1; break; + default: llvm_unreachable("Unknown preferred lane!"); + } + + bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass); + + Out = createImplicitDef(MBB, InsertPt, DL); + Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg); + Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR); + eraseInstrWithNoUses(MI); + } + return Out; +} + +bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { + // We look for instructions that write S registers that are then read as + // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and + // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or + // merge two SPR values to form a DPR register. In order avoid false + // positives we make sure that there is an SPR producer so we look past + // COPY and PHI nodes to find it. + // + // The best code pattern for when an SPR producer is going to be used by a + // DPR or QPR consumer depends on whether the other lanes of the + // corresponding DPR/QPR are currently defined. + // + // We can handle these efficiently, depending on the type of + // pseudo-instruction that is producing the pattern + // + // * COPY: * VDUP all lanes and merge the results together + // using VEXTs. + // + // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR + // lane, and the other lane(s) of the DPR/QPR register + // that we are inserting in are undefined, use the + // original DPR/QPR value. + // * Otherwise, fall back on the same stategy as COPY. + // + // * REG_SEQUENCE: * If all except one of the input operands are + // IMPLICIT_DEFs, insert the VDUP pattern for just the + // defined input operand + // * Otherwise, fall back on the same stategy as COPY. + // + + // First, get all the reads of D-registers done by this instruction. + SmallVector<unsigned, 8> Defs = getReadDPRs(MI); + bool Modified = false; + + for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + // Follow the def-use chain for this DPR through COPYs, and also through + // PHIs (which are essentially multi-way COPYs). It is because of PHIs that + // we can end up with multiple defs of this DPR. + + SmallVector<MachineInstr *, 8> DefSrcs; + if (!TRI->isVirtualRegister(*I)) + continue; + MachineInstr *Def = MRI->getVRegDef(*I); + if (!Def) + continue; + + elideCopiesAndPHIs(Def, DefSrcs); + + for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(), + EE = DefSrcs.end(); II != EE; ++II) { + MachineInstr *MI = *II; + + // If we've already analyzed and replaced this operand, don't do + // anything. + if (Replacements.find(MI) != Replacements.end()) + continue; + + // Now, work out if the instruction causes a SPR->DPR dependency. + if (!hasPartialWrite(MI)) + continue; + + // Collect all the uses of this MI's DPR def for updating later. + SmallVector<MachineOperand*, 8> Uses; + unsigned DPRDefReg = MI->getOperand(0).getReg(); + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg), + E = MRI->use_end(); I != E; ++I) + Uses.push_back(&I.getOperand()); + + // We can optimize this. + unsigned NewReg = optimizeSDPattern(MI); + + if (NewReg != 0) { + Modified = true; + for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + DEBUG(dbgs() << "Replacing operand " + << **I << " with " + << PrintReg(NewReg) << "\n"); + (*I)->substVirtReg(NewReg, 0, *TRI); + } + } + Replacements[MI] = NewReg; + } + } + return Modified; +} + +bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { + TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); + TRI = Fn.getTarget().getRegisterInfo(); + MRI = &Fn.getRegInfo(); + bool Modified = false; + + DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n"); + + DeadInstr.clear(); + Replacements.clear(); + + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + + for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end(); + MI != ME;) { + Modified |= runOnInstruction(MI++); + } + + } + + for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(), + E = DeadInstr.end(); + I != E; ++I) { + (*I)->eraseFromParent(); + } + + return Modified; +} + +FunctionPass *llvm::createA15SDOptimizerPass() { + return new A15SDOptimizer(); +} diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 5faf8c320c..80e5f37eb0 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -35,6 +35,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE); +FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); FunctionPass *createARMGlobalBaseRegPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 46915eecf6..68380847a0 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -143,14 +143,12 @@ include "ARMSchedule.td" // ARM processor families. def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", "Cortex-A5 ARM processors", - [FeatureSlowFPBrcc, FeatureNEONForFP, - FeatureHasSlowFPVMLx, FeatureVMLxForwarding, - FeatureT2XtPk]>; + [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, FeatureT2XtPk]>; def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", - [FeatureSlowFPBrcc, FeatureNEONForFP, - FeatureHasSlowFPVMLx, FeatureVMLxForwarding, - FeatureT2XtPk]>; + [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, FeatureT2XtPk]>; def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", "Cortex-A9 ARM processors", [FeatureVMLxForwarding, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 58c779830e..13ec208793 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1357,7 +1357,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) .addReg(ARM::PC) - .addImm(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(0).getReg()) // Add predicate operands. .addImm(ARMCC::AL) .addReg(0) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index ed001ea24a..126f160f6d 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1125,7 +1125,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ // copyPhysReg() calls. Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be // changed into a VORR that can go down the NEON pipeline. - if (!WidenVMOVS || !MI->isCopy()) + if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15()) return false; // Look for a copy between even S-registers. That is where we keep floats @@ -3734,9 +3734,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); - // A9-like cores are particularly picky about mixing the two and want these + // CortexA9 is particularly picky about mixing the two and wants these // converted. - if (Subtarget.isLikeA9() && !isPredicated(MI) && + if (Subtarget.isCortexA9() && !isPredicated(MI) && (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR || MI->getOpcode() == ARM::VMOVS)) @@ -4023,14 +4023,12 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. // // FCONSTD can be used as a dependency-breaking instruction. - - unsigned ARMBaseInstrInfo:: getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { - // Only Swift has partial register update problems. - if (!SwiftPartialUpdateClearance || !Subtarget.isSwift()) + if (!SwiftPartialUpdateClearance || + !(Subtarget.isSwift() || Subtarget.isCortexA15())) return 0; assert(TRI && "Need TRI instance"); @@ -4056,7 +4054,7 @@ getPartialRegUpdateClearance(const MachineInstr *MI, // Explicitly reads the dependency. case ARM::VLD1LNd32: - UseOp = 1; + UseOp = 3; break; default: return 0; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index abdd251743..b6b27f849a 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -680,7 +680,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // means the stack pointer cannot be used to access the emergency spill slot // when !hasReservedCallFrame(). #ifndef NDEBUG - if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){ + if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){ assert(TFI->hasReservedCallFrame(MF) && "Cannot use SP to access the emergency spill slot in " "functions without a reserved call frame"); diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 0ca6450e2b..7a02adf246 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1038,58 +1038,6 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, return FnSize; } -/// estimateStackSize - Estimate and return the size of the frame. -/// FIXME: Make generic? -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - unsigned MaxAlign = MFI->getMaxAlignment(); - int Offset = 0; - - // This code is very, very similar to PEI::calculateFrameObjectOffsets(). - // It really should be refactored to share code. Until then, changes - // should keep in mind that there's tight coupling between the two. - - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -MFI->getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) - continue; - Offset += MFI->getObjectSize(i); - unsigned Align = MFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - - MaxAlign = std::max(Align, MaxAlign); - } - - if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF)) - Offset += MFI->getMaxCallFrameSize(); - - // Round up the size to a multiple of the alignment. If the function has - // any calls or alloca's, align to the target's StackAlignment value to - // ensure that the callee's frame or the alloca data is suitably aligned; - // otherwise, for leaf functions, align to the TransientStackAlignment - // value. - unsigned StackAlign; - if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); - else - StackAlign = TFI->getTransientStackAlignment(); - - // If the frame pointer is eliminated, all frame offsets will be relative to - // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (unsigned)Offset; -} - /// estimateRSStackSizeLimit - Look at each instruction that references stack /// frames and return the stack size limit beyond which some of these /// instructions will require a scratch register during their expansion later. @@ -1235,7 +1183,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // we've used all the registers and so R4 is already used, so not marking // it here will be OK. // FIXME: It will be better just to find spare register here. - unsigned StackSize = estimateStackSize(MF); + unsigned StackSize = MFI->estimateStackSize(MF); if (MFI->hasVarSizedObjects() || StackSize > 508) MRI.setPhysRegUsed(ARM::R4); } @@ -1330,7 +1278,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // worth the effort and added fragility? bool BigStack = (RS && - (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= + (MFI->estimateStackSize(MF) + + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= estimateRSStackSizeLimit(MF, this))) || MFI->hasVarSizedObjects() || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); @@ -1419,7 +1368,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. const TargetRegisterClass *RC = &ARM::GPRRegClass; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 514971f01e..bb26090d2d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -564,6 +564,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); + // Custom expand long extensions to vectors. + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); + // NEON does not have single instruction CTPOP for vectors with element // types wider than 8-bits. However, custom lowering can leverage the // v8i8/v16i8 vcnt instruction. @@ -870,8 +880,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // are at least 4 bytes aligned. setMinStackArgumentAlignment(4); - BenefitFromCodePlacementOpt = true; - // Prefer likely predicted branches to selects on out-of-order cores. PredictableSelectIsExpensive = Subtarget->isLikeA9(); @@ -3433,6 +3441,47 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec), +/// and size(DestVec) > 128-bits. +/// This is achieved by doing the one extension from the SrcVec, splitting the +/// result, extending these parts, and then concatenating these into the +/// destination. +static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { + SDValue Op = N->getOperand(0); + EVT SrcVT = Op.getValueType(); + EVT DestVT = N->getValueType(0); + + assert(DestVT.getSizeInBits() > 128 && + "Custom sext/zext expansion needs >128-bit vector."); + // If this is a normal length extension, use the default expansion. + if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() && + SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits()) + return SDValue(); + + DebugLoc dl = N->getDebugLoc(); + unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); + unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits(); + unsigned NumElts = SrcVT.getVectorNumElements(); + LLVMContext &Ctx = *DAG.getContext(); + SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi; + + EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), + NumElts); + EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), + NumElts/2); + EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize), + NumElts/2); + + Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op); + SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, + DAG.getIntPtrConstant(0)); + SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, + DAG.getIntPtrConstant(NumElts/2)); + ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo); + ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -5621,6 +5670,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + Res = ExpandVectorExtension(N, DAG); + break; case ISD::SRL: case ISD::SRA: Res = Expand64BitShift(N, DAG, Subtarget); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 9409f35974..c2f357e84f 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1010,7 +1010,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, let isReMaterializable = 1 in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> { + [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>, + Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> imm; @@ -1022,7 +1023,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, } def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir, opc, "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> { + [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>, + Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -1037,7 +1039,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]> { + [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]>, + Sched<[WriteALUsi, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1052,7 +1055,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]> { + [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]>, + Sched<[WriteALUsr, ReadALUsr]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1079,7 +1083,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, let isReMaterializable = 1 in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]> { + [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]>, + Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> imm; @@ -1091,7 +1096,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, } def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir, opc, "\t$Rd, $Rn, $Rm", - [/* pattern left blank */]> { + [/* pattern left blank */]>, + Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -1105,7 +1111,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]> { + [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]>, + Sched<[WriteALUsi, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1120,7 +1127,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]> { + [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]>, + Sched<[WriteALUsr, ReadALUsr]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1145,24 +1153,28 @@ multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir, bit Commutable = 0> { def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p), 4, iii, - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>; + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>, + Sched<[WriteALU, ReadALU]>; def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p), 4, iir, - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]> { + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>, + Sched<[WriteALU, ReadALU, ReadALU]> { let isCommutable = Commutable; } def rsi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift, pred:$p), 4, iis, [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, - so_reg_imm:$shift))]>; + so_reg_imm:$shift))]>, + Sched<[WriteALUsi, ReadALU]>; def rsr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift, pred:$p), 4, iis, [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, - so_reg_reg:$shift))]>; + so_reg_reg:$shift))]>, + Sched<[WriteALUSsr, ReadALUsr]>; } } @@ -1174,19 +1186,22 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir, bit Commutable = 0> { def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p), 4, iii, - [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>; + [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>, + Sched<[WriteALU, ReadALU]>; def rsi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift, pred:$p), 4, iis, [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, - GPR:$Rn))]>; + GPR:$Rn))]>, + Sched<[WriteALUsi, ReadALU]>; def rsr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift, pred:$p), 4, iis, [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, - GPR:$Rn))]>; + GPR:$Rn))]>, + Sched<[WriteALUSsr, ReadALUsr]>; } } diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 98bd6c168e..e4e683c2a0 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -865,7 +865,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; // Can't do the merge if the destination register is the same as the would-be // writeback register. - if (isLd && MI->getOperand(0).getReg() == Base) + if (MI->getOperand(0).getReg() == Base) return false; unsigned PredReg = 0; diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 02196d06bf..7eb5ff665a 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -6,6 +6,70 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Instruction scheduling annotations for out-of-order CPUs. +// These annotations are independent of the itinerary class defined below. +// Here we define the subtarget independent read/write per-operand resources. +// The subtarget schedule definitions will then map these to the subtarget's +// resource usages. +// For example: +// The instruction cycle timings table might contain an entry for an operation +// like the following: +// Rd <- ADD Rn, Rm, <shift> Rs +// Uops | Latency from register | Uops - resource requirements - latency +// 2 | Rn: 1 Rm: 4 Rs: 4 | uop T0, Rm, Rs - P01 - 3 +// | | uopc Rd, Rn, T0 - P01 - 1 +// This is telling us that the result will be available in destination register +// Rd after a minimum of three cycles after the result in Rm and Rs is available +// and one cycle after the result in Rn is available. The micro-ops can execute +// on resource P01. +// To model this, we need to express that we need to dispatch two micro-ops, +// that the resource P01 is needed and that the latency to Rn is different than +// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by +// two. +// We will do this by assigning (abstract) resources to register defs/uses. +// ARMSchedule.td: +// def WriteALUsr : SchedWrite; +// def ReadAdvanceALUsr : ScheRead; +// +// ARMInstrInfo.td: +// def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault, +// ReadDefault]> { ...} +// ReadAdvance read resources allow us to define "pipeline by-passes" or +// shorter latencies to certain registers as needed in the example above. +// The "ReadDefault" can be omitted. +// Next, the subtarget td file assigns resources to the abstract resources +// defined here. +// ARMScheduleSubtarget.td: +// // Resources. +// def P01 : ProcResource<3>; // ALU unit (3 of it). +// ... +// // Resource usages. +// def : WriteRes<WriteALUsr, [P01, P01]> { +// Latency = 4; // Latency of 4. +// NumMicroOps = 2; // Dispatch 2 micro-ops. +// // The two instances of resource P01 are occupied for one cycle. It is one +// // cycle because these resources happen to be pipelined. +// ResourceCycles = [1, 1]; +// } +// def : ReadAdvance<ReadAdvanceALUsr, 3>; + +// Basic ALU operation. +def WriteALU : SchedWrite; +def ReadALU : SchedRead; + +// Basic ALU with shifts. +def WriteALUsi : SchedWrite; // Shift by immediate. +def WriteALUsr : SchedWrite; // Shift by register. +def WriteALUSsr : SchedWrite; // Shift by register (flag setting). +def ReadALUsr : SchedRead; // Some operands are read later. + +// Define TII for use in SchedVariant Predicates. +def : PredicateProlog<[{ + const ARMBaseInstrInfo *TII = + static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo()); + (void)TII; +}]>; //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 4191931a5a..382e6cc4cd 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1898,6 +1898,8 @@ def CortexA9Model : SchedMachineModel { //===----------------------------------------------------------------------===// // Define each kind of processor resource and number available. +let SchedModel = CortexA9Model in { + def A9UnitALU : ProcResource<2>; def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } def A9UnitAGU : ProcResource<1>; @@ -1918,11 +1920,11 @@ def A9WriteI : SchedWriteRes<[A9UnitALU]>; def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; } // Basic ALU. -def A9WriteA : SchedWriteRes<[A9UnitALU]>; +def : WriteRes<WriteALU, [A9UnitALU]>; // ALU with operand shifted by immediate. -def A9WriteAsi : SchedWriteRes<[A9UnitALU]> { let Latency = 2; } +def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; } // ALU with operand shifted by register. -def A9WriteAsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; } +def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; } // Multiplication def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; } @@ -2003,13 +2005,6 @@ foreach NumCycles = 2-8 in { def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>; } // foreach NumCycles -// Define TII for use in SchedVariant Predicates. -def : PredicateProlog<[{ - const ARMBaseInstrInfo *TII = - static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo()); - (void)TII; -}]>; - // Define address generation sequences and predicates for 8 flavors of LDMs. foreach NumAddr = 1-8 in { @@ -2254,11 +2249,11 @@ def A9WriteLMfp : SchedWriteVariant<[ // These mov immediate writers are unconditionally expanded with // additive latency. def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>; -def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, A9WriteA]>; +def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>; def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>; // Some ALU operations can read loaded integer values one cycle early. -def A9ReadA : SchedReadAdvance<1, +def A9ReadALU : SchedReadAdvance<1, [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi, A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4, A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8, @@ -2279,26 +2274,25 @@ def A9Read4 : SchedReadAdvance<3>; // This table follows the ARM Cortex-A9 Technical Reference Manuals, // mostly in order. -let SchedModel = CortexA9Model in { def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi, IIC_iMVNi,IIC_iMVNsi, IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>; -def :ItinRW<[A9WriteI,A9ReadA],[IIC_iMVNr]>; +def :ItinRW<[A9WriteI,ReadALU],[IIC_iMVNr]>; def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>; def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>; def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>; def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>; -def :ItinRW<[A9WriteA], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>; -def :ItinRW<[A9WriteA, A9ReadA], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>; -def :ItinRW<[A9WriteA, A9ReadA, A9ReadA],[IIC_iALUr,IIC_iCMPr]>; -def :ItinRW<[A9WriteAsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>; -def :ItinRW<[A9WriteAsi, A9ReadA], [IIC_iALUsi]>; -def :ItinRW<[A9WriteAsi, ReadDefault, A9ReadA], [IIC_iALUsir]>; // RSB -def :ItinRW<[A9WriteAsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>; -def :ItinRW<[A9WriteAsr, A9ReadA], [IIC_iALUsr,IIC_iCMPsr]>; +def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>; +def :ItinRW<[WriteALU, ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>; +def :ItinRW<[WriteALU, ReadALU, ReadALU],[IIC_iALUr,IIC_iCMPr]>; +def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>; +def :ItinRW<[WriteALUsi, ReadALU], [IIC_iALUsi]>; +def :ItinRW<[WriteALUsi, ReadDefault, ReadALU], [IIC_iALUsir]>; // RSB +def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>; +def :ItinRW<[A9WriteALUsr, ReadALU], [IIC_iALUsr,IIC_iCMPsr]>; // A9WriteHi ignored for MUL32. def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32, @@ -2371,7 +2365,7 @@ def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu, IIC_iStore_m, IIC_iStore_mu]>; def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>; -def :ItinRW<[A9WriteL, A9WriteAdr, A9WriteA], [IIC_iLoadiALU]>; +def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>; def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>; @@ -2486,4 +2480,11 @@ def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>; def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>; def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>; def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>; + +// Map SchedRWs that are identical for cortexa9 to existing resources. +def : SchedAlias<WriteALUsr, A9WriteALUsr>; +def : SchedAlias<WriteALUSsr, A9WriteALUsr>; +def : SchedAlias<ReadALU, A9ReadALU>; +def : SchedAlias<ReadALUsr, A9ReadALU>; + } // SchedModel = CortexA9Model diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td index e9bc3e0f39..28bb429feb 100644 --- a/lib/Target/ARM/ARMScheduleSwift.td +++ b/lib/Target/ARM/ARMScheduleSwift.td @@ -1078,8 +1078,29 @@ def SwiftModel : SchedMachineModel { let IssueWidth = 3; // 3 micro-ops are dispatched per cycle. let MinLatency = 0; // Data dependencies are allowed within dispatch groups. let LoadLatency = 3; + let MispredictPenalty = 14; // A branch direction mispredict. let Itineraries = SwiftItineraries; } -// TODO: Add Swift processor and scheduler resources. +// Swift resource mapping. +let SchedModel = SwiftModel in { + // Processor resources. + def SwiftUnitP01 : ProcResource<2>; // ALU unit. + def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit. + def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit. + def SwiftUnitP2 : ProcResource<1>; // LS unit. + def SwiftUnitDiv : ProcResource<1>; + + // 4.2.4 Arithmetic and Logical. + // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR + // AND,BIC, EOR,ORN,ORR + // CLZ,RBIT,REV,REV16,REVSH,PKH + // Single cycle. + def : WriteRes<WriteALU, [SwiftUnitP01]>; + def : WriteRes<WriteALUsi, [SwiftUnitP01]>; + def : WriteRes<WriteALUsr, [SwiftUnitP01]>; + def : WriteRes<WriteALUSsr, [SwiftUnitP01]>; + def : ReadAdvance<ReadALU, 0>; + def : ReadAdvance<ReadALUsr, 2>; +} diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index e11314d4fc..739300e4ef 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -42,12 +43,13 @@ StrictAlign("arm-strict-align", cl::Hidden, cl::desc("Disallow all unaligned memory accesses")); ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS) + const std::string &FS, const TargetOptions &Options) : ARMGenSubtargetInfo(TT, CPU, FS) , ARMProcFamily(Others) , stackAlignment(4) , CPUString(CPU) , TargetTriple(TT) + , Options(Options) , TargetABI(ARM_ABI_APCS) { initializeEnvironment(); resetSubtargetFeatures(CPU, FS); @@ -92,6 +94,7 @@ void ARMSubtarget::initializeEnvironment() { AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; + UnsafeFPMath = false; } void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { @@ -162,6 +165,12 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // configuration. if (!StrictAlign && hasV6Ops() && isTargetDarwin()) AllowsUnalignedMem = true; + + // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. + uint64_t Bits = getFeatureBits(); + if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters + (Options.UnsafeFPMath || isTargetDarwin())) + UseNEONForSinglePrecisionFP = true; } /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 8ce22e1de2..5b5ee6aeb8 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -26,6 +26,7 @@ namespace llvm { class GlobalValue; class StringRef; +class TargetOptions; class ARMSubtarget : public ARMGenSubtargetInfo { protected: @@ -159,6 +160,9 @@ protected: /// NaCl TRAP instruction is generated instead of the regular TRAP. bool UseNaClTrap; + /// Target machine allowed unsafe FP math (such as use of NEON fp) + bool UnsafeFPMath; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -175,6 +179,9 @@ protected: /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; + /// Options passed via command line that could influence the target + const TargetOptions &Options; + public: enum { isELF, isDarwin @@ -189,7 +196,7 @@ protected: /// of the specified triple. /// ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); + const std::string &FS, const TargetOptions &Options); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 774521852a..42c7d2c437 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -28,6 +28,11 @@ EnableGlobalMerge("global-merge", cl::Hidden, cl::desc("Enable global merge pass"), cl::init(true)); +static cl::opt<bool> +DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden, + cl::desc("Inhibit optimization of S->D register accesses on A15"), + cl::init(false)); + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); @@ -43,7 +48,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS), + Subtarget(TT, CPU, FS, Options), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { // Default to soft float ABI @@ -164,6 +169,12 @@ bool ARMPassConfig::addPreRegAlloc() { addPass(createARMLoadStoreOptimizationPass(true)); if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9()) addPass(createMLxExpansionPass()); + // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be + // enabled when NEON is available. + if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() && + getARMSubtarget().hasNEON() && !DisableA15SDOptimization) { + addPass(createA15SDOptimizerPass()); + } return true; } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 01c04b48cf..1019b972e9 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -177,6 +177,23 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + // Single to/from double precision conversions. + static const CostTblEntry<MVT> NEONFltDblTbl[] = { + // Vector fptrunc/fpext conversions. + { ISD::FP_ROUND, MVT::v2f64, 2 }, + { ISD::FP_EXTEND, MVT::v2f32, 2 }, + { ISD::FP_EXTEND, MVT::v4f32, 4 } + }; + + if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || + ISD == ISD::FP_EXTEND)) { + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl), + ISD, LT.second); + if (Idx != -1) + return LT.first * NEONFltDblTbl[Idx].Cost; + } + EVT SrcTy = TLI->getValueType(Src); EVT DstTy = TLI->getValueType(Dst); @@ -194,17 +211,71 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + // The number of vmovl instructions for the extension. + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + + // Operations that we legalize using load/stores to the stack. + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 }, + // Vector float <-> i32 conversions. { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, + { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 }, + { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 }, + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, // Vector double <-> i32 conversions. { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, - { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 } + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 }, + { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 }, + { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 }, + { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 } }; if (SrcTy.isVector() && ST->hasNEON()) { @@ -247,7 +318,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return NEONFloatConversionTbl[Idx].Cost; } - // Scalar integer to float conversions. static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = { { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, @@ -303,7 +373,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return ARMIntegerConversionTbl[Idx].Cost; } - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); } @@ -326,6 +395,25 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a a vector select gets lowered to vbsl. if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { + // Lowering of some vector selects is currently far from perfect. + static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = { + { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 }, + { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } + }; + + EVT SelCondTy = TLI->getValueType(CondTy); + EVT SelValTy = TLI->getValueType(ValTy); + int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl, + array_lengthof(NEONVectorSelectTbl), + ISD, SelCondTy.getSimpleVT(), + SelValTy.getSimpleVT()); + if (Idx != -1) + return NEONVectorSelectTbl[Idx].Cost; + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); return LT.first; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 6c678fdbd7..ed7b7ec9d2 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -316,103 +316,127 @@ class ARMOperand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; SmallVector<unsigned, 8> Registers; + struct CCOp { + ARMCC::CondCodes Val; + }; + + struct CopOp { + unsigned Val; + }; + + struct CoprocOptionOp { + unsigned Val; + }; + + struct ITMaskOp { + unsigned Mask:4; + }; + + struct MBOptOp { + ARM_MB::MemBOpt Val; + }; + + struct IFlagsOp { + ARM_PROC::IFlags Val; + }; + + struct MMaskOp { + unsigned Val; + }; + + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + }; + + // A vector register list is a sequential list of 1 to 4 registers. + struct VectorListOp { + unsigned RegNum; + unsigned Count; + unsigned LaneIndex; + bool isDoubleSpaced; + }; + + struct VectorIndexOp { + unsigned Val; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + /// Combined record for all forms of ARM address expressions. + struct MemoryOp { + unsigned BaseRegNum; + // Offset is in OffsetReg or OffsetImm. If both are zero, no offset + // was specified. + const MCConstantExpr *OffsetImm; // Offset immediate value + unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL + ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg + unsigned ShiftImm; // shift for OffsetReg. + unsigned Alignment; // 0 = no alignment specified + // n = alignment in bytes (2, 4, 8, 16, or 32) + unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) + }; + + struct PostIdxRegOp { + unsigned RegNum; + bool isAdd; + ARM_AM::ShiftOpc ShiftTy; + unsigned ShiftImm; + }; + + struct ShifterImmOp { + bool isASR; + unsigned Imm; + }; + + struct RegShiftedRegOp { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftReg; + unsigned ShiftImm; + }; + + struct RegShiftedImmOp { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftImm; + }; + + struct RotImmOp { + unsigned Imm; + }; + + struct BitfieldOp { + unsigned LSB; + unsigned Width; + }; + union { - struct { - ARMCC::CondCodes Val; - } CC; - - struct { - unsigned Val; - } Cop; - - struct { - unsigned Val; - } CoprocOption; - - struct { - unsigned Mask:4; - } ITMask; - - struct { - ARM_MB::MemBOpt Val; - } MBOpt; - - struct { - ARM_PROC::IFlags Val; - } IFlags; - - struct { - unsigned Val; - } MMask; - - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - } Reg; - - // A vector register list is a sequential list of 1 to 4 registers. - struct { - unsigned RegNum; - unsigned Count; - unsigned LaneIndex; - bool isDoubleSpaced; - } VectorList; - - struct { - unsigned Val; - } VectorIndex; - - struct { - const MCExpr *Val; - } Imm; - - /// Combined record for all forms of ARM address expressions. - struct { - unsigned BaseRegNum; - // Offset is in OffsetReg or OffsetImm. If both are zero, no offset - // was specified. - const MCConstantExpr *OffsetImm; // Offset immediate value - unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL - ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg - unsigned ShiftImm; // shift for OffsetReg. - unsigned Alignment; // 0 = no alignment specified - // n = alignment in bytes (2, 4, 8, 16, or 32) - unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) - } Memory; - - struct { - unsigned RegNum; - bool isAdd; - ARM_AM::ShiftOpc ShiftTy; - unsigned ShiftImm; - } PostIdxReg; - - struct { - bool isASR; - unsigned Imm; - } ShifterImm; - struct { - ARM_AM::ShiftOpc ShiftTy; - unsigned SrcReg; - unsigned ShiftReg; - unsigned ShiftImm; - } RegShiftedReg; - struct { - ARM_AM::ShiftOpc ShiftTy; - unsigned SrcReg; - unsigned ShiftImm; - } RegShiftedImm; - struct { - unsigned Imm; - } RotImm; - struct { - unsigned LSB; - unsigned Width; - } Bitfield; + struct CCOp CC; + struct CopOp Cop; + struct CoprocOptionOp CoprocOption; + struct MBOptOp MBOpt; + struct ITMaskOp ITMask; + struct IFlagsOp IFlags; + struct MMaskOp MMask; + struct TokOp Tok; + struct RegOp Reg; + struct VectorListOp VectorList; + struct VectorIndexOp VectorIndex; + struct ImmOp Imm; + struct MemoryOp Memory; + struct PostIdxRegOp PostIdxReg; + struct ShifterImmOp ShifterImm; + struct RegShiftedRegOp RegShiftedReg; + struct RegShiftedImmOp RegShiftedImm; + struct RotImmOp RotImm; + struct BitfieldOp Bitfield; }; ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} @@ -4569,20 +4593,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Error(Parser.getTok().getLoc(), "unexpected token in operand"); return true; case AsmToken::Identifier: { - if (!tryParseRegisterWithWriteBack(Operands)) - return false; - int Res = tryParseShiftRegister(Operands); - if (Res == 0) // success - return false; - else if (Res == -1) // irrecoverable error - return true; - // If this is VMRS, check for the apsr_nzcv operand. - if (Mnemonic == "vmrs" && - Parser.getTok().getString().equals_lower("apsr_nzcv")) { - S = Parser.getTok().getLoc(); - Parser.Lex(); - Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S)); - return false; + // If we've seen a branch mnemonic, the next operand must be a label. This + // is true even if the label is a register name. So "br r1" means branch to + // label "r1". + bool ExpectLabel = Mnemonic == "b" || Mnemonic == "bl"; + if (!ExpectLabel) { + if (!tryParseRegisterWithWriteBack(Operands)) + return false; + int Res = tryParseShiftRegister(Operands); + if (Res == 0) // success + return false; + else if (Res == -1) // irrecoverable error + return true; + // If this is VMRS, check for the apsr_nzcv operand. + if (Mnemonic == "vmrs" && + Parser.getTok().getString().equals_lower("apsr_nzcv")) { + S = Parser.getTok().getLoc(); + Parser.Lex(); + Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S)); + return false; + } } // Fall though for the Identifier case that is not a register or a diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 586834cf73..b832508a08 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler) add_public_tablegen_target(ARMCommonTableGen) add_llvm_target(ARMCodeGen + A15SDOptimizer.cpp ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 31a3b0b524..2e009e55e3 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -3049,9 +3049,9 @@ static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4, + if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4, true, 2, Inst, Decoder)) - Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); + Inst.addOperand(MCOperand::CreateImm(Val << 1)); return MCDisassembler::Success; } @@ -3278,7 +3278,7 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; } - if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; if (load) { diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index 463c440852..a64707e6f3 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -173,7 +173,6 @@ GCC is doing a couple of clever things here: mov r1, #1 lsl r1, r1, #8 tst r2, r1 - //===---------------------------------------------------------------------===// @@ -196,7 +195,6 @@ This is especially bad when dynamic alloca is used. The all fixed size stack objects are referenced off the frame pointer with negative offsets. See oggenc for an example. - //===---------------------------------------------------------------------===// Poor codegen test/CodeGen/ARM/select.ll f7: diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 609d502aa5..7452fb776e 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -588,7 +588,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // means the stack pointer cannot be used to access the emergency spill slot // when !hasReservedCallFrame(). #ifndef NDEBUG - if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){ + if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){ assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) && "Cannot use SP to access the emergency spill slot in " "functions without a reserved call frame"); diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 604abf93cc..3e69098edc 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -518,7 +518,6 @@ void CppWriter::printAttributes(const AttributeSet &PAL, attrs.removeAttribute(Attribute::StackAlignment); } - assert(!attrs.hasAttributes() && "Unhandled attribute!"); Out << "PAS = AttributeSet::get(mod->getContext(), "; if (index == ~0U) Out << "~0U,"; diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 62aed1353c..178662447a 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -701,7 +701,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, // If the induction variable bump is not a power of 2, quit. // Othwerise we'd need a general integer division. - if (!isPowerOf2_64(abs(IVBump))) + if (!isPowerOf2_64(abs64(IVBump))) return 0; MachineBasicBlock *PH = Loop->getLoopPreheader(); @@ -1430,7 +1430,6 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( return 0; typedef MachineBasicBlock::instr_iterator instr_iterator; - typedef MachineBasicBlock::pred_iterator pred_iterator; // Verify that all existing predecessors have analyzable branches // (or no branches at all). diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 3a1c48bac9..8fc9ba1ee8 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -113,6 +113,46 @@ public: SDNode *SelectAdd(SDNode *N); bool isConstExtProfitable(SDNode *N) const; +// XformMskToBitPosU5Imm - Returns the bit position which +// the single bit 32 bit mask represents. +// Used in Clr and Set bit immediate memops. +SDValue XformMskToBitPosU5Imm(uint32_t Imm) { + int32_t bitPos; + bitPos = Log2_32(Imm); + assert(bitPos >= 0 && bitPos < 32 && + "Constant out of range for 32 BitPos Memops"); + return CurDAG->getTargetConstant(bitPos, MVT::i32); +} + +// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit +// mask represents. Used in Clr and Set bit immediate memops. +SDValue XformMskToBitPosU4Imm(uint16_t Imm) { + return XformMskToBitPosU5Imm(Imm); +} + +// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit +// mask represents. Used in Clr and Set bit immediate memops. +SDValue XformMskToBitPosU3Imm(uint8_t Imm) { + return XformMskToBitPosU5Imm(Imm); +} + +// Return true if there is exactly one bit set in V, i.e., if V is one of the +// following integers: 2^0, 2^1, ..., 2^31. +bool ImmIsSingleBit(uint32_t v) const { + uint32_t c = CountPopulation_64(v); + // Only return true if we counted 1 bit. + return c == 1; +} + +// XformM5ToU5Imm - Return a target constant with the specified value, of type +// i32 where the negative literal is transformed into a positive literal for +// use in -= memops. +inline SDValue XformM5ToU5Imm(signed Imm) { + assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops"); + return CurDAG->getTargetConstant( - Imm, MVT::i32); +} + + // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range // [1..128], used in cmpb.gtu instructions. inline SDValue XformU7ToU7M1Imm(signed Imm) { diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 4c0f93c6cd..60b12ac01c 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -537,6 +537,15 @@ MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return(0); } +MachineInstr* +HexagonInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Hexagon::DBG_VALUE)) + .addImm(0).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { @@ -1881,6 +1890,13 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); } +bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + + assert(isPredicated(MI)); + return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); +} + bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const { @@ -1949,6 +1965,10 @@ isValidOffset(const int Opcode, const int Offset) const { // the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is // inserted to calculate the final address. Due to this reason, the function // assumes that the "Offset" has correct alignment. + // We used to assert if the offset was not properly aligned, however, + // there are cases where a misaligned pointer recast can cause this + // problem, and we need to allow for it. The front end warns of such + // misaligns with respect to load size. switch(Opcode) { @@ -1958,7 +1978,6 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::STriw_indexed: case Hexagon::STriw: case Hexagon::STriw_f: - assert((Offset % 4 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMW_OFFSET_MIN) && (Offset <= Hexagon_MEMW_OFFSET_MAX); @@ -1968,14 +1987,12 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::STrid: case Hexagon::STrid_indexed: case Hexagon::STrid_f: - assert((Offset % 8 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMD_OFFSET_MIN) && (Offset <= Hexagon_MEMD_OFFSET_MAX); case Hexagon::LDrih: case Hexagon::LDriuh: case Hexagon::STrih: - assert((Offset % 2 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMH_OFFSET_MIN) && (Offset <= Hexagon_MEMH_OFFSET_MAX); @@ -1990,48 +2007,28 @@ isValidOffset(const int Opcode, const int Offset) const { return (Offset >= Hexagon_ADDI_OFFSET_MIN) && (Offset <= Hexagon_ADDI_OFFSET_MAX); - case Hexagon::MEMw_ADDi_indexed_MEM_V4 : - case Hexagon::MEMw_SUBi_indexed_MEM_V4 : - case Hexagon::MEMw_ADDr_indexed_MEM_V4 : - case Hexagon::MEMw_SUBr_indexed_MEM_V4 : - case Hexagon::MEMw_ANDr_indexed_MEM_V4 : - case Hexagon::MEMw_ORr_indexed_MEM_V4 : - case Hexagon::MEMw_ADDi_MEM_V4 : - case Hexagon::MEMw_SUBi_MEM_V4 : - case Hexagon::MEMw_ADDr_MEM_V4 : - case Hexagon::MEMw_SUBr_MEM_V4 : - case Hexagon::MEMw_ANDr_MEM_V4 : - case Hexagon::MEMw_ORr_MEM_V4 : - assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." ); + case Hexagon::MemOPw_ADDi_V4 : + case Hexagon::MemOPw_SUBi_V4 : + case Hexagon::MemOPw_ADDr_V4 : + case Hexagon::MemOPw_SUBr_V4 : + case Hexagon::MemOPw_ANDr_V4 : + case Hexagon::MemOPw_ORr_V4 : return (0 <= Offset && Offset <= 255); - case Hexagon::MEMh_ADDi_indexed_MEM_V4 : - case Hexagon::MEMh_SUBi_indexed_MEM_V4 : - case Hexagon::MEMh_ADDr_indexed_MEM_V4 : - case Hexagon::MEMh_SUBr_indexed_MEM_V4 : - case Hexagon::MEMh_ANDr_indexed_MEM_V4 : - case Hexagon::MEMh_ORr_indexed_MEM_V4 : - case Hexagon::MEMh_ADDi_MEM_V4 : - case Hexagon::MEMh_SUBi_MEM_V4 : - case Hexagon::MEMh_ADDr_MEM_V4 : - case Hexagon::MEMh_SUBr_MEM_V4 : - case Hexagon::MEMh_ANDr_MEM_V4 : - case Hexagon::MEMh_ORr_MEM_V4 : - assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." ); + case Hexagon::MemOPh_ADDi_V4 : + case Hexagon::MemOPh_SUBi_V4 : + case Hexagon::MemOPh_ADDr_V4 : + case Hexagon::MemOPh_SUBr_V4 : + case Hexagon::MemOPh_ANDr_V4 : + case Hexagon::MemOPh_ORr_V4 : return (0 <= Offset && Offset <= 127); - case Hexagon::MEMb_ADDi_indexed_MEM_V4 : - case Hexagon::MEMb_SUBi_indexed_MEM_V4 : - case Hexagon::MEMb_ADDr_indexed_MEM_V4 : - case Hexagon::MEMb_SUBr_indexed_MEM_V4 : - case Hexagon::MEMb_ANDr_indexed_MEM_V4 : - case Hexagon::MEMb_ORr_indexed_MEM_V4 : - case Hexagon::MEMb_ADDi_MEM_V4 : - case Hexagon::MEMb_SUBi_MEM_V4 : - case Hexagon::MEMb_ADDr_MEM_V4 : - case Hexagon::MEMb_SUBr_MEM_V4 : - case Hexagon::MEMb_ANDr_MEM_V4 : - case Hexagon::MEMb_ORr_MEM_V4 : + case Hexagon::MemOPb_ADDi_V4 : + case Hexagon::MemOPb_SUBi_V4 : + case Hexagon::MemOPb_ADDr_V4 : + case Hexagon::MemOPb_SUBr_V4 : + case Hexagon::MemOPb_ANDr_V4 : + case Hexagon::MemOPb_ORr_V4 : return (0 <= Offset && Offset <= 63); // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of @@ -2087,44 +2084,33 @@ isMemOp(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: return false; - case Hexagon::MEMw_ADDi_indexed_MEM_V4 : - case Hexagon::MEMw_SUBi_indexed_MEM_V4 : - case Hexagon::MEMw_ADDr_indexed_MEM_V4 : - case Hexagon::MEMw_SUBr_indexed_MEM_V4 : - case Hexagon::MEMw_ANDr_indexed_MEM_V4 : - case Hexagon::MEMw_ORr_indexed_MEM_V4 : - case Hexagon::MEMw_ADDi_MEM_V4 : - case Hexagon::MEMw_SUBi_MEM_V4 : - case Hexagon::MEMw_ADDr_MEM_V4 : - case Hexagon::MEMw_SUBr_MEM_V4 : - case Hexagon::MEMw_ANDr_MEM_V4 : - case Hexagon::MEMw_ORr_MEM_V4 : - case Hexagon::MEMh_ADDi_indexed_MEM_V4 : - case Hexagon::MEMh_SUBi_indexed_MEM_V4 : - case Hexagon::MEMh_ADDr_indexed_MEM_V4 : - case Hexagon::MEMh_SUBr_indexed_MEM_V4 : - case Hexagon::MEMh_ANDr_indexed_MEM_V4 : - case Hexagon::MEMh_ORr_indexed_MEM_V4 : - case Hexagon::MEMh_ADDi_MEM_V4 : - case Hexagon::MEMh_SUBi_MEM_V4 : - case Hexagon::MEMh_ADDr_MEM_V4 : - case Hexagon::MEMh_SUBr_MEM_V4 : - case Hexagon::MEMh_ANDr_MEM_V4 : - case Hexagon::MEMh_ORr_MEM_V4 : - case Hexagon::MEMb_ADDi_indexed_MEM_V4 : - case Hexagon::MEMb_SUBi_indexed_MEM_V4 : - case Hexagon::MEMb_ADDr_indexed_MEM_V4 : - case Hexagon::MEMb_SUBr_indexed_MEM_V4 : - case Hexagon::MEMb_ANDr_indexed_MEM_V4 : - case Hexagon::MEMb_ORr_indexed_MEM_V4 : - case Hexagon::MEMb_ADDi_MEM_V4 : - case Hexagon::MEMb_SUBi_MEM_V4 : - case Hexagon::MEMb_ADDr_MEM_V4 : - case Hexagon::MEMb_SUBr_MEM_V4 : - case Hexagon::MEMb_ANDr_MEM_V4 : - case Hexagon::MEMb_ORr_MEM_V4 : - return true; + case Hexagon::MemOPw_ADDi_V4 : + case Hexagon::MemOPw_SUBi_V4 : + case Hexagon::MemOPw_ADDr_V4 : + case Hexagon::MemOPw_SUBr_V4 : + case Hexagon::MemOPw_ANDr_V4 : + case Hexagon::MemOPw_ORr_V4 : + case Hexagon::MemOPh_ADDi_V4 : + case Hexagon::MemOPh_SUBi_V4 : + case Hexagon::MemOPh_ADDr_V4 : + case Hexagon::MemOPh_SUBr_V4 : + case Hexagon::MemOPh_ANDr_V4 : + case Hexagon::MemOPh_ORr_V4 : + case Hexagon::MemOPb_ADDi_V4 : + case Hexagon::MemOPb_SUBi_V4 : + case Hexagon::MemOPb_ADDr_V4 : + case Hexagon::MemOPb_SUBr_V4 : + case Hexagon::MemOPb_ANDr_V4 : + case Hexagon::MemOPb_ORr_V4 : + case Hexagon::MemOPb_SETBITi_V4: + case Hexagon::MemOPh_SETBITi_V4: + case Hexagon::MemOPw_SETBITi_V4: + case Hexagon::MemOPb_CLRBITi_V4: + case Hexagon::MemOPh_CLRBITi_V4: + case Hexagon::MemOPw_CLRBITi_V4: + return true; } + return false; } @@ -2383,6 +2369,13 @@ isConditionalStore (const MachineInstr* MI) const { } } +// Returns true, if any one of the operands is a dot new +// insn, whether it is predicated dot new or register dot new. +bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const { + return (isNewValueInst(MI) || + (isPredicated(MI) && isPredicatedNew(MI))); +} + unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const { const uint64_t F = MI->getDesc().TSFlags; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index d2f059aa79..5df13a88b5 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -127,6 +127,7 @@ public: const BranchProbability &Probability) const; virtual bool isPredicated(const MachineInstr *MI) const; + virtual bool isPredicatedNew(const MachineInstr *MI) const; virtual bool DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const; virtual bool @@ -140,6 +141,11 @@ public: isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles, const BranchProbability &Probability) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; virtual DFAPacketizer* CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const; @@ -170,6 +176,7 @@ public: bool isConditionalLoad (const MachineInstr* MI) const; bool isConditionalStore(const MachineInstr* MI) const; bool isNewValueInst(const MachineInstr* MI) const; + bool isDotNewInst(const MachineInstr* MI) const; bool isDeallocRet(const MachineInstr *MI) const; unsigned getInvertedPredicatedOpcode(const int Opc) const; bool isExtendable(const MachineInstr* MI) const; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index d7bab200f9..74dc0ca72a 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -446,38 +446,58 @@ def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2, s8ExtPred:$src2, s8ImmPred:$src3)))]>; -// Shift halfword. -let isPredicable = 1 in -def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = aslh($src1)", - [(set (i32 IntRegs:$dst), (shl 16, (i32 IntRegs:$src1)))]>; +// ALU32 - aslh, asrh, sxtb, sxth, zxtb, zxth +multiclass ALU32_2op_Pbase<string mnemonic, bit isNot, bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : ALU32Inst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", + ") $dst = ")#mnemonic#"($src2)">, + Requires<[HasV4T]>; +} -let isPredicable = 1 in -def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = asrh($src1)", - [(set (i32 IntRegs:$dst), (sra 16, (i32 IntRegs:$src1)))]>; +multiclass ALU32_2op_Pred<string mnemonic, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 0>; + // Predicate new + defm _cdn#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 1>; + } +} -// Sign extend. -let isPredicable = 1 in -def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = sxtb($src1)", - [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i8))]>; +multiclass ALU32_2op_base<string mnemonic> { + let BaseOpcode = mnemonic in { + let isPredicable = 1, neverHasSideEffects = 1 in + def NAME : ALU32Inst<(outs IntRegs:$dst), + (ins IntRegs:$src1), + "$dst = "#mnemonic#"($src1)">; -let isPredicable = 1 in -def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = sxth($src1)", - [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i16))]>; - -// Zero extend. -let isPredicable = 1, neverHasSideEffects = 1 in -def ZXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = zxtb($src1)", - []>; + let Predicates = [HasV4T], validSubTargets = HasV4SubT, isPredicated = 1, + neverHasSideEffects = 1 in { + defm Pt_V4 : ALU32_2op_Pred<mnemonic, 0>; + defm NotPt_V4 : ALU32_2op_Pred<mnemonic, 1>; + } + } +} + +defm ASLH : ALU32_2op_base<"aslh">, PredNewRel; +defm ASRH : ALU32_2op_base<"asrh">, PredNewRel; +defm SXTB : ALU32_2op_base<"sxtb">, PredNewRel; +defm SXTH : ALU32_2op_base<"sxth">, PredNewRel; +defm ZXTB : ALU32_2op_base<"zxtb">, PredNewRel; +defm ZXTH : ALU32_2op_base<"zxth">, PredNewRel; + +def : Pat <(shl (i32 IntRegs:$src1), (i32 16)), + (ASLH IntRegs:$src1)>; + +def : Pat <(sra (i32 IntRegs:$src1), (i32 16)), + (ASRH IntRegs:$src1)>; + +def : Pat <(sext_inreg (i32 IntRegs:$src1), i8), + (SXTB IntRegs:$src1)>; + +def : Pat <(sext_inreg (i32 IntRegs:$src1), i16), + (SXTH IntRegs:$src1)>; -let isPredicable = 1, neverHasSideEffects = 1 in -def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = zxth($src1)", - []>; //===----------------------------------------------------------------------===// // ALU32/PERM - //===----------------------------------------------------------------------===// @@ -488,29 +508,30 @@ def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), //===----------------------------------------------------------------------===// // Conditional combine. -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in { def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst = combine($src2, $src3)", []>; -let neverHasSideEffects = 1, isPredicated = 1 in +let isPredicatedFalse = 1 in def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst = combine($src2, $src3)", []>; -let neverHasSideEffects = 1, isPredicated = 1 in +let isPredicatedNew = 1 in def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst = combine($src2, $src3)", []>; -let neverHasSideEffects = 1, isPredicated = 1 in +let isPredicatedNew = 1, isPredicatedFalse = 1 in def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst = combine($src2, $src3)", []>; +} // Compare. defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel; @@ -1009,20 +1030,6 @@ def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))), (LDrid_indexed IntRegs:$src1, s11_3ExtPred:$offset) >; } -let neverHasSideEffects = 1 in -def LDrid_GP : LDInst2<(outs DoubleRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst = memd(#$global+$offset)", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def LDd_GP : LDInst2<(outs DoubleRegs:$dst), - (ins globaladdress:$global), - "$dst = memd(#$global)", - []>, - Requires<[NoV4T]>; - //===----------------------------------------------------------------------===// // Post increment load // Make sure that in post increment load, the first operand is always the post @@ -1095,27 +1102,6 @@ let AddedComplexity = 20 in def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))), (i32 (LDrib_indexed IntRegs:$src1, s11_0ImmPred:$offset)) >; -let neverHasSideEffects = 1 in -def LDrib_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst = memb(#$global+$offset)", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def LDb_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst = memb(#$global)", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def LDub_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst = memub(#$global)", - []>, - Requires<[NoV4T]>; - def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)), (i32 (LDrih ADDRriS11_1:$addr))>; @@ -1123,27 +1109,6 @@ let AddedComplexity = 20 in def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))), (i32 (LDrih_indexed IntRegs:$src1, s11_1ImmPred:$offset)) >; -let neverHasSideEffects = 1 in -def LDrih_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst = memh(#$global+$offset)", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def LDh_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst = memh(#$global)", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def LDuh_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst = memuh(#$global)", - []>, - Requires<[NoV4T]>; - let AddedComplexity = 10 in def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)), (i32 (LDriub ADDRriS11_0:$addr))>; @@ -1152,21 +1117,6 @@ let AddedComplexity = 20 in def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))), (i32 (LDriub_indexed IntRegs:$src1, s11_0ImmPred:$offset))>; -let neverHasSideEffects = 1 in -def LDriub_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst = memub(#$global+$offset)", - []>, - Requires<[NoV4T]>; - -// Load unsigned halfword. -let neverHasSideEffects = 1 in -def LDriuh_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst = memuh(#$global+$offset)", - []>, - Requires<[NoV4T]>; - // Load predicate. let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, isPseudo = 1, Defs = [R10,R11,D5], neverHasSideEffects = 1 in @@ -1175,21 +1125,6 @@ def LDriw_pred : LDInst2<(outs PredRegs:$dst), "Error; should not emit", []>; -// Indexed load. -let neverHasSideEffects = 1 in -def LDriw_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst = memw(#$global+$offset)", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def LDw_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst = memw(#$global)", - []>, - Requires<[NoV4T]>; - // Deallocate stack frame. let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in { def DEALLOCFRAME : LDInst2<(outs), (ins), @@ -1423,28 +1358,8 @@ def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, // ST + //===----------------------------------------------------------------------===// /// -/// Assumptions::: ****** DO NOT IGNORE ******** -/// 1. Make sure that in post increment store, the zero'th operand is always the -/// post increment operand. -/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the -/// last operand. -/// // Store doubleword. -let neverHasSideEffects = 1 in -def STrid_GP : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src), - "memd(#$global+$offset) = $src", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def STd_GP : STInst2<(outs), - (ins globaladdress:$global, DoubleRegs:$src), - "memd(#$global) = $src", - []>, - Requires<[NoV4T]>; - //===----------------------------------------------------------------------===// // Post increment store //===----------------------------------------------------------------------===// @@ -1655,36 +1570,6 @@ def : Pat<(store (i64 DoubleRegs:$src1), (add IntRegs:$src2, (i64 DoubleRegs:$src1))>; } -// memb(gp+#u16:0)=Rt -let neverHasSideEffects = 1 in -def STrib_GP : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memb(#$global+$offset) = $src", - []>, - Requires<[NoV4T]>; - -// memb(#global)=Rt -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def STb_GP : STInst2<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memb(#$global) = $src", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1 in -def STrih_GP : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memh(#$global+$offset) = $src", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def STh_GP : STInst2<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memh(#$global) = $src", - []>, - Requires<[NoV4T]>; - // memh(Rx++#s4:1)=Rt.H // Store word. @@ -1695,20 +1580,6 @@ def STriw_pred : STInst2<(outs), "Error; should not emit", []>; -let neverHasSideEffects = 1 in -def STriw_GP : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memw(#$global+$offset) = $src", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def STw_GP : STInst2<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memw(#$global) = $src", - []>, - Requires<[NoV4T]>; - // Allocate stack frame. let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in { def ALLOCFRAME : STInst2<(outs), @@ -2183,68 +2054,26 @@ def : Pat<(HexagonTCRet (i32 IntRegs:$dst)), // Atomic load and store support // 8 bit atomic load -def : Pat<(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDub_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - def : Pat<(atomic_load_8 ADDRriS11_0:$src1), (i32 (LDriub ADDRriS11_0:$src1))>; def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)), (i32 (LDriub_indexed (i32 IntRegs:$src1), s11_0ImmPred:$offset))>; - - // 16 bit atomic load -def : Pat<(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDuh_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - def : Pat<(atomic_load_16 ADDRriS11_1:$src1), (i32 (LDriuh ADDRriS11_1:$src1))>; def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)), (i32 (LDriuh_indexed (i32 IntRegs:$src1), s11_1ImmPred:$offset))>; - - -// 32 bit atomic load -def : Pat<(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDw_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - def : Pat<(atomic_load_32 ADDRriS11_2:$src1), (i32 (LDriw ADDRriS11_2:$src1))>; def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)), (i32 (LDriw_indexed (i32 IntRegs:$src1), s11_2ImmPred:$offset))>; - // 64 bit atomic load -def : Pat<(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)), - (i64 (LDd_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - def : Pat<(atomic_load_64 ADDRriS11_3:$src1), (i64 (LDrid ADDRriS11_3:$src1))>; @@ -2252,30 +2081,6 @@ def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)), (i64 (LDrid_indexed (i32 IntRegs:$src1), s11_3ImmPred:$offset))>; -// 64 bit atomic store -def : Pat<(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global), - (i64 DoubleRegs:$src1)), - (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i64 DoubleRegs:$src1)), - (STrid_GP tglobaladdr:$global, u16ImmPred:$offset, - (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>; - -// 8 bit atomic store -def : Pat<(atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i32 IntRegs:$src1)), - (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, Requires<[NoV4T]>; - def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)), (STrib ADDRriS11_0:$src2, (i32 IntRegs:$src1))>; @@ -2285,18 +2090,6 @@ def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset), (i32 IntRegs:$src1))>; -// 16 bit atomic store -def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i32 IntRegs:$src1)), - (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, Requires<[NoV4T]>; - def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)), (STrih ADDRriS11_1:$src2, (i32 IntRegs:$src1))>; @@ -2305,20 +2098,6 @@ def : Pat<(atomic_store_16 (i32 IntRegs:$src1), (STrih_indexed (i32 IntRegs:$src2), s11_1ImmPred:$offset, (i32 IntRegs:$src1))>; - -// 32 bit atomic store -def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STw_GP tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i32 IntRegs:$src1)), - (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)), (STriw ADDRriS11_2:$src2, (i32 IntRegs:$src1))>; @@ -2387,198 +2166,8 @@ def : Pat <(brcond (not PredRegs:$src1), bb:$offset), def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)), (i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>; -// Map from store(globaladdress + x) -> memd(#foo + x). -let AddedComplexity = 100 in -def : Pat <(store (i64 DoubleRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STrid_GP tglobaladdr:$global, u16ImmPred:$offset, - (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>; - -// Map from store(globaladdress) -> memd(#foo). -let AddedComplexity = 100 in -def : Pat <(store (i64 DoubleRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>, - Requires<[NoV4T]>; - -// Map from store(globaladdress + x) -> memw(#foo + x). -let AddedComplexity = 100 in -def : Pat <(store (i32 IntRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -// Map from store(globaladdress) -> memw(#foo + 0). -let AddedComplexity = 100 in -def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), - (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>; - -// Map from store(globaladdress) -> memw(#foo). -let AddedComplexity = 100 in -def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), - (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -// Map from store(globaladdress + x) -> memh(#foo + x). -let AddedComplexity = 100 in -def : Pat <(truncstorei16 (i32 IntRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -// Map from store(globaladdress) -> memh(#foo). -let AddedComplexity = 100 in -def : Pat <(truncstorei16 (i32 IntRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -// Map from store(globaladdress + x) -> memb(#foo + x). -let AddedComplexity = 100 in -def : Pat <(truncstorei8 (i32 IntRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -// Map from store(globaladdress) -> memb(#foo). -let AddedComplexity = 100 in -def : Pat <(truncstorei8 (i32 IntRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memw(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memw(#foo). -let AddedComplexity = 100 in -def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDw_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memd(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memw(#foo + 0). -let AddedComplexity = 100 in -def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i64 (LDd_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd. -let AddedComplexity = 100 in -def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i1 (TFR_PdRs (i32 (LDb_GP tglobaladdr:$global))))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memh(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memh(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDrih_GP tglobaladdr:$global, 0))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memuh(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memuh(#foo). -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDriuh_GP tglobaladdr:$global, 0))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memh(#foo). -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDh_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memuh(#foo). -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDuh_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memb(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memb(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memub(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memb(#foo). -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memb(#foo). -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memub(#foo). -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDub_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -// When the Interprocedural Global Variable optimizer realizes that a -// certain global variable takes only two constant values, it shrinks the -// global to a boolean. Catch those loads here in the following 3 patterns. -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDub_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - // Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned. +let AddedComplexity = 10 in def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)), (i32 (AND_rr (i32 (LDrib ADDRriS11_0:$addr)), (TFRI 0x1)))>; @@ -2694,12 +2283,6 @@ def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), def : Pat<(store (i1 -1), ADDRriS11_2:$addr), (STrib ADDRriS11_2:$addr, (TFRI 1))>; -let AddedComplexity = 100 in -// Map from i1 = constant<-1>; memw(CONST32(#foo)) = i1 -> r0 = 1; -// memw(#foo) = r0 -def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), - (STb_GP tglobaladdr:$global, (TFRI 1))>, - Requires<[NoV4T]>; // Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0. def : Pat<(store (i1 -1), ADDRriS11_2:$addr), diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 1d0643d03b..cd0e475896 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -95,164 +95,6 @@ def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr), //===----------------------------------------------------------------------===// // ALU32 + //===----------------------------------------------------------------------===// - -// Shift halfword. - -let isPredicated = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in { -def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1) $dst = aslh($src2)", - []>, - Requires<[HasV4T]>; - -def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1) $dst = aslh($src2)", - []>, - Requires<[HasV4T]>; - -def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) $dst = aslh($src2)", - []>, - Requires<[HasV4T]>; - -def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) $dst = aslh($src2)", - []>, - Requires<[HasV4T]>; - -def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1) $dst = asrh($src2)", - []>, - Requires<[HasV4T]>; - -def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1) $dst = asrh($src2)", - []>, - Requires<[HasV4T]>; - -def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) $dst = asrh($src2)", - []>, - Requires<[HasV4T]>; - -def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) $dst = asrh($src2)", - []>, - Requires<[HasV4T]>; -} - -// Sign extend. - -let isPredicated = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in { -def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1) $dst = sxtb($src2)", - []>, - Requires<[HasV4T]>; - -def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1) $dst = sxtb($src2)", - []>, - Requires<[HasV4T]>; - -def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) $dst = sxtb($src2)", - []>, - Requires<[HasV4T]>; - -def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) $dst = sxtb($src2)", - []>, - Requires<[HasV4T]>; - - -def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1) $dst = sxth($src2)", - []>, - Requires<[HasV4T]>; - -def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1) $dst = sxth($src2)", - []>, - Requires<[HasV4T]>; - -def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) $dst = sxth($src2)", - []>, - Requires<[HasV4T]>; - -def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) $dst = sxth($src2)", - []>, - Requires<[HasV4T]>; -} - -// Zero exten. - -let neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in { -def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1) $dst = zxtb($src2)", - []>, - Requires<[HasV4T]>; - -def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1) $dst = zxtb($src2)", - []>, - Requires<[HasV4T]>; - -def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) $dst = zxtb($src2)", - []>, - Requires<[HasV4T]>; - -def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) $dst = zxtb($src2)", - []>, - Requires<[HasV4T]>; - -def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1) $dst = zxth($src2)", - []>, - Requires<[HasV4T]>; - -def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1) $dst = zxth($src2)", - []>, - Requires<[HasV4T]>; - -def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) $dst = zxth($src2)", - []>, - Requires<[HasV4T]>; - -def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) $dst = zxth($src2)", - []>, - Requires<[HasV4T]>; -} - // Generate frame index addresses. let neverHasSideEffects = 1, isReMaterializable = 1, isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in @@ -596,329 +438,6 @@ def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))), Requires<[HasV4T]>; } -let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins globaladdress:$global), - "$dst=memd(#$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rtt=memd(##global) -let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, -validSubTargets = HasV4SubT in { -def LDd_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1) $dst=memd(##$global)", - []>, - Requires<[HasV4T]>; - - -// if (!Pv) Rtt=memd(##global) -def LDd_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1) $dst=memd(##$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rtt=memd(##global) -def LDd_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1.new) $dst=memd(##$global)", - []>, - Requires<[HasV4T]>; - - -// if (!Pv) Rtt=memd(##global) -def LDd_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1.new) $dst=memd(##$global)", - []>, - Requires<[HasV4T]>; -} - -let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def LDb_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst=memb(#$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memb(##global) -let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, -validSubTargets = HasV4SubT in { -def LDb_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1) $dst=memb(##$global)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rt=memb(##global) -def LDb_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1) $dst=memb(##$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memb(##global) -def LDb_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1.new) $dst=memb(##$global)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rt=memb(##global) -def LDb_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1.new) $dst=memb(##$global)", - []>, - Requires<[HasV4T]>; -} - -let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def LDub_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst=memub(#$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memub(##global) -let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, -validSubTargets = HasV4SubT in { -def LDub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1) $dst=memub(##$global)", - []>, - Requires<[HasV4T]>; - - -// if (!Pv) Rt=memub(##global) -def LDub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1) $dst=memub(##$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memub(##global) -def LDub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1.new) $dst=memub(##$global)", - []>, - Requires<[HasV4T]>; - - -// if (!Pv) Rt=memub(##global) -def LDub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1.new) $dst=memub(##$global)", - []>, - Requires<[HasV4T]>; -} - -let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def LDh_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst=memh(#$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memh(##global) -let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, -validSubTargets = HasV4SubT in { -def LDh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1) $dst=memh(##$global)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rt=memh(##global) -def LDh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1) $dst=memh(##$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memh(##global) -def LDh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1.new) $dst=memh(##$global)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rt=memh(##global) -def LDh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1.new) $dst=memh(##$global)", - []>, - Requires<[HasV4T]>; -} - -let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def LDuh_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst=memuh(#$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memuh(##global) -let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, -validSubTargets = HasV4SubT in { -def LDuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1) $dst=memuh(##$global)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rt=memuh(##global) -def LDuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1) $dst=memuh(##$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memuh(##global) -def LDuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1.new) $dst=memuh(##$global)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rt=memuh(##global) -def LDuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1.new) $dst=memuh(##$global)", - []>, - Requires<[HasV4T]>; -} - -let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def LDw_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst=memw(#$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memw(##global) -let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, -validSubTargets = HasV4SubT in { -def LDw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1) $dst=memw(##$global)", - []>, - Requires<[HasV4T]>; - - -// if (!Pv) Rt=memw(##global) -def LDw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1) $dst=memw(##$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memw(##global) -def LDw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1.new) $dst=memw(##$global)", - []>, - Requires<[HasV4T]>; - - -// if (!Pv) Rt=memw(##global) -def LDw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1.new) $dst=memw(##$global)", - []>, - Requires<[HasV4T]>; -} - - -def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)), - (i64 (LDd_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDw_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDuh_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDub_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memw(#foo + 0) -let AddedComplexity = 100 in -def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i64 (LDd_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd -let AddedComplexity = 100 in -def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>, - Requires<[HasV4T]>; - -// When the Interprocedural Global Variable optimizer realizes that a certain -// global variable takes only two constant values, it shrinks the global to -// a boolean. Catch those loads here in the following 3 patterns. -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memb(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memb(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDub_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memub(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDub_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memh(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDh_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memh(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDh_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memuh(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDuh_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memw(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDw_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - // zext i1->i64 def : Pat <(i64 (zext (i1 PredRegs:$src1))), (i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>, @@ -1374,225 +893,6 @@ def STriw_shl_V4 : STInst<(outs), // memw(Rx++I:circ(Mu))=Rt // memw(Rx++Mu)=Rt // memw(Rx++Mu:brev)=Rt -// memw(gp+#u16:2)=Rt - - -// memd(#global)=Rtt -let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1, -validSubTargets = HasV4SubT in -def STd_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, DoubleRegs:$src), - "memd(#$global) = $src", - []>, - Requires<[HasV4T]>; - -// if (Pv) memd(##global) = Rtt -let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1, -isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { -def STd_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), - "if ($src1) memd(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memd(##global) = Rtt -def STd_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), - "if (!$src1) memd(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (Pv) memd(##global) = Rtt -def STd_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), - "if ($src1.new) memd(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memd(##global) = Rtt -def STd_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), - "if (!$src1.new) memd(##$global) = $src2", - []>, - Requires<[HasV4T]>; -} - -// memb(#global)=Rt -let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STb_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memb(#$global) = $src", - []>, - Requires<[HasV4T]>; - -// if (Pv) memb(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1, -isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { -def STb_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1) memb(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memb(##global) = Rt -def STb_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1) memb(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (Pv) memb(##global) = Rt -def STb_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1.new) memb(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memb(##global) = Rt -def STb_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1.new) memb(##$global) = $src2", - []>, - Requires<[HasV4T]>; -} - -// memh(#global)=Rt -let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STh_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memh(#$global) = $src", - []>, - Requires<[HasV4T]>; - -// if (Pv) memh(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1, -isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { -def STh_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1) memh(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memh(##global) = Rt -def STh_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1) memh(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (Pv) memh(##global) = Rt -def STh_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1.new) memh(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memh(##global) = Rt -def STh_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1.new) memh(##$global) = $src2", - []>, - Requires<[HasV4T]>; -} - -// memw(#global)=Rt -let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STw_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memw(#$global) = $src", - []>, - Requires<[HasV4T]>; - -// if (Pv) memw(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1, -isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { -def STw_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1) memw(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memw(##global) = Rt -def STw_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1) memw(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (Pv) memw(##global) = Rt -def STw_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1.new) memw(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memw(##global) = Rt -def STw_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1.new) memw(##$global) = $src2", - []>, - Requires<[HasV4T]>; -} - -// 64 bit atomic store -def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global), - (i64 DoubleRegs:$src1)), - (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress) -> memd(#foo) -let AddedComplexity = 100 in -def : Pat <(store (i64 DoubleRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>, - Requires<[HasV4T]>; - -// 8 bit atomic store -def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress) -> memb(#foo) -let AddedComplexity = 100 in -def : Pat<(truncstorei8 (i32 IntRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" -// to "r0 = 1; memw(#foo) = r0" -let AddedComplexity = 100 in -def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), - (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress) -> memh(#foo) -let AddedComplexity = 100 in -def : Pat<(truncstorei16 (i32 IntRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// 32 bit atomic store -def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress) -> memw(#foo) -let AddedComplexity = 100 in -def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), - (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; //===----------------------------------------------------------------------=== // ST - @@ -1772,15 +1072,6 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; // memb(Rx++I:circ(Mu))=Nt.new // memb(Rx++Mu)=Nt.new // memb(Rx++Mu:brev)=Nt.new - -// memb(#global)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in -def STb_GP_nv_V4 : NVInst_V4<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memb(#$global) = $src.new", - []>, - Requires<[HasV4T]>; - // memh(Ru<<#u2+#U6)=Nt.new let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, isNVStore = 1, validSubTargets = HasV4SubT in @@ -1795,14 +1086,6 @@ def STrih_shl_nv_V4 : NVInst_V4<(outs), // memh(Rx++Mu)=Nt.new // memh(Rx++Mu:brev)=Nt.new -// memh(#global)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in -def STh_GP_nv_V4 : NVInst_V4<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memh(#$global) = $src.new", - []>, - Requires<[HasV4T]>; - // memw(Ru<<#u2+#U6)=Nt.new let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, isNVStore = 1, validSubTargets = HasV4SubT in @@ -1816,102 +1099,6 @@ def STriw_shl_nv_V4 : NVInst_V4<(outs), // memw(Rx++I:circ(Mu))=Nt.new // memw(Rx++Mu)=Nt.new // memw(Rx++Mu:brev)=Nt.new -// memw(gp+#u16:2)=Nt.new - -let mayStore = 1, neverHasSideEffects = 1, isNVStore = 1, -validSubTargets = HasV4SubT in -def STw_GP_nv_V4 : NVInst_V4<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memw(#$global) = $src.new", - []>, - Requires<[HasV4T]>; - -// if (Pv) memb(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1, isNVStore = 1, -isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { -def STb_GP_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1) memb(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memb(##global) = Rt -def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1) memb(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (Pv) memb(##global) = Rt -def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1.new) memb(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memb(##global) = Rt -def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1.new) memb(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (Pv) memh(##global) = Rt -def STh_GP_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1) memh(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memh(##global) = Rt -def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1) memh(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (Pv) memh(##global) = Rt -def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1.new) memh(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memh(##global) = Rt -def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1.new) memh(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (Pv) memw(##global) = Rt -def STw_GP_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1) memw(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memw(##global) = Rt -def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1) memw(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (Pv) memw(##global) = Rt -def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1.new) memw(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memw(##global) = Rt -def STw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1.new) memw(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; -} //===----------------------------------------------------------------------===// // NV/ST - @@ -2658,414 +1845,367 @@ def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), // MEMOP: Word, Half, Byte //===----------------------------------------------------------------------===// +def MEMOPIMM : SDNodeXForm<imm, [{ + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int32_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm); +}]>; + +def MEMOPIMM_HALF : SDNodeXForm<imm, [{ + // -1 .. -31 represented as 65535..65515 + // assigning to a short restores our desired signed value. + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int16_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm); +}]>; + +def MEMOPIMM_BYTE : SDNodeXForm<imm, [{ + // -1 .. -31 represented as 255..235 + // assigning to a char restores our desired signed value. + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int8_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm); +}]>; + +def SETMEMIMM : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-31]. + // As an SDNode. + int32_t imm = N->getSExtValue(); + return XformMskToBitPosU5Imm(imm); +}]>; + +def CLRMEMIMM : SDNodeXForm<imm, [{ + // Return the bit position we will clear [0-31]. + // As an SDNode. + // we bit negate the value first + int32_t imm = ~(N->getSExtValue()); + return XformMskToBitPosU5Imm(imm); +}]>; + +def SETMEMIMM_SHORT : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-15]. + // As an SDNode. + int16_t imm = N->getSExtValue(); + return XformMskToBitPosU4Imm(imm); +}]>; + +def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{ + // Return the bit position we will clear [0-15]. + // As an SDNode. + // we bit negate the value first + int16_t imm = ~(N->getSExtValue()); + return XformMskToBitPosU4Imm(imm); +}]>; + +def SETMEMIMM_BYTE : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-7]. + // As an SDNode. + int8_t imm = N->getSExtValue(); + return XformMskToBitPosU3Imm(imm); +}]>; + +def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{ + // Return the bit position we will clear [0-7]. + // As an SDNode. + // we bit negate the value first + int8_t imm = ~(N->getSExtValue()); + return XformMskToBitPosU3Imm(imm); +}]>; + +//===----------------------------------------------------------------------===// +// Template class for MemOp instructions with the register value. +//===----------------------------------------------------------------------===// +class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp, + string memOp, bits<2> memOpBits> : + MEMInst_V4<(outs), + (ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta), + opc#"($base+#$offset)"#memOp#"$delta", + []>, + Requires<[HasV4T, UseMEMOP]> { + + bits<5> base; + bits<5> delta; + bits<32> offset; + bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2 + + let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0}, + !if (!eq(opcBits, 0b01), offset{6-1}, + !if (!eq(opcBits, 0b10), offset{7-2},0))); + + let IClass = 0b0011; + let Inst{27-24} = 0b1110; + let Inst{22-21} = opcBits; + let Inst{20-16} = base; + let Inst{13} = 0b0; + let Inst{12-7} = offsetBits; + let Inst{6-5} = memOpBits; + let Inst{4-0} = delta; +} + //===----------------------------------------------------------------------===// -// MEMOP: Word -// -// Implemented: -// MEMw_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5 -// MEMw_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5 -// MEMw_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt -// MEMw_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt -// MEMw_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt -// MEMw_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt -// MEMw_ADDi_V4 : memw(Rs+#u6:2)+=#U5 -// MEMw_SUBi_V4 : memw(Rs+#u6:2)-=#U5 -// MEMw_ADDr_V4 : memw(Rs+#u6:2)+=Rt -// MEMw_SUBr_V4 : memw(Rs+#u6:2)-=Rt -// MEMw_CLRr_V4 : memw(Rs+#u6:2)&=Rt -// MEMw_SETr_V4 : memw(Rs+#u6:2)|=Rt -// -// Not implemented: -// MEMw_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5) -// MEMw_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5) -// MEMw_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5) -// MEMw_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5) +// Template class for MemOp instructions with the immediate value. //===----------------------------------------------------------------------===// +class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp, + string memOp, bits<2> memOpBits> : + MEMInst_V4 <(outs), + (ins IntRegs:$base, ImmOp:$offset, u5Imm:$delta), + opc#"($base+#$offset)"#memOp#"#$delta" + #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')' + []>, + Requires<[HasV4T, UseMEMOP]> { + bits<5> base; + bits<5> delta; + bits<32> offset; + bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2 + let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0}, + !if (!eq(opcBits, 0b01), offset{6-1}, + !if (!eq(opcBits, 0b10), offset{7-2},0))); -// memw(Rs+#u6:2) += #U5 -let AddedComplexity = 30 in -def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend), - "memw($base+#$offset) += #$addend", - []>, - Requires<[HasV4T, UseMEMOP]>; + let IClass = 0b0011; + let Inst{27-24} = 0b1111; + let Inst{22-21} = opcBits; + let Inst{20-16} = base; + let Inst{13} = 0b0; + let Inst{12-7} = offsetBits; + let Inst{6-5} = memOpBits; + let Inst{4-0} = delta; +} -// memw(Rs+#u6:2) -= #U5 -let AddedComplexity = 30 in -def MEMw_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend), - "memw($base+#$offset) -= #$subend", - []>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) += Rt -let AddedComplexity = 30 in -def MEMw_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend), - "memw($base+#$offset) += $addend", - [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)), - (i32 IntRegs:$addend)), - (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) -= Rt -let AddedComplexity = 30 in -def MEMw_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend), - "memw($base+#$offset) -= $subend", - [(store (sub (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)), - (i32 IntRegs:$subend)), - (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) &= Rt -let AddedComplexity = 30 in -def MEMw_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend), - "memw($base+#$offset) &= $andend", - [(store (and (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)), - (i32 IntRegs:$andend)), - (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) |= Rt -let AddedComplexity = 30 in -def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend), - "memw($base+#$offset) |= $orend", - [(store (or (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)), - (i32 IntRegs:$orend)), - (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) += #U5 -let AddedComplexity = 30 in -def MEMw_ADDi_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, u5Imm:$addend), - "memw($addr) += $addend", - []>, - Requires<[HasV4T, UseMEMOP]>; +// multiclass to define MemOp instructions with register operand. +multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> { + def _ADD#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add + def _SUB#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub + def _AND#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and + def _OR#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or +} -// memw(Rs+#u6:2) -= #U5 -let AddedComplexity = 30 in -def MEMw_SUBi_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, u5Imm:$subend), - "memw($addr) -= $subend", - []>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) += Rt -let AddedComplexity = 30 in -def MEMw_ADDr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$addend), - "memw($addr) += $addend", - [(store (add (load ADDRriU6_2:$addr), (i32 IntRegs:$addend)), - ADDRriU6_2:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) -= Rt -let AddedComplexity = 30 in -def MEMw_SUBr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$subend), - "memw($addr) -= $subend", - [(store (sub (load ADDRriU6_2:$addr), (i32 IntRegs:$subend)), - ADDRriU6_2:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) &= Rt -let AddedComplexity = 30 in -def MEMw_ANDr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$andend), - "memw($addr) &= $andend", - [(store (and (load ADDRriU6_2:$addr), (i32 IntRegs:$andend)), - ADDRriU6_2:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) |= Rt -let AddedComplexity = 30 in -def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$orend), - "memw($addr) |= $orend", - [(store (or (load ADDRriU6_2:$addr), (i32 IntRegs:$orend)), - ADDRriU6_2:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; +// multiclass to define MemOp instructions with immediate Operand. +multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> { + def _ADD#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >; + def _SUB#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >; + def _CLRBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =clrbit(", 0b10>; + def _SETBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =setbit(", 0b11>; +} + +multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> { + defm r : MemOp_rr <opc, opcBits, ImmOp>; + defm i : MemOp_ri <opc, opcBits, ImmOp>; +} + +// Define MemOp instructions. +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, +validSubTargets =HasV4SubT in { + let opExtentBits = 6, accessSize = ByteAccess in + defm MemOPb : MemOp_base <"memb", 0b00, u6_0Ext>; + + let opExtentBits = 7, accessSize = HalfWordAccess in + defm MemOPh : MemOp_base <"memh", 0b01, u6_1Ext>; + + let opExtentBits = 8, accessSize = WordAccess in + defm MemOPw : MemOp_base <"memw", 0b10, u6_2Ext>; +} //===----------------------------------------------------------------------===// -// MEMOP: Halfword -// -// Implemented: -// MEMh_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5 -// MEMh_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5 -// MEMh_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt -// MEMh_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt -// MEMh_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt -// MEMh_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt -// MEMh_ADDi_V4 : memw(Rs+#u6:2)+=#U5 -// MEMh_SUBi_V4 : memw(Rs+#u6:2)-=#U5 -// MEMh_ADDr_V4 : memw(Rs+#u6:2)+=Rt -// MEMh_SUBr_V4 : memw(Rs+#u6:2)-=Rt -// MEMh_CLRr_V4 : memw(Rs+#u6:2)&=Rt -// MEMh_SETr_V4 : memw(Rs+#u6:2)|=Rt -// -// Not implemented: -// MEMh_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5) -// MEMh_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5) -// MEMh_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5) -// MEMh_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5) +// Multiclass to define 'Def Pats' for ALU operations on the memory +// Here value used for the ALU operation is an immediate value. +// mem[bh](Rs+#0) += #U5 +// mem[bh](Rs+#u6) += #U5 //===----------------------------------------------------------------------===// +multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, + InstHexagon MI, SDNode OpNode> { + let AddedComplexity = 180 in + def : Pat < (stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend), + IntRegs:$addr), + (MI IntRegs:$addr, #0, u5ImmPred:$addend )>; -// memh(Rs+#u6:1) += #U5 -let AddedComplexity = 30 in -def MEMh_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$addend), - "memh($base+#$offset) += $addend", - []>, - Requires<[HasV4T, UseMEMOP]>; + let AddedComplexity = 190 in + def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)), + u5ImmPred:$addend), + (add IntRegs:$base, ExtPred:$offset)), + (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>; +} -// memh(Rs+#u6:1) -= #U5 -let AddedComplexity = 30 in -def MEMh_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$subend), - "memh($base+#$offset) -= $subend", - []>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) += Rt -let AddedComplexity = 30 in -def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend), - "memh($base+#$offset) += $addend", - [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base), - u6_1ImmPred:$offset)), - (i32 IntRegs:$addend)), - (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) -= Rt -let AddedComplexity = 30 in -def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend), - "memh($base+#$offset) -= $subend", - [(truncstorei16 (sub (sextloadi16 (add (i32 IntRegs:$base), - u6_1ImmPred:$offset)), - (i32 IntRegs:$subend)), - (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) &= Rt -let AddedComplexity = 30 in -def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend), - "memh($base+#$offset) += $andend", - [(truncstorei16 (and (sextloadi16 (add (i32 IntRegs:$base), - u6_1ImmPred:$offset)), - (i32 IntRegs:$andend)), - (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) |= Rt -let AddedComplexity = 30 in -def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend), - "memh($base+#$offset) |= $orend", - [(truncstorei16 (or (sextloadi16 (add (i32 IntRegs:$base), - u6_1ImmPred:$offset)), - (i32 IntRegs:$orend)), - (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) += #U5 -let AddedComplexity = 30 in -def MEMh_ADDi_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, u5Imm:$addend), - "memh($addr) += $addend", - []>, - Requires<[HasV4T, UseMEMOP]>; +multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, + InstHexagon addMI, InstHexagon subMI> { + defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>; + defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>; +} -// memh(Rs+#u6:1) -= #U5 -let AddedComplexity = 30 in -def MEMh_SUBi_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, u5Imm:$subend), - "memh($addr) -= $subend", - []>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) += Rt -let AddedComplexity = 30 in -def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$addend), - "memh($addr) += $addend", - [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr), - (i32 IntRegs:$addend)), ADDRriU6_1:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) -= Rt -let AddedComplexity = 30 in -def MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$subend), - "memh($addr) -= $subend", - [(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr), - (i32 IntRegs:$subend)), ADDRriU6_1:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) &= Rt -let AddedComplexity = 30 in -def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$andend), - "memh($addr) &= $andend", - [(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr), - (i32 IntRegs:$andend)), ADDRriU6_1:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) |= Rt -let AddedComplexity = 30 in -def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$orend), - "memh($addr) |= $orend", - [(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr), - (i32 IntRegs:$orend)), ADDRriU6_1:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; +multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Half Word + defm : MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred, + MemOPh_ADDi_V4, MemOPh_SUBi_V4>; + // Byte + defm : MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred, + MemOPb_ADDi_V4, MemOPb_SUBi_V4>; +} +let Predicates = [HasV4T, UseMEMOP] in { + defm : MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend + defm : MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend + defm : MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend + + // Word + defm : MemOpi_u5ALUOp <load, store, u6_2ExtPred, MemOPw_ADDi_V4, + MemOPw_SUBi_V4>; +} //===----------------------------------------------------------------------===// -// MEMOP: Byte -// -// Implemented: -// MEMb_ADDi_indexed_V4 : memb(Rs+#u6:0)+=#U5 -// MEMb_SUBi_indexed_V4 : memb(Rs+#u6:0)-=#U5 -// MEMb_ADDr_indexed_V4 : memb(Rs+#u6:0)+=Rt -// MEMb_SUBr_indexed_V4 : memb(Rs+#u6:0)-=Rt -// MEMb_CLRr_indexed_V4 : memb(Rs+#u6:0)&=Rt -// MEMb_SETr_indexed_V4 : memb(Rs+#u6:0)|=Rt -// MEMb_ADDi_V4 : memb(Rs+#u6:0)+=#U5 -// MEMb_SUBi_V4 : memb(Rs+#u6:0)-=#U5 -// MEMb_ADDr_V4 : memb(Rs+#u6:0)+=Rt -// MEMb_SUBr_V4 : memb(Rs+#u6:0)-=Rt -// MEMb_CLRr_V4 : memb(Rs+#u6:0)&=Rt -// MEMb_SETr_V4 : memb(Rs+#u6:0)|=Rt -// -// Not implemented: -// MEMb_CLRi_indexed_V4 : memb(Rs+#u6:0)=clrbit(#U5) -// MEMb_SETi_indexed_V4 : memb(Rs+#u6:0)=setbit(#U5) -// MEMb_CLRi_V4 : memb(Rs+#u6:0)=clrbit(#U5) -// MEMb_SETi_V4 : memb(Rs+#u6:0)=setbit(#U5) +// multiclass to define 'Def Pats' for ALU operations on the memory. +// Here value used for the ALU operation is a negative value. +// mem[bh](Rs+#0) += #m5 +// mem[bh](Rs+#u6) += #m5 //===----------------------------------------------------------------------===// -// memb(Rs+#u6:0) += #U5 -let AddedComplexity = 30 in -def MEMb_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$addend), - "memb($base+#$offset) += $addend", - []>, - Requires<[HasV4T, UseMEMOP]>; +multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred, + PatLeaf immPred, ComplexPattern addrPred, + SDNodeXForm xformFunc, InstHexagon MI> { + let AddedComplexity = 190 in + def : Pat <(stOp (add (ldOp IntRegs:$addr), immPred:$subend), + IntRegs:$addr), + (MI IntRegs:$addr, #0, (xformFunc immPred:$subend) )>; -// memb(Rs+#u6:0) -= #U5 -let AddedComplexity = 30 in -def MEMb_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$subend), - "memb($base+#$offset) -= $subend", - []>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) += Rt -let AddedComplexity = 30 in -def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend), - "memb($base+#$offset) += $addend", - [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base), - u6_0ImmPred:$offset)), - (i32 IntRegs:$addend)), - (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) -= Rt -let AddedComplexity = 30 in -def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend), - "memb($base+#$offset) -= $subend", - [(truncstorei8 (sub (sextloadi8 (add (i32 IntRegs:$base), - u6_0ImmPred:$offset)), - (i32 IntRegs:$subend)), - (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) &= Rt -let AddedComplexity = 30 in -def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend), - "memb($base+#$offset) += $andend", - [(truncstorei8 (and (sextloadi8 (add (i32 IntRegs:$base), - u6_0ImmPred:$offset)), - (i32 IntRegs:$andend)), - (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) |= Rt -let AddedComplexity = 30 in -def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend), - "memb($base+#$offset) |= $orend", - [(truncstorei8 (or (sextloadi8 (add (i32 IntRegs:$base), - u6_0ImmPred:$offset)), - (i32 IntRegs:$orend)), - (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) += #U5 -let AddedComplexity = 30 in -def MEMb_ADDi_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, u5Imm:$addend), - "memb($addr) += $addend", - []>, - Requires<[HasV4T, UseMEMOP]>; + let AddedComplexity = 195 in + def : Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)), + immPred:$subend), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>; +} -// memb(Rs+#u6:0) -= #U5 -let AddedComplexity = 30 in -def MEMb_SUBi_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, u5Imm:$subend), - "memb($addr) -= $subend", - []>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) += Rt -let AddedComplexity = 30 in -def MEMb_ADDr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$addend), - "memb($addr) += $addend", - [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr), - (i32 IntRegs:$addend)), ADDRriU6_0:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) -= Rt -let AddedComplexity = 30 in -def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$subend), - "memb($addr) -= $subend", - [(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr), - (i32 IntRegs:$subend)), ADDRriU6_0:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) &= Rt -let AddedComplexity = 30 in -def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$andend), - "memb($addr) &= $andend", - [(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr), - (i32 IntRegs:$andend)), ADDRriU6_0:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) |= Rt -let AddedComplexity = 30 in -def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$orend), - "memb($addr) |= $orend", - [(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr), - (i32 IntRegs:$orend)), ADDRriU6_0:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; +multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Half Word + defm : MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred, + ADDRriU6_1, MEMOPIMM_HALF, MemOPh_SUBi_V4>; + // Byte + defm : MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred, + ADDRriU6_0, MEMOPIMM_BYTE, MemOPb_SUBi_V4>; +} +let Predicates = [HasV4T, UseMEMOP] in { + defm : MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend + defm : MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend + defm : MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend + + // Word + defm : MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred, + ADDRriU6_2, MEMOPIMM, MemOPw_SUBi_V4>; +} + +//===----------------------------------------------------------------------===// +// Multiclass to define 'def Pats' for bit operations on the memory. +// mem[bhw](Rs+#0) = [clrbit|setbit](#U5) +// mem[bhw](Rs+#u6) = [clrbit|setbit](#U5) +//===----------------------------------------------------------------------===// + +multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred, + PatLeaf extPred, ComplexPattern addrPred, + SDNodeXForm xformFunc, InstHexagon MI, SDNode OpNode> { + + // mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5) + let AddedComplexity = 250 in + def : Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), + immPred:$bitend), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>; + + // mem[bhw](Rs+#0) = [clrbit|setbit](#U5) + let AddedComplexity = 225 in + def : Pat <(stOp (OpNode (ldOp addrPred:$addr), immPred:$bitend), + addrPred:$addr), + (MI IntRegs:$addr, #0, (xformFunc immPred:$bitend))>; +} + +multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Byte - clrbit + defm : MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred, + ADDRriU6_0, CLRMEMIMM_BYTE, MemOPb_CLRBITi_V4, and>; + // Byte - setbit + defm : MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred, + ADDRriU6_0, SETMEMIMM_BYTE, MemOPb_SETBITi_V4, or>; + // Half Word - clrbit + defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred, + ADDRriU6_1, CLRMEMIMM_SHORT, MemOPh_CLRBITi_V4, and>; + // Half Word - setbit + defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred, + ADDRriU6_1, SETMEMIMM_SHORT, MemOPh_SETBITi_V4, or>; +} + +let Predicates = [HasV4T, UseMEMOP] in { + // mem[bh](Rs+#0) = [clrbit|setbit](#U5) + // mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5) + defm : MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend + defm : MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend + defm : MemOpi_bitExtType<extloadi8, extloadi16>; // any extend + + // memw(Rs+#0) = [clrbit|setbit](#U5) + // memw(Rs+#u6:2) = [clrbit|setbit](#U5) + defm : MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, ADDRriU6_2, + CLRMEMIMM, MemOPw_CLRBITi_V4, and>; + defm : MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, ADDRriU6_2, + SETMEMIMM, MemOPw_SETBITi_V4, or>; +} + +//===----------------------------------------------------------------------===// +// Multiclass to define 'def Pats' for ALU operations on the memory +// where addend is a register. +// mem[bhw](Rs+#0) [+-&|]= Rt +// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt +//===----------------------------------------------------------------------===// + +multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, ComplexPattern addrPred, + PatLeaf extPred, InstHexagon MI, SDNode OpNode> { + let AddedComplexity = 141 in + // mem[bhw](Rs+#0) [+-&|]= Rt + def : Pat <(stOp (OpNode (ldOp addrPred:$addr), (i32 IntRegs:$addend)), + addrPred:$addr), + (MI IntRegs:$addr, #0, (i32 IntRegs:$addend) )>; + + // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt + let AddedComplexity = 150 in + def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), + (i32 IntRegs:$orend)), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend) )>; +} + +multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, + ComplexPattern addrPred, PatLeaf extPred, + InstHexagon addMI, InstHexagon subMI, + InstHexagon andMI, InstHexagon orMI > { + + defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, addMI, add>; + defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, subMI, sub>; + defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, andMI, and>; + defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, orMI, or>; +} + +multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Half Word + defm : MemOPr_ALUOp <ldOpHalf, truncstorei16, ADDRriU6_1, u6_1ExtPred, + MemOPh_ADDr_V4, MemOPh_SUBr_V4, + MemOPh_ANDr_V4, MemOPh_ORr_V4>; + // Byte + defm : MemOPr_ALUOp <ldOpByte, truncstorei8, ADDRriU6_0, u6ExtPred, + MemOPb_ADDr_V4, MemOPb_SUBr_V4, + MemOPb_ANDr_V4, MemOPb_ORr_V4>; +} + +// Define 'def Pats' for MemOps with register addend. +let Predicates = [HasV4T, UseMEMOP] in { + // Byte, Half Word + defm : MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend + defm : MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend + defm : MemOPr_ExtType<extloadi8, extloadi16>; // any extend + // Word + defm : MemOPr_ALUOp <load, store, ADDRriU6_2, u6_2ExtPred, MemOPw_ADDr_V4, + MemOPw_SUBr_V4, MemOPw_ANDr_V4, MemOPw_ORr_V4 >; +} //===----------------------------------------------------------------------===// // XTYPE/PRED + @@ -3730,6 +2870,108 @@ def : Pat<(store (i64 DoubleRegs:$src1), (STrid_abs_V4 tglobaladdr: $absaddr, DoubleRegs: $src1)>; } +//===----------------------------------------------------------------------===// +// multiclass for store instructions with GP-relative addressing mode. +// mem[bhwd](#global)=Rt +// if ([!]Pv[.new]) mem[bhwd](##global) = Rt +//===----------------------------------------------------------------------===// +multiclass ST_GP<string mnemonic, string BaseOp, RegisterClass RC> { + let BaseOpcode = BaseOp, isPredicable = 1 in + def NAME#_V4 : STInst2<(outs), + (ins globaladdress:$global, RC:$src), + mnemonic#"(#$global) = $src", + []>; + + // When GP-relative instructions are predicated, their addressing mode is + // changed to absolute and they are always constant extended. + let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1, + isPredicated = 1 in { + defm Pt : ST_Abs_Pred <mnemonic, RC, 0>; + defm NotPt : ST_Abs_Pred <mnemonic, RC, 1>; + } +} + +let mayStore = 1, isNVStore = 1 in +multiclass ST_GP_nv<string mnemonic, string BaseOp, RegisterClass RC> { + let BaseOpcode = BaseOp, isPredicable = 1 in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins u0AlwaysExt:$global, RC:$src), + mnemonic#"(#$global) = $src.new", + []>, + Requires<[HasV4T]>; + + // When GP-relative instructions are predicated, their addressing mode is + // changed to absolute and they are always constant extended. + let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1, + isPredicated = 1 in { + defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>; + defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>; + } +} + +let validSubTargets = HasV4SubT, validSubTargets = HasV4SubT in { +defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>, + ST_GP_nv<"memd", "STd_GP", DoubleRegs>, NewValueRel ; +defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>, + ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel ; +defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>, + ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel ; +defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>, + ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel ; +} + +// 64 bit atomic store +def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global), + (i64 DoubleRegs:$src1)), + (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress) -> memd(#foo) +let AddedComplexity = 100 in +def : Pat <(store (i64 DoubleRegs:$src1), + (HexagonCONST32_GP tglobaladdr:$global)), + (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>; + +// 8 bit atomic store +def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global), + (i32 IntRegs:$src1)), + (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// Map from store(globaladdress) -> memb(#foo) +let AddedComplexity = 100 in +def : Pat<(truncstorei8 (i32 IntRegs:$src1), + (HexagonCONST32_GP tglobaladdr:$global)), + (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" +// to "r0 = 1; memw(#foo) = r0" +let AddedComplexity = 100 in +def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), + (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>; + +def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global), + (i32 IntRegs:$src1)), + (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// Map from store(globaladdress) -> memh(#foo) +let AddedComplexity = 100 in +def : Pat<(truncstorei16 (i32 IntRegs:$src1), + (HexagonCONST32_GP tglobaladdr:$global)), + (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// 32 bit atomic store +def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global), + (i32 IntRegs:$src1)), + (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// Map from store(globaladdress) -> memw(#foo) +let AddedComplexity = 100 in +def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), + (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +//===----------------------------------------------------------------------===// +// Multiclass for the load instructions with absolute addressing mode. +//===----------------------------------------------------------------------===// multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { let PNewValue = !if(isPredNew, "new", "") in @@ -3795,6 +3037,107 @@ let Predicates = [HasV4T], AddedComplexity=30 in def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))), (LDriuh_abs_V4 tglobaladdr:$absaddr)>; +//===----------------------------------------------------------------------===// +// multiclass for load instructions with GP-relative addressing mode. +// Rx=mem[bhwd](##global) +// if ([!]Pv[.new]) Rx=mem[bhwd](##global) +//===----------------------------------------------------------------------===// +let neverHasSideEffects = 1, validSubTargets = HasV4SubT in +multiclass LD_GP<string mnemonic, string BaseOp, RegisterClass RC> { + let BaseOpcode = BaseOp in { + let isPredicable = 1 in + def NAME#_V4 : LDInst2<(outs RC:$dst), + (ins globaladdress:$global), + "$dst = "#mnemonic#"(#$global)", + []>; + + let isExtended = 1, opExtendable = 2, isPredicated = 1 in { + defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>; + defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>; + } + } +} + +defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>; +defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>; +defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>; +defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>; +defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>; +defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>; + +def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)), + (i64 (LDd_GP_V4 tglobaladdr:$global))>; + +def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDw_GP_V4 tglobaladdr:$global))>; + +def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDuh_GP_V4 tglobaladdr:$global))>; + +def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDub_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memw(#foo + 0) +let AddedComplexity = 100 in +def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i64 (LDd_GP_V4 tglobaladdr:$global))>; + +// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd +let AddedComplexity = 100 in +def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>; + +// When the Interprocedural Global Variable optimizer realizes that a certain +// global variable takes only two constant values, it shrinks the global to +// a boolean. Catch those loads here in the following 3 patterns. +let AddedComplexity = 100 in +def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>; + +let AddedComplexity = 100 in +def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memb(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memb(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>; + +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDub_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memub(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDub_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memh(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDh_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memh(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDh_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memuh(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDuh_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memw(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDw_GP_V4 tglobaladdr:$global))>; + + // Transfer global address into a register let AddedComplexity=50, isMoveImm = 1, isReMaterializable = 1 in def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1), diff --git a/lib/Target/Hexagon/HexagonMCInst.h b/lib/Target/Hexagon/HexagonMCInst.h deleted file mode 100644 index e16636ea48..0000000000 --- a/lib/Target/Hexagon/HexagonMCInst.h +++ /dev/null @@ -1,41 +0,0 @@ -//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class extends MCInst to allow some VLIW annotation. -// -//===----------------------------------------------------------------------===// - -#ifndef HEXAGONMCINST_H -#define HEXAGONMCINST_H - -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/MC/MCInst.h" - -namespace llvm { - class HexagonMCInst: public MCInst { - // Packet start and end markers - unsigned startPacket: 1, endPacket: 1; - const MachineInstr *MachineI; - public: - explicit HexagonMCInst(): MCInst(), - startPacket(0), endPacket(0) {} - - const MachineInstr* getMI() const { return MachineI; } - - void setMI(const MachineInstr *MI) { MachineI = MI; } - - bool isStartPacket() const { return (startPacket); } - bool isEndPacket() const { return (endPacket); } - - void setStartPacket(bool yes) { startPacket = yes; } - void setEndPacket(bool yes) { endPacket = yes; } - }; -} - -#endif diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index cd3d2898d0..5e80e48b01 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -220,7 +220,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, return false; } - unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning. + unsigned cmpReg1, cmpOp2; cmpReg1 = MI->getOperand(1).getReg(); if (secondReg) { diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index f947dfcdf9..d8b4e2fcb3 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -14,25 +14,26 @@ #include "HexagonRegisterInfo.h" #include "Hexagon.h" -#include "HexagonMachineFunctionInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" +#include "HexagonMachineFunctionInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/MC/MachineLocation.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -215,28 +216,41 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,true); MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); } else if (TII.isMemOp(&MI)) { - unsigned resReg = HEXAGON_RESERVED_REG_1; - if (!MFI.hasVarSizedObjects() && - TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) { - MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), - false, false, true); - MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset); - } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { - BuildMI(*MI.getParent(), II, MI.getDebugLoc(), - TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset); - BuildMI(*MI.getParent(), II, MI.getDebugLoc(), - TII.get(Hexagon::ADD_rr), - resReg).addReg(FrameReg).addReg(resReg); - MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false, - true); - MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); + // use the constant extender if the instruction provides it + // and we are V4TOps. + if (Subtarget.hasV4TOps()) { + if (TII.isConstExtended(&MI)) { + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset); + TII.immediateExtend(&MI); + } else { + llvm_unreachable("Need to implement for memops"); + } } else { - BuildMI(*MI.getParent(), II, MI.getDebugLoc(), - TII.get(Hexagon::ADD_ri), - resReg).addReg(FrameReg).addImm(Offset); - MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false, - true); - MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); + // Only V3 and older instructions here. + unsigned ResReg = HEXAGON_RESERVED_REG_1; + if (!MFI.hasVarSizedObjects() && + TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) { + MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), + false, false, false); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset); + } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::CONST32_Int_Real), ResReg).addImm(Offset); + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_rr), ResReg).addReg(FrameReg). + addReg(ResReg); + MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false, + true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); + } else { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_ri), ResReg).addReg(FrameReg). + addImm(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false, + true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); + } } } else { unsigned dstReg = MI.getOperand(0).getReg(); diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 4bacb8fa67..07d5ce1d8a 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -29,8 +29,16 @@ EnableV3("enable-hexagon-v3", cl::Hidden, static cl::opt<bool> EnableMemOps( "enable-hexagon-memops", - cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, - cl::desc("Generate V4 memop instructions.")); + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true), + cl::desc( + "Generate V4 MEMOP in code generation for Hexagon target")); + +static cl::opt<bool> +DisableMemOps( + "disable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false), + cl::desc( + "Do not generate V4 MEMOP in code generation for Hexagon target")); static cl::opt<bool> EnableIEEERndNear( @@ -64,7 +72,10 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS): // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); - if (EnableMemOps) + // UseMemOps on by default unless disabled explicitly + if (DisableMemOps) + UseMemOps = false; + else if (EnableMemOps) UseMemOps = true; else UseMemOps = false; diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index d9fef3e455..ce45c626f7 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -35,6 +35,10 @@ opt<bool> DisableHexagonMISched("disable-hexagon-misched", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon MI Scheduling")); +static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon CFG Optimization")); + /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the /// library. In particular, it seems that it is not possible to get @@ -75,19 +79,20 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, TSInfo(*this), FrameLowering(Subtarget), InstrItins(&Subtarget.getInstrItineraryData()) { - setMCUseCFI(false); + setMCUseCFI(false); } // addPassesForOptimizations - Allow the backend (target) to add Target // Independent Optimization passes to the Pass Manager. bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) { - - PM.add(createConstantPropagationPass()); - PM.add(createLoopSimplifyPass()); - PM.add(createDeadCodeEliminationPass()); - PM.add(createConstantPropagationPass()); - PM.add(createLoopUnrollPass()); - PM.add(createLoopStrengthReducePass()); + if (getOptLevel() != CodeGenOpt::None) { + PM.add(createConstantPropagationPass()); + PM.add(createLoopSimplifyPass()); + PM.add(createDeadCodeEliminationPass()); + PM.add(createConstantPropagationPass()); + PM.add(createLoopUnrollPass()); + PM.add(createLoopStrengthReducePass()); + } return true; } @@ -121,38 +126,45 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { } bool HexagonPassConfig::addInstSelector() { - addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine())); + + if (getOptLevel() != CodeGenOpt::None) + addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine())); + addPass(createHexagonISelDag(getHexagonTargetMachine(), getOptLevel())); - addPass(createHexagonPeephole()); + + if (getOptLevel() != CodeGenOpt::None) + addPass(createHexagonPeephole()); + return false; } bool HexagonPassConfig::addPreRegAlloc() { - if (!DisableHardwareLoops) { + if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None) addPass(createHexagonHardwareLoops()); - } return false; } bool HexagonPassConfig::addPostRegAlloc() { - addPass(createHexagonCFGOptimizer(getHexagonTargetMachine())); + if (!DisableHexagonCFGOpt && getOptLevel() != CodeGenOpt::None) + addPass(createHexagonCFGOptimizer(getHexagonTargetMachine())); return true; } bool HexagonPassConfig::addPreSched2() { - addPass(&IfConverterID); + if (getOptLevel() != CodeGenOpt::None) + addPass(&IfConverterID); return true; } bool HexagonPassConfig::addPreEmitPass() { - if (!DisableHardwareLoops) { + if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None) addPass(createHexagonFixupHwLoops()); - } - addPass(createHexagonNewValueJump()); + if (getOptLevel() != CodeGenOpt::None) + addPass(createHexagonNewValueJump()); // Expand Spill code for predicate registers. addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine())); @@ -161,7 +173,8 @@ bool HexagonPassConfig::addPreEmitPass() { addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine())); // Create Packets. - addPass(createHexagonPacketizer()); + if (getOptLevel() != CodeGenOpt::None) + addPass(createHexagonPacketizer()); return false; } diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 866beb1688..c0d86da1c0 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -149,7 +149,6 @@ namespace { bool canReserveResourcesForConstExt(MachineInstr *MI); void reserveResourcesForConstExt(MachineInstr* MI); bool isNewValueInst(MachineInstr* MI); - bool isDotNewInst(MachineInstr* MI); }; } @@ -2154,172 +2153,6 @@ static bool GetPredicateSense(MachineInstr* MI, return false; } -bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - if (QII->isNewValueInst(MI)) - return true; - - switch (MI->getOpcode()) { - case Hexagon::TFR_cdnNotPt: - case Hexagon::TFR_cdnPt: - case Hexagon::TFRI_cdnNotPt: - case Hexagon::TFRI_cdnPt: - case Hexagon::LDrid_cdnPt : - case Hexagon::LDrid_cdnNotPt : - case Hexagon::LDrid_indexed_cdnPt : - case Hexagon::LDrid_indexed_cdnNotPt : - case Hexagon::POST_LDrid_cdnPt_V4 : - case Hexagon::POST_LDrid_cdnNotPt_V4 : - case Hexagon::LDriw_cdnPt : - case Hexagon::LDriw_cdnNotPt : - case Hexagon::LDriw_indexed_cdnPt : - case Hexagon::LDriw_indexed_cdnNotPt : - case Hexagon::POST_LDriw_cdnPt_V4 : - case Hexagon::POST_LDriw_cdnNotPt_V4 : - case Hexagon::LDrih_cdnPt : - case Hexagon::LDrih_cdnNotPt : - case Hexagon::LDrih_indexed_cdnPt : - case Hexagon::LDrih_indexed_cdnNotPt : - case Hexagon::POST_LDrih_cdnPt_V4 : - case Hexagon::POST_LDrih_cdnNotPt_V4 : - case Hexagon::LDrib_cdnPt : - case Hexagon::LDrib_cdnNotPt : - case Hexagon::LDrib_indexed_cdnPt : - case Hexagon::LDrib_indexed_cdnNotPt : - case Hexagon::POST_LDrib_cdnPt_V4 : - case Hexagon::POST_LDrib_cdnNotPt_V4 : - case Hexagon::LDriuh_cdnPt : - case Hexagon::LDriuh_cdnNotPt : - case Hexagon::LDriuh_indexed_cdnPt : - case Hexagon::LDriuh_indexed_cdnNotPt : - case Hexagon::POST_LDriuh_cdnPt_V4 : - case Hexagon::POST_LDriuh_cdnNotPt_V4 : - case Hexagon::LDriub_cdnPt : - case Hexagon::LDriub_cdnNotPt : - case Hexagon::LDriub_indexed_cdnPt : - case Hexagon::LDriub_indexed_cdnNotPt : - case Hexagon::POST_LDriub_cdnPt_V4 : - case Hexagon::POST_LDriub_cdnNotPt_V4 : - - case Hexagon::LDrid_indexed_shl_cdnPt_V4 : - case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDrib_indexed_shl_cdnPt_V4 : - case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriub_indexed_shl_cdnPt_V4 : - case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDrih_indexed_shl_cdnPt_V4 : - case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriuh_indexed_shl_cdnPt_V4 : - case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriw_indexed_shl_cdnPt_V4 : - case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 : - -// Coditional add - case Hexagon::ADD_ri_cdnPt: - case Hexagon::ADD_ri_cdnNotPt: - case Hexagon::ADD_rr_cdnPt: - case Hexagon::ADD_rr_cdnNotPt: - - // Conditional logical operations - case Hexagon::XOR_rr_cdnPt : - case Hexagon::XOR_rr_cdnNotPt : - case Hexagon::AND_rr_cdnPt : - case Hexagon::AND_rr_cdnNotPt : - case Hexagon::OR_rr_cdnPt : - case Hexagon::OR_rr_cdnNotPt : - - // Conditonal subtract - case Hexagon::SUB_rr_cdnPt : - case Hexagon::SUB_rr_cdnNotPt : - - // Conditional combine - case Hexagon::COMBINE_rr_cdnPt : - case Hexagon::COMBINE_rr_cdnNotPt : - - // Conditional shift operations - case Hexagon::ASLH_cdnPt_V4: - case Hexagon::ASLH_cdnNotPt_V4: - case Hexagon::ASRH_cdnPt_V4: - case Hexagon::ASRH_cdnNotPt_V4: - case Hexagon::SXTB_cdnPt_V4: - case Hexagon::SXTB_cdnNotPt_V4: - case Hexagon::SXTH_cdnPt_V4: - case Hexagon::SXTH_cdnNotPt_V4: - case Hexagon::ZXTB_cdnPt_V4: - case Hexagon::ZXTB_cdnNotPt_V4: - case Hexagon::ZXTH_cdnPt_V4: - case Hexagon::ZXTH_cdnNotPt_V4: - - // Conditional stores - case Hexagon::STrib_imm_cdnPt_V4 : - case Hexagon::STrib_imm_cdnNotPt_V4 : - case Hexagon::STrib_cdnPt_V4 : - case Hexagon::STrib_cdnNotPt_V4 : - case Hexagon::STrib_indexed_cdnPt_V4 : - case Hexagon::STrib_indexed_cdnNotPt_V4 : - case Hexagon::POST_STbri_cdnPt_V4 : - case Hexagon::POST_STbri_cdnNotPt_V4 : - case Hexagon::STrib_indexed_shl_cdnPt_V4 : - case Hexagon::STrib_indexed_shl_cdnNotPt_V4 : - - // Store doubleword conditionally - case Hexagon::STrid_indexed_cdnPt_V4 : - case Hexagon::STrid_indexed_cdnNotPt_V4 : - case Hexagon::STrid_indexed_shl_cdnPt_V4 : - case Hexagon::STrid_indexed_shl_cdnNotPt_V4 : - case Hexagon::POST_STdri_cdnPt_V4 : - case Hexagon::POST_STdri_cdnNotPt_V4 : - - // Store halfword conditionally - case Hexagon::STrih_cdnPt_V4 : - case Hexagon::STrih_cdnNotPt_V4 : - case Hexagon::STrih_indexed_cdnPt_V4 : - case Hexagon::STrih_indexed_cdnNotPt_V4 : - case Hexagon::STrih_imm_cdnPt_V4 : - case Hexagon::STrih_imm_cdnNotPt_V4 : - case Hexagon::STrih_indexed_shl_cdnPt_V4 : - case Hexagon::STrih_indexed_shl_cdnNotPt_V4 : - case Hexagon::POST_SThri_cdnPt_V4 : - case Hexagon::POST_SThri_cdnNotPt_V4 : - - // Store word conditionally - case Hexagon::STriw_cdnPt_V4 : - case Hexagon::STriw_cdnNotPt_V4 : - case Hexagon::STriw_indexed_cdnPt_V4 : - case Hexagon::STriw_indexed_cdnNotPt_V4 : - case Hexagon::STriw_imm_cdnPt_V4 : - case Hexagon::STriw_imm_cdnNotPt_V4 : - case Hexagon::STriw_indexed_shl_cdnPt_V4 : - case Hexagon::STriw_indexed_shl_cdnNotPt_V4 : - case Hexagon::POST_STwri_cdnPt_V4 : - case Hexagon::POST_STwri_cdnNotPt_V4 : - - case Hexagon::LDd_GP_cdnPt_V4: - case Hexagon::LDd_GP_cdnNotPt_V4: - case Hexagon::LDb_GP_cdnPt_V4: - case Hexagon::LDb_GP_cdnNotPt_V4: - case Hexagon::LDub_GP_cdnPt_V4: - case Hexagon::LDub_GP_cdnNotPt_V4: - case Hexagon::LDh_GP_cdnPt_V4: - case Hexagon::LDh_GP_cdnNotPt_V4: - case Hexagon::LDuh_GP_cdnPt_V4: - case Hexagon::LDuh_GP_cdnNotPt_V4: - case Hexagon::LDw_GP_cdnPt_V4: - case Hexagon::LDw_GP_cdnNotPt_V4: - - case Hexagon::STd_GP_cdnPt_V4: - case Hexagon::STd_GP_cdnNotPt_V4: - case Hexagon::STb_GP_cdnPt_V4: - case Hexagon::STb_GP_cdnNotPt_V4: - case Hexagon::STh_GP_cdnPt_V4: - case Hexagon::STh_GP_cdnNotPt_V4: - case Hexagon::STw_GP_cdnPt_V4: - case Hexagon::STw_GP_cdnNotPt_V4: - return true; - } - return false; -} - static MachineOperand& GetPostIncrementOperand(MachineInstr *MI, const HexagonInstrInfo *QII) { assert(QII->isPostIncrement(MI) && "Not a post increment operation."); @@ -2490,7 +2323,7 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI, // sense, i.e, either both should be negated or both should be none negated. if (( predRegNumDst != predRegNumSrc) || - isDotNewInst(PacketMI) != isDotNewInst(MI) || + QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI) || GetPredicateSense(MI, QII) != GetPredicateSense(PacketMI, QII)) { return false; } @@ -2600,8 +2433,9 @@ bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI, MachineBasicBlock::iterator &MII, const TargetRegisterClass* RC ) { - // already a dot new instruction - if (isDotNewInst(MI) && !IsNewifyStore(MI)) + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + // Already a dot new instruction. + if (QII->isDotNewInst(MI) && !IsNewifyStore(MI)) return false; if (!isNewifiable(MI)) @@ -2616,7 +2450,6 @@ bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI, else { // Create a dot new machine instruction to see if resources can be // allocated. If not, bail out now. - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; int NewOpcode = GetDotNewOp(MI->getOpcode()); const MCInstrDesc &desc = QII->get(NewOpcode); DebugLoc dl; @@ -2759,7 +2592,7 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1, // !p0 is not complimentary to p0.new return ((MI1->getOperand(1).getReg() == MI2->getOperand(1).getReg()) && (GetPredicateSense(MI1, QII) != GetPredicateSense(MI2, QII)) && - (isDotNewInst(MI1) == isDotNewInst(MI2))); + (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2))); } // initPacketizerState - Initialize packetizer flags diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp index 86f75d1c2d..3deb8d1deb 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -31,6 +31,7 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) { AscizDirective = "\t.string\t"; WeakRefDirective = "\t.weak\t"; + SupportsDebugInformation = true; UsesELFSectionDirectiveForBSS = true; ExceptionsType = ExceptionHandling::DwarfCFI; } diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index ad495ff306..dda6e247ac 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -82,29 +82,35 @@ struct MBlazeOperand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned Base; + unsigned OffReg; + const MCExpr *Off; + }; + + struct FslImmOp { + const MCExpr *Val; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - } Reg; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned Base; - unsigned OffReg; - const MCExpr *Off; - } Mem; - - struct { - const MCExpr *Val; - } FslImm; + struct TokOp Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; + struct FslImmOp FslImm; }; MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index d0fd7dcec1..bd83afc1cc 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -122,7 +122,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, } void MBlazeRegisterInfo:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const { +processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *) const { // Set the stack offset where GP must be saved/loaded from. MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index 99a2fac95c..497f3866c9 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -55,7 +55,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; /// Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp index ae2e55617d..e504011dfd 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -285,8 +285,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, } void -MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { +MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *) const { // Create a frame entry for the FPW register that must be saved. if (hasFP(MF)) { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true); diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h index a077dd7351..c673f59b5e 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.h +++ b/lib/Target/MSP430/MSP430FrameLowering.h @@ -50,7 +50,8 @@ public: bool hasFP(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; }; } // End llvm namespace diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index ade6084752..c403f216b0 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -101,6 +101,9 @@ class MipsAsmParser : public MCTargetAsmParser { MipsAsmParser::OperandMatchResultTy parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned RegisterClass); + bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); @@ -119,6 +122,9 @@ class MipsAsmParser : public MCTargetAsmParser { SmallVectorImpl<MCInst> &Instructions); void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions); + void expandMemInst(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions, + bool isLoad,bool isImmOpnd); bool reportParseError(StringRef ErrorMsg); bool parseMemOffset(const MCExpr *&Res); @@ -133,6 +139,8 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetReorderDirective(); bool parseSetNoReorderDirective(); + bool parseSetAssignment(); + bool parseDirectiveWord(unsigned Size, SMLoc L); MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol); @@ -166,6 +174,9 @@ class MipsAsmParser : public MCTargetAsmParser { unsigned getReg(int RC,int RegNo); int getATReg(); + + bool processInstruction(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions); public: MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) : MCTargetAsmParser(), STI(sti), Parser(parser) { @@ -211,25 +222,30 @@ private: MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + struct Token { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + RegisterKind Kind; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned Base; + const MCExpr *Off; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - RegisterKind Kind; - } Reg; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned Base; - const MCExpr *Off; - } Mem; + struct Token Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; }; SMLoc StartLoc, EndLoc; @@ -385,6 +401,56 @@ public: }; } +namespace llvm { +extern const MCInstrDesc MipsInsts[]; +} +static const MCInstrDesc &getInstDesc(unsigned Opcode) { + return MipsInsts[Opcode]; +} + +bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions) { + const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); + Inst.setLoc(IDLoc); + if (MCID.mayLoad() || MCID.mayStore()) { + // Check the offset of memory operand, if it is a symbol + // reference or immediate we may have to expand instructions + for (unsigned i=0;i<MCID.getNumOperands();i++) { + const MCOperandInfo &OpInfo = MCID.OpInfo[i]; + if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) || + (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) { + MCOperand &Op = Inst.getOperand(i); + if (Op.isImm()) { + int MemOffset = Op.getImm(); + if (MemOffset < -32768 || MemOffset > 32767) { + // Offset can't exceed 16bit value + expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),true); + return false; + } + } else if (Op.isExpr()) { + const MCExpr *Expr = Op.getExpr(); + if (Expr->getKind() == MCExpr::SymbolRef){ + const MCSymbolRefExpr *SR = + static_cast<const MCSymbolRefExpr*>(Expr); + if (SR->getKind() == MCSymbolRefExpr::VK_None) { + // Expand symbol + expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),false); + return false; + } + } + } + } + } + } + + if (needsExpansion(Inst)) + expandInstruction(Inst, IDLoc, Instructions); + else + Instructions.push_back(Inst); + + return false; +} + bool MipsAsmParser::needsExpansion(MCInst &Inst) { switch(Inst.getOpcode()) { @@ -531,28 +597,103 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, } } +void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions, + bool isLoad,bool isImmOpnd) { + const MCSymbolRefExpr *SR; + MCInst TempInst; + unsigned ImmOffset,HiOffset,LoOffset; + const MCExpr *ExprOffset; + unsigned TmpRegNum; + unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID: + Mips::CPURegsRegClassID, + getATReg()); + // 1st operand is either source or dst register + assert(Inst.getOperand(0).isReg() && "expected register operand kind"); + unsigned RegOpNum = Inst.getOperand(0).getReg(); + // 2nd operand is base register + assert(Inst.getOperand(1).isReg() && "expected register operand kind"); + unsigned BaseRegNum = Inst.getOperand(1).getReg(); + // 3rd operand is either immediate or expression + if (isImmOpnd) { + assert(Inst.getOperand(2).isImm() && "expected immediate operand kind"); + ImmOffset = Inst.getOperand(2).getImm(); + LoOffset = ImmOffset & 0x0000ffff; + HiOffset = (ImmOffset & 0xffff0000) >> 16; + // If msb of LoOffset is 1(negative number) we must increment HiOffset + if (LoOffset & 0x8000) + HiOffset++; + } + else + ExprOffset = Inst.getOperand(2).getExpr(); + // All instructions will have the same location + TempInst.setLoc(IDLoc); + // 1st instruction in expansion is LUi. For load instruction we can use + // the dst register as a temporary if base and dst are different, + // but for stores we must use $at + TmpRegNum = (isLoad && (BaseRegNum != RegOpNum))?RegOpNum:AtRegNum; + TempInst.setOpcode(Mips::LUi); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + if (isImmOpnd) + TempInst.addOperand(MCOperand::CreateImm(HiOffset)); + else { + if (ExprOffset->getKind() == MCExpr::SymbolRef) { + SR = static_cast<const MCSymbolRefExpr*>(ExprOffset); + const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr:: + Create(SR->getSymbol().getName(), + MCSymbolRefExpr::VK_Mips_ABS_HI, + getContext()); + TempInst.addOperand(MCOperand::CreateExpr(HiExpr)); + } + } + // Add the instruction to the list + Instructions.push_back(TempInst); + // and prepare TempInst for next instruction + TempInst.clear(); + // which is add temp register to base + TempInst.setOpcode(Mips::ADDu); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + TempInst.addOperand(MCOperand::CreateReg(BaseRegNum)); + Instructions.push_back(TempInst); + TempInst.clear(); + // and finaly, create original instruction with low part + // of offset and new base + TempInst.setOpcode(Inst.getOpcode()); + TempInst.addOperand(MCOperand::CreateReg(RegOpNum)); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + if (isImmOpnd) + TempInst.addOperand(MCOperand::CreateImm(LoOffset)); + else { + if (ExprOffset->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr:: + Create(SR->getSymbol().getName(), + MCSymbolRefExpr::VK_Mips_ABS_LO, + getContext()); + TempInst.addOperand(MCOperand::CreateExpr(LoExpr)); + } + } + Instructions.push_back(TempInst); + TempInst.clear(); +} + bool MipsAsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out, unsigned &ErrorInfo, bool MatchingInlineAsm) { MCInst Inst; + SmallVector<MCInst, 8> Instructions; unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); switch (MatchResult) { default: break; case Match_Success: { - if (needsExpansion(Inst)) { - SmallVector<MCInst, 4> Instructions; - expandInstruction(Inst, IDLoc, Instructions); - for(unsigned i =0; i < Instructions.size(); i++){ - Out.EmitInstruction(Instructions[i]); - } - } else { - Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); - } + if (processInstruction(Inst,IDLoc,Instructions)) + return true; + for(unsigned i =0; i < Instructions.size(); i++) + Out.EmitInstruction(Instructions[i]); return false; } case Match_MissingFeature: @@ -812,6 +953,11 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, return false; } case AsmToken::Identifier: + // Look for the existing symbol, we should check if + // we need to assigne the propper RegisterKind + if (searchSymbolAlias(Operands,MipsOperand::Kind_None)) + return false; + //else drop to expression parsing case AsmToken::LParen: case AsmToken::Minus: case AsmToken::Plus: @@ -883,24 +1029,25 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { // Check the type of the expression if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) { - // it's a constant, evaluate lo or hi value - int Val = MCE->getValue(); + // It's a constant, evaluate lo or hi value if (Str == "lo") { - Val = Val & 0xffff; + short Val = MCE->getValue(); + Res = MCConstantExpr::Create(Val, getContext()); } else if (Str == "hi") { + int Val = MCE->getValue(); int LoSign = Val & 0x8000; Val = (Val & 0xffff0000) >> 16; - //lower part is treated as signed int, so if it is negative - //we must add 1 to hi part to compensate + // Lower part is treated as a signed int, so if it is negative + // we must add 1 to the hi part to compensate if (LoSign) Val++; + Res = MCConstantExpr::Create(Val, getContext()); } - Res = MCConstantExpr::Create(Val, getContext()); return false; } if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) { - // it's a symbol, create symbolic expression from symbol + // It's a symbol, create symbolic expression from symbol StringRef Symbol = MSRE->getSymbol().getName(); MCSymbolRefExpr::VariantKind VK = getVariantKind(Str); Res = MCSymbolRefExpr::Create(Symbol,VK,getContext()); @@ -925,6 +1072,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) { switch(getLexer().getKind()) { default: return true; + case AsmToken::Identifier: case AsmToken::Integer: case AsmToken::Minus: case AsmToken::Plus: @@ -1004,6 +1152,11 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!isMips64()) return MatchOperand_NoMatch; + if (getLexer().getKind() == AsmToken::Identifier) { + if (searchSymbolAlias(Operands,MipsOperand::Kind_CPU64Regs)) + return MatchOperand_Success; + return MatchOperand_NoMatch; + } // if the first token is not '$' we have an error if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; @@ -1018,9 +1171,52 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_NoMatch; } +bool MipsAsmParser:: +searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned RegisterKind) { + + MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier()); + if (Sym) { + SMLoc S = Parser.getTok().getLoc(); + const MCExpr *Expr; + if (Sym->isVariable()) + Expr = Sym->getVariableValue(); + else + return false; + if (Expr->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); + const StringRef DefSymbol = Ref->getSymbol().getName(); + if (DefSymbol.startswith("$")) { + // Lookup for the register with corresponding name + int RegNum = matchRegisterName(DefSymbol.substr(1),isMips64()); + if (RegNum > -1) { + Parser.Lex(); + MipsOperand *op = MipsOperand::CreateReg(RegNum,S, + Parser.getTok().getLoc()); + op->setRegKind((MipsOperand::RegisterKind)RegisterKind); + Operands.push_back(op); + return true; + } + } + } else if (Expr->getKind() == MCExpr::Constant) { + Parser.Lex(); + const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr); + MipsOperand *op = MipsOperand::CreateImm(Const,S, + Parser.getTok().getLoc()); + Operands.push_back(op); + return true; + } + } + return false; +} MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + if (getLexer().getKind() == AsmToken::Identifier) { + if (searchSymbolAlias(Operands,MipsOperand::Kind_CPURegs)) + return MatchOperand_Success; + return MatchOperand_NoMatch; + } // if the first token is not '$' we have an error if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; @@ -1316,13 +1512,13 @@ bool MipsAsmParser::reportParseError(StringRef ErrorMsg) { } bool MipsAsmParser::parseSetNoAtDirective() { - // line should look like: + // Line should look like: // .set noat // set at reg to 0 Options.setATReg(0); // eat noat Parser.Lex(); - // if this is not the end of the statement, report error + // If this is not the end of the statement, report error if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; @@ -1341,12 +1537,12 @@ bool MipsAsmParser::parseSetAtDirective() { Parser.Lex(); // Consume the EndOfStatement return false; } else if (getLexer().is(AsmToken::Equal)) { - getParser().Lex(); //eat '=' + getParser().Lex(); // eat '=' if (getLexer().isNot(AsmToken::Dollar)) { reportParseError("unexpected token in statement"); return false; } - Parser.Lex(); // eat '$' + Parser.Lex(); // Eat '$' const AsmToken &Reg = Parser.getTok(); if (Reg.is(AsmToken::Identifier)) { AtRegNo = matchCPURegisterName(Reg.getIdentifier()); @@ -1366,7 +1562,7 @@ bool MipsAsmParser::parseSetAtDirective() { reportParseError("unexpected token in statement"); return false; } - getParser().Lex(); //eat reg + getParser().Lex(); // Eat reg if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); @@ -1382,7 +1578,7 @@ bool MipsAsmParser::parseSetAtDirective() { bool MipsAsmParser::parseSetReorderDirective() { Parser.Lex(); - // if this is not the end of the statement, report error + // If this is not the end of the statement, report error if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; @@ -1431,6 +1627,31 @@ bool MipsAsmParser::parseSetNoMacroDirective() { Parser.Lex(); // Consume the EndOfStatement return false; } + +bool MipsAsmParser::parseSetAssignment() { + StringRef Name; + const MCExpr *Value; + + if (Parser.parseIdentifier(Name)) + reportParseError("expected identifier after .set"); + + if (getLexer().isNot(AsmToken::Comma)) + return reportParseError("unexpected token in .set directive"); + Lex(); //eat comma + + if (Parser.parseExpression(Value)) + reportParseError("expected valid expression after comma"); + + // check if the Name already exists as a symbol + MCSymbol *Sym = getContext().LookupSymbol(Name); + if (Sym) { + return reportParseError("symbol already defined"); + } + Sym = getContext().GetOrCreateSymbol(Name); + Sym->setVariableValue(Value); + + return false; +} bool MipsAsmParser::parseDirectiveSet() { // get next token @@ -1456,6 +1677,10 @@ bool MipsAsmParser::parseDirectiveSet() { // ignore this directive for now Parser.eatToEndOfStatement(); return false; + } else { + // it is just an identifier, look for assignment + parseSetAssignment(); + return false; } return true; diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 58aa1be34d..cf8bb189e4 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -16,6 +16,8 @@ add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen Mips16FrameLowering.cpp Mips16InstrInfo.cpp + Mips16ISelDAGToDAG.cpp + Mips16ISelLowering.cpp Mips16RegisterInfo.cpp MipsAnalyzeImmediate.cpp MipsAsmPrinter.cpp @@ -33,6 +35,8 @@ add_llvm_target(MipsCodeGen MipsRegisterInfo.cpp MipsSEFrameLowering.cpp MipsSEInstrInfo.cpp + MipsSEISelDAGToDAG.cpp + MipsSEISelLowering.cpp MipsSERegisterInfo.cpp MipsSubtarget.cpp MipsTargetMachine.cpp diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 025a783f93..59e49d8ddc 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -138,10 +138,10 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, @@ -484,14 +484,14 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { if (RegNo >= 4) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::ACRegsRegClassID, RegNo); + unsigned Reg = getReg(Decoder, Mips::ACRegsDSPRegClassID, RegNo); Inst.addOperand(MCOperand::CreateReg(Reg)); return MCDisassembler::Success; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 96f93a0789..e198a7c983 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -160,8 +160,9 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, const MCOperand &MO = MI.getOperand(OpNo); - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) return MO.getImm(); + // If the destination is an immediate, divide by 4. + if (MO.isImm()) return MO.getImm() >> 2; + assert(MO.isExpr() && "getBranchTargetOpValue expects only expressions or immediates"); @@ -179,8 +180,9 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) return MO.getImm(); + // If the destination is an immediate, divide by 4. + if (MO.isImm()) return MO.getImm()>>2; + assert(MO.isExpr() && "getJumpTargetOpValue expects only expressions or an immediate"); diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h index 25f4ffb929..54fdb78714 100644 --- a/lib/Target/Mips/Mips16FrameLowering.h +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -20,7 +20,7 @@ namespace llvm { class Mips16FrameLowering : public MipsFrameLowering { public: explicit Mips16FrameLowering(const MipsSubtarget &STI) - : MipsFrameLowering(STI) {} + : MipsFrameLowering(STI, 8) {} /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp new file mode 100644 index 0000000000..00b3449300 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -0,0 +1,308 @@ +//===-- Mips16ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips16 ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips16. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-isel" +#include "Mips16ISelDAGToDAG.h" +#include "Mips.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsMachineFunction.h" +#include "MipsRegisterInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +/// Select multiply instructions. +std::pair<SDNode*, SDNode*> +Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty, + bool HasLo, bool HasHi) { + SDNode *Lo = 0, *Hi = 0; + SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0), + N->getOperand(1)); + SDValue InFlag = SDValue(Mul, 0); + + if (HasLo) { + unsigned Opcode = Mips::Mflo16; + Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag); + InFlag = SDValue(Lo, 1); + } + if (HasHi) { + unsigned Opcode = Mips::Mfhi16; + Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag); + } + return std::make_pair(Lo, Hi); +} + +void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->globalBaseRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); + const TargetRegisterClass *RC = + (const TargetRegisterClass*)&Mips::CPU16RegsRegClass; + + V0 = RegInfo.createVirtualRegister(RC); + V1 = RegInfo.createVirtualRegister(RC); + V2 = RegInfo.createVirtualRegister(RC); + + BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); + BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16); + BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg) + .addReg(V1).addReg(V2); +} + +// Insert instructions to initialize the Mips16 SP Alias register in the +// first MBB of the function. +// +void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->mips16SPAliasRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg(); + + BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg) + .addReg(Mips::SP); +} + +void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { + initGlobalBaseReg(MF); + initMips16SPAliasReg(MF); +} + +/// getMips16SPAliasReg - Output the instructions required to put the +/// SP into a Mips16 accessible aliased register. +SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() { + unsigned Mips16SPAliasReg = + MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg(); + return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy()); +} + +void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { + SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy()); + if (Parent) { + switch (Parent->getOpcode()) { + case ISD::LOAD: { + LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent); + switch (SD->getMemoryVT().getSizeInBits()) { + case 8: + case 16: + AliasReg = TM.getFrameLowering()->hasFP(*MF)? + AliasFPReg: getMips16SPAliasReg(); + return; + } + break; + } + case ISD::STORE: { + StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent); + switch (SD->getMemoryVT().getSizeInBits()) { + case 8: + case 16: + AliasReg = TM.getFrameLowering()->hasFP(*MF)? + AliasFPReg: getMips16SPAliasReg(); + return; + } + break; + } + } + } + AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy()); + return; + +} + +bool Mips16DAGToDAGISel::selectAddr16( + SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset, + SDValue &Alias) { + EVT ValTy = Addr.getValueType(); + + Alias = CurDAG->getTargetConstant(0, ValTy); + + // if Address is FI, get the TargetFrameIndex. + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, ValTy); + getMips16SPRefReg(Parent, Alias); + return true; + } + // on PIC code Load GA + if (Addr.getOpcode() == MipsISD::Wrapper) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + if (TM.getRelocationModel() != Reloc::PIC_) { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + // Addresses of the form FI+const or FI|const + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + if (isInt<16>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> + (Addr.getOperand(0))) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + getMips16SPRefReg(Parent, Alias); + } + else + Base = Addr.getOperand(0); + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); + return true; + } + } + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + // When loading from constant pools, load the lower address part in + // the instruction itself. Example, instead of: + // lui $2, %hi($CPI1_0) + // addiu $2, $2, %lo($CPI1_0) + // lwc1 $f0, 0($2) + // Generate: + // lui $2, %hi($CPI1_0) + // lwc1 $f0, %lo($CPI1_0)($2) + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || + Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { + SDValue Opnd0 = Addr.getOperand(1).getOperand(0); + if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || + isa<JumpTableSDNode>(Opnd0)) { + Base = Addr.getOperand(0); + Offset = Opnd0; + return true; + } + } + + // If an indexed floating point load/store can be emitted, return false. + const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); + + if (LS && + (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && + Subtarget.hasFPIdx()) + return false; + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, ValTy); + return true; +} + +/// Select instructions not customized! Used for +/// expanded, promoted and normal instructions +std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + DebugLoc DL = Node->getDebugLoc(); + + /// + // Instruction Selection not handled by the auto-generated + // tablegen selection should be handled here. + /// + EVT NodeTy = Node->getValueType(0); + unsigned MultOpc; + + switch(Opcode) { + default: break; + + case ISD::SUBE: + case ISD::ADDE: { + SDValue InFlag = Node->getOperand(2), CmpLHS; + unsigned Opc = InFlag.getOpcode(); (void)Opc; + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); + + unsigned MOp; + if (Opcode == ISD::ADDE) { + CmpLHS = InFlag.getValue(0); + MOp = Mips::AdduRxRyRz16; + } else { + CmpLHS = InFlag.getOperand(0); + MOp = Mips::SubuRxRyRz16; + } + + SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; + + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + + EVT VT = LHS.getValueType(); + + unsigned Sltu_op = Mips::SltuRxRyRz16; + SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2); + unsigned Addu_op = Mips::AdduRxRyRz16; + SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT, + SDValue(Carry,0), RHS); + + SDNode *Result = CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, + SDValue(AddCarry,0)); + return std::make_pair(true, Result); + } + + /// Mul with two results + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: { + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : Mips::MultRxRy16); + std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy, + true, true); + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); + + if (!SDValue(Node, 1).use_empty()) + ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); + + return std::make_pair(true, (SDNode*)NULL); + } + + case ISD::MULHS: + case ISD::MULHU: { + MultOpc = (Opcode == ISD::MULHU ? Mips::MultuRxRy16 : Mips::MultRxRy16); + SDNode *Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second; + return std::make_pair(true, Result); + } + } + + return std::make_pair(false, (SDNode*)NULL); +} + +FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM) { + return new Mips16DAGToDAGISel(TM); +} diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h new file mode 100644 index 0000000000..baa85877d9 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h @@ -0,0 +1,51 @@ +//===---- Mips16ISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips16. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16ISELDAGTODAG_H +#define MIPS16ISELDAGTODAG_H + +#include "MipsISelDAGToDAG.h" + +namespace llvm { + +class Mips16DAGToDAGISel : public MipsDAGToDAGISel { +public: + explicit Mips16DAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {} + +private: + std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, + EVT Ty, bool HasLo, bool HasHi); + + SDValue getMips16SPAliasReg(); + + void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg); + + virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias); + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node); + + virtual void processFunctionAfterISel(MachineFunction &MF); + + // Insert instructions to initialize the global base register in the + // first MBB of the function. + void initGlobalBaseReg(MachineFunction &MF); + + void initMips16SPAliasReg(MachineFunction &MF); +}; + +FunctionPass *createMips16ISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp new file mode 100644 index 0000000000..23eb5375ac --- /dev/null +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -0,0 +1,689 @@ +//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips16. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "mips-lower" +#include "Mips16ISelLowering.h" +#include "MipsRegisterInfo.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <set> + +using namespace llvm; + +static cl::opt<bool> +Mips16HardFloat("mips16-hard-float", cl::NotHidden, + cl::desc("MIPS: mips16 hard float enable."), + cl::init(false)); + +static cl::opt<bool> DontExpandCondPseudos16( + "mips16-dont-expand-cond-pseudo", + cl::init(false), + cl::desc("Dont expand conditional move related " + "pseudos for Mips 16"), + cl::Hidden); + +namespace { + std::set<const char*, MipsTargetLowering::LTStr> NoHelperNeeded; +} + +Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM) + : MipsTargetLowering(TM) { + // + // set up as if mips32 and then revert so we can test the mechanism + // for switching + addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); + addRegisterClass(MVT::f32, &Mips::FGR32RegClass); + computeRegisterProperties(); + clearRegisterClasses(); + + // Set up the register classes + addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass); + + if (Mips16HardFloat) + setMips16HardFloatLibCalls(); + + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); + + computeRegisterProperties(); +} + +const MipsTargetLowering * +llvm::createMips16TargetLowering(MipsTargetMachine &TM) { + return new Mips16TargetLowering(TM); +} + +bool +Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { + return false; +} + +MachineBasicBlock * +Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + switch (MI->getOpcode()) { + default: + return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case Mips::SelBeqZ: + return emitSel16(Mips::BeqzRxImm16, MI, BB); + case Mips::SelBneZ: + return emitSel16(Mips::BnezRxImm16, MI, BB); + case Mips::SelTBteqZCmpi: + return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB); + case Mips::SelTBteqZSlti: + return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB); + case Mips::SelTBteqZSltiu: + return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SelTBtneZCmpi: + return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB); + case Mips::SelTBtneZSlti: + return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); + case Mips::SelTBtneZSltiu: + return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SelTBteqZCmp: + return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); + case Mips::SelTBteqZSlt: + return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); + case Mips::SelTBteqZSltu: + return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); + case Mips::SelTBtneZCmp: + return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); + case Mips::SelTBtneZSlt: + return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); + case Mips::SelTBtneZSltu: + return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); + case Mips::BteqzT8CmpX16: + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); + case Mips::BteqzT8SltX16: + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); + case Mips::BteqzT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); + case Mips::BtnezT8CmpX16: + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); + case Mips::BtnezT8SltX16: + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); + case Mips::BtnezT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); + case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); + case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); + case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + break; + case Mips::SltCCRxRy16: + return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB); + break; + case Mips::SltiCCRxImmX16: + return emitFEXT_CCRXI16_ins + (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::SltiuCCRxImmX16: + return emitFEXT_CCRXI16_ins + (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SltuCCRxRy16: + return emitFEXT_CCRX16_ins + (Mips::SltuRxRy16, MI, BB); + } +} + +bool Mips16TargetLowering:: +isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const { + // No tail call optimization for mips16. + return false; +} + +void Mips16TargetLowering::setMips16LibcallName + (RTLIB::Libcall L, const char *Name) { + setLibcallName(L, Name); + NoHelperNeeded.insert(Name); +} + +void Mips16TargetLowering::setMips16HardFloatLibCalls() { + setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); + setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); + setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); + setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); + setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); + setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); + setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); + setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); + setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); + setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); + setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); + setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); + setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); + setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); + setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); + setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); + setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); + setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); + setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); + setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); + setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); + setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); + setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); + setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); + setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); + setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); + setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); + setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); + setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); + setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); + setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2"); + setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2"); +} + + +// +// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much +// cleaner way to do all of this but it will have to wait until the traditional +// gcc mechanism is completed. +// +// For Pic, in order for Mips16 code to call Mips32 code which according the abi +// have either arguments or returned values placed in floating point registers, +// we use a set of helper functions. (This includes functions which return type +// complex which on Mips are returned in a pair of floating point registers). +// +// This is an encoding that we inherited from gcc. +// In Mips traditional O32, N32 ABI, floating point numbers are passed in +// floating point argument registers 1,2 only when the first and optionally +// the second arguments are float (sf) or double (df). +// For Mips16 we are only concerned with the situations where floating point +// arguments are being passed in floating point registers by the ABI, because +// Mips16 mode code cannot execute floating point instructions to load those +// values and hence helper functions are needed. +// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df) +// the helper function suffixs for these are: +// 0, 1, 5, 9, 2, 6, 10 +// this suffix can then be calculated as follows: +// for a given argument Arg: +// Arg1x, Arg2x = 1 : Arg is sf +// 2 : Arg is df +// 0: Arg is neither sf or df +// So this stub is the string for number Arg1x + Arg2x*4. +// However not all numbers between 0 and 10 are possible, we check anyway and +// assert if the impossible exists. +// + +unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber + (ArgListTy &Args) const { + unsigned int resultNum = 0; + if (Args.size() >= 1) { + Type *t = Args[0].Ty; + if (t->isFloatTy()) { + resultNum = 1; + } + else if (t->isDoubleTy()) { + resultNum = 2; + } + } + if (resultNum) { + if (Args.size() >=2) { + Type *t = Args[1].Ty; + if (t->isFloatTy()) { + resultNum += 4; + } + else if (t->isDoubleTy()) { + resultNum += 8; + } + } + } + return resultNum; +} + +// +// prefixs are attached to stub numbers depending on the return type . +// return type: float sf_ +// double df_ +// single complex sc_ +// double complext dc_ +// others NO PREFIX +// +// +// The full name of a helper function is__mips16_call_stub + +// return type dependent prefix + stub number +// +// +// This is something that probably should be in a different source file and +// perhaps done differently but my main purpose is to not waste runtime +// on something that we can enumerate in the source. Another possibility is +// to have a python script to generate these mapping tables. This will do +// for now. There are a whole series of helper function mapping arrays, one +// for each return type class as outlined above. There there are 11 possible +// entries. Ones with 0 are ones which should never be selected +// +// All the arrays are similar except for ones which return neither +// sf, df, sc, dc, in which only care about ones which have sf or df as a +// first parameter. +// +#define P_ "__mips16_call_stub_" +#define MAX_STUB_NUMBER 10 +#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10" +#define T P "0" , T1 +#define P P_ +static char const * vMips16Helper[MAX_STUB_NUMBER+1] = + {0, T1 }; +#undef P +#define P P_ "sf_" +static char const * sfMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "df_" +static char const * dfMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "sc_" +static char const * scMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "dc_" +static char const * dcMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#undef P_ + + +const char* Mips16TargetLowering:: + getMips16HelperFunction + (Type* RetTy, ArgListTy &Args, bool &needHelper) const { + const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args); +#ifndef NDEBUG + const unsigned int maxStubNum = 10; + assert(stubNum <= maxStubNum); + const bool validStubNum[maxStubNum+1] = + {true, true, true, false, false, true, true, false, false, true, true}; + assert(validStubNum[stubNum]); +#endif + const char *result; + if (RetTy->isFloatTy()) { + result = sfMips16Helper[stubNum]; + } + else if (RetTy ->isDoubleTy()) { + result = dfMips16Helper[stubNum]; + } + else if (RetTy->isStructTy()) { + // check if it's complex + if (RetTy->getNumContainedTypes() == 2) { + if ((RetTy->getContainedType(0)->isFloatTy()) && + (RetTy->getContainedType(1)->isFloatTy())) { + result = scMips16Helper[stubNum]; + } + else if ((RetTy->getContainedType(0)->isDoubleTy()) && + (RetTy->getContainedType(1)->isDoubleTy())) { + result = dcMips16Helper[stubNum]; + } + else { + llvm_unreachable("Uncovered condition"); + } + } + else { + llvm_unreachable("Uncovered condition"); + } + } + else { + if (stubNum == 0) { + needHelper = false; + return ""; + } + result = vMips16Helper[stubNum]; + } + needHelper = true; + return result; +} + +void Mips16TargetLowering:: +getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + SelectionDAG &DAG = CLI.DAG; + const char* Mips16HelperFunction = 0; + bool NeedMips16Helper = false; + + if (getTargetMachine().Options.UseSoftFloat && Mips16HardFloat) { + // + // currently we don't have symbols tagged with the mips16 or mips32 + // qualifier so we will assume that we don't know what kind it is. + // and generate the helper + // + bool LookupHelper = true; + if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) { + if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) { + LookupHelper = false; + } + } + if (LookupHelper) Mips16HelperFunction = + getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper); + + } + + SDValue JumpTarget = Callee; + + // T9 should contain the address of the callee function if + // -reloction-model=pic or it is an indirect call. + if (IsPICCall || !GlobalOrExternal) { + unsigned V0Reg = Mips::V0; + if (NeedMips16Helper) { + RegsToPass.push_front(std::make_pair(V0Reg, Callee)); + JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy()); + JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT); + } else + RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee)); + } + + Ops.push_back(JumpTarget); + + MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, + InternalLinkage, CLI, Callee, Chain); +} + +MachineBasicBlock *Mips16TargetLowering:: +emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg()) + .addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitSelT16 + (unsigned Opc1, unsigned Opc2, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) + .addReg(MI->getOperand(4).getReg()); + BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} + +MachineBasicBlock *Mips16TargetLowering::emitSeliT16 + (unsigned Opc1, unsigned Opc2, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) + .addImm(MI->getOperand(4).getImm()); + BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} + +MachineBasicBlock + *Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, + MachineInstr *MI, + MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned regX = MI->getOperand(0).getReg(); + unsigned regY = MI->getOperand(1).getReg(); + MachineBasicBlock *target = MI->getOperand(2).getMBB(); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins( + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned regX = MI->getOperand(0).getReg(); + int64_t imm = MI->getOperand(1).getImm(); + MachineBasicBlock *target = MI->getOperand(2).getMBB(); + unsigned CmpOpc; + if (isUInt<8>(imm)) + CmpOpc = CmpiOpc; + else if (isUInt<16>(imm)) + CmpOpc = CmpiXOpc; + else + llvm_unreachable("immediate field not usable"); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +static unsigned Mips16WhichOp8uOr16simm + (unsigned shortOp, unsigned longOp, int64_t Imm) { + if (isUInt<8>(Imm)) + return shortOp; + else if (isInt<16>(Imm)) + return longOp; + else + llvm_unreachable("immediate field not usable"); +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins( + unsigned SltOpc, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned CC = MI->getOperand(0).getReg(); + unsigned regX = MI->getOperand(1).getReg(); + unsigned regY = MI->getOperand(2).getReg(); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(SltOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins( + unsigned SltiOpc, unsigned SltiXOpc, + MachineInstr *MI, MachineBasicBlock *BB )const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned CC = MI->getOperand(0).getReg(); + unsigned regX = MI->getOperand(1).getReg(); + int64_t Imm = MI->getOperand(2).getImm(); + unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(SltOpc)).addReg(regX).addImm(Imm); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h new file mode 100644 index 0000000000..b23e2a1f37 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelLowering.h @@ -0,0 +1,80 @@ +//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips16. +// +//===----------------------------------------------------------------------===// + +#ifndef Mips16ISELLOWERING_H +#define Mips16ISELLOWERING_H + +#include "MipsISelLowering.h" + +namespace llvm { + class Mips16TargetLowering : public MipsTargetLowering { + public: + explicit Mips16TargetLowering(MipsTargetMachine &TM); + + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + private: + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const; + + void setMips16LibcallName(RTLIB::Libcall, const char *Name); + + void setMips16HardFloatLibCalls(); + + unsigned int + getMips16HelperFunctionStubNumber(ArgListTy &Args) const; + + const char *getMips16HelperFunction + (Type* RetTy, ArgListTy &Args, bool &needHelper) const; + + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; + + MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_T8I8I16_ins( + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, + MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_CCRX16_ins( + unsigned SltOpc, + MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_CCRXI16_ins( + unsigned SltiOpc, unsigned SltiXOpc, + MachineInstr *MI, MachineBasicBlock *BB )const; + }; +} + +#endif // Mips16ISELLOWERING_H diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index fd3cc8f190..17dd2c0796 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -98,10 +98,10 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } void Mips16InstrInfo:: -storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, + int64_t Offset) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); @@ -110,14 +110,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::SwRxSpImmX16; assert(Opc && "Register class not handled!"); BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO); } void Mips16InstrInfo:: -loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, int64_t Offset) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); @@ -126,7 +125,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (Mips::CPU16RegsRegClass.hasSubClassEq(RC)) Opc = Mips::LwRxSpImmX16; assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) + BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset) .addMemOperand(MMO); } diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index 1cb1dfe196..a77a9043bb 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -48,17 +48,19 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; + virtual void storeRegToStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const; + + virtual void loadRegFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const; virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index a9e9c52716..aa51aaf465 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -15,7 +15,7 @@ // Mips Address // def addr16 : - ComplexPattern<iPTR, 3, "SelectAddr16", [frameindex], [SDNPWantParent]>; + ComplexPattern<iPTR, 3, "selectAddr16", [frameindex], [SDNPWantParent]>; // // Address operand @@ -1466,14 +1466,14 @@ def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>; // MipsDivRem // def: Mips16Pat - <(MipsDivRem CPU16Regs:$rx, CPU16Regs:$ry), + <(MipsDivRem16 CPU16Regs:$rx, CPU16Regs:$ry), (DivRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>; // // MipsDivRemU // def: Mips16Pat - <(MipsDivRemU CPU16Regs:$rx, CPU16Regs:$ry), + <(MipsDivRemU16 CPU16Regs:$rx, CPU16Regs:$ry), (DivuRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>; // signed a,b diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp index 0ea9368949..6cca227685 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -72,6 +72,12 @@ bool Mips16RegisterInfo::saveScavengerRegister return true; } +const TargetRegisterClass * +Mips16RegisterInfo::intRegClass(unsigned Size) const { + assert(Size == 4); + return &Mips::CPU16RegsRegClass; +} + void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h index b8f818a478..2b3d2b1a4e 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.h +++ b/lib/Target/Mips/Mips16RegisterInfo.h @@ -37,6 +37,8 @@ public: const TargetRegisterClass *RC, unsigned Reg) const; + virtual const TargetRegisterClass *intRegClass(unsigned Size) const; + private: virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 5903b9e623..846a8224af 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -66,6 +66,14 @@ let usesCustomInserter = 1, Predicates = [HasStdEnc], defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64>; } +/// Pseudo instructions for loading, storing and copying accumulator registers. +let isPseudo = 1 in { + defm LOAD_AC128 : LoadM<"load_ac128", ACRegs128>; + defm STORE_AC128 : StoreM<"store_ac128", ACRegs128>; +} + +def COPY_AC128 : PseudoSE<(outs ACRegs128:$dst), (ins ACRegs128:$src), []>; + //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// @@ -179,10 +187,16 @@ def DMULT : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1c>; def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1d>; -def DSDIV : Div<MipsDivRem, "ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, - MULT_FM<0, 0x1e>; -def DUDIV : Div<MipsDivRemU, "ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, - MULT_FM<0, 0x1f>; +def PseudoDMULT : MultDivPseudo<DMULT, ACRegs128, CPU64RegsOpnd, MipsMult, + IIImul>; +def PseudoDMULTu : MultDivPseudo<DMULTu, ACRegs128, CPU64RegsOpnd, MipsMultu, + IIImul>; +def DSDIV : Div<"ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1e>; +def DUDIV : Div<"ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1f>; +def PseudoDSDIV : MultDivPseudo<DSDIV, ACRegs128, CPU64RegsOpnd, MipsDivRem, + IIIdiv, 0>; +def PseudoDUDIV : MultDivPseudo<DUDIV, ACRegs128, CPU64RegsOpnd, MipsDivRemU, + IIIdiv, 0>; def MTHI64 : MoveToLOHI<"mthi", CPU64Regs, [HI64]>, MTLO_FM<0x11>; def MTLO64 : MoveToLOHI<"mtlo", CPU64Regs, [LO64]>, MTLO_FM<0x13>; @@ -306,6 +320,10 @@ def : MipsPat<(i64 (sext_inreg CPU64Regs:$src, i32)), // bswap MipsPattern def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>; +// mflo/hi patterns. +def : MipsPat<(i64 (ExtractLOHI ACRegs128:$ac, imm:$lohi_idx)), + (EXTRACT_SUBREG ACRegs128:$ac, imm:$lohi_idx)>; + //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// @@ -332,13 +350,19 @@ def : InstAlias<"not $rt, $rs", def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs), 0>, Requires<[HasMips64]>; def : InstAlias<"jalr $rs", (JALR64 RA_64, CPU64Regs:$rs)>, Requires<[HasMips64]>; +def : InstAlias<"jal $rs", (JALR64 RA_64, CPU64Regs:$rs), 0>, + Requires<[HasMips64]>; +def : InstAlias<"jal $rd,$rs", (JALR64 CPU64Regs:$rd, CPU64Regs:$rs), 0>, + Requires<[HasMips64]>; def : InstAlias<"daddu $rs, $rt, $imm", (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm), 1>; def : InstAlias<"dadd $rs, $rt, $imm", (DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm), 1>; - +def : InstAlias<"or $rs, $rt, $imm", + (ORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm), + 1>, Requires<[HasMips64]>; /// Move between CPU and coprocessor registers let DecoderNamespace = "Mips64" in { diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index 9531b91487..3c116e1264 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -20,17 +20,18 @@ def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>; def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>; // Mips-specific dsp nodes -def SDT_MipsExtr : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>; -def SDT_MipsShilo : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; -def SDT_MipsDPA : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>; +def SDT_MipsExtr : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, + SDTCisVT<2, untyped>]>; +def SDT_MipsShilo : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, + SDTCisSameAs<0, 2>, SDTCisVT<1, i32>]>; +def SDT_MipsDPA : SDTypeProfile<1, 3, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>, + SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; class MipsDSPBase<string Opc, SDTypeProfile Prof> : - SDNode<!strconcat("MipsISD::", Opc), Prof, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + SDNode<!strconcat("MipsISD::", Opc), Prof>; class MipsDSPSideEffectBase<string Opc, SDTypeProfile Prof> : - SDNode<!strconcat("MipsISD::", Opc), Prof, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPSideEffect]>; + SDNode<!strconcat("MipsISD::", Opc), Prof, [SDNPHasChain, SDNPSideEffect]>; def MipsEXTP : MipsDSPSideEffectBase<"EXTP", SDT_MipsExtr>; def MipsEXTPDP : MipsDSPSideEffectBase<"EXTPDP", SDT_MipsExtr>; @@ -40,7 +41,7 @@ def MipsEXTR_R_W : MipsDSPSideEffectBase<"EXTR_R_W", SDT_MipsExtr>; def MipsEXTR_RS_W : MipsDSPSideEffectBase<"EXTR_RS_W", SDT_MipsExtr>; def MipsSHILO : MipsDSPBase<"SHILO", SDT_MipsShilo>; -def MipsMTHLIP : MipsDSPBase<"MTHLIP", SDT_MipsShilo>; +def MipsMTHLIP : MipsDSPSideEffectBase<"MTHLIP", SDT_MipsShilo>; def MipsMULSAQ_S_W_PH : MipsDSPSideEffectBase<"MULSAQ_S_W_PH", SDT_MipsDPA>; def MipsMAQ_S_W_PHL : MipsDSPSideEffectBase<"MAQ_S_W_PHL", SDT_MipsDPA>; @@ -383,7 +384,7 @@ class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode, class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, InstrItinClass itin> { dag OutOperandList = (outs CPURegs:$rt); - dag InOperandList = (ins ACRegs:$ac, CPURegs:$shift_rs); + dag InOperandList = (ins ACRegsDSP:$ac, CPURegs:$shift_rs); string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); InstrItinClass Itinerary = itin; list<Register> Defs = [DSPCtrl]; @@ -392,46 +393,40 @@ class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode, InstrItinClass itin> { dag OutOperandList = (outs CPURegs:$rt); - dag InOperandList = (ins ACRegs:$ac, uimm16:$shift_rs); + dag InOperandList = (ins ACRegsDSP:$ac, uimm16:$shift_rs); string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); InstrItinClass Itinerary = itin; list<Register> Defs = [DSPCtrl]; } -class SHILO_R1_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, - Instruction realinst> : - PseudoDSP<(outs), (ins simm16:$shift), [(OpNode immSExt6:$shift)]>, - PseudoInstExpansion<(realinst AC0, simm16:$shift)> { - list<Register> Defs = [DSPCtrl, AC0]; - list<Register> Uses = [AC0]; - InstrItinClass Itinerary = itin; -} - -class SHILO_R1_DESC_BASE<string instr_asm> { - dag OutOperandList = (outs ACRegs:$ac); - dag InOperandList = (ins simm16:$shift); +class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { + dag OutOperandList = (outs ACRegsDSP:$ac); + dag InOperandList = (ins simm16:$shift, ACRegsDSP:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $shift"); + list<dag> Pattern = [(set ACRegsDSP:$ac, + (OpNode immSExt6:$shift, ACRegsDSP:$acin))]; + list<Register> Defs = [DSPCtrl]; + string Constraints = "$acin = $ac"; } -class SHILO_R2_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, - Instruction realinst> : - PseudoDSP<(outs), (ins CPURegs:$rs), [(OpNode CPURegs:$rs)]>, - PseudoInstExpansion<(realinst AC0, CPURegs:$rs)> { - list<Register> Defs = [DSPCtrl, AC0]; - list<Register> Uses = [AC0]; - InstrItinClass Itinerary = itin; -} - -class SHILO_R2_DESC_BASE<string instr_asm> { - dag OutOperandList = (outs ACRegs:$ac); - dag InOperandList = (ins CPURegs:$rs); +class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { + dag OutOperandList = (outs ACRegsDSP:$ac); + dag InOperandList = (ins CPURegs:$rs, ACRegsDSP:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $rs"); + list<dag> Pattern = [(set ACRegsDSP:$ac, + (OpNode CPURegs:$rs, ACRegsDSP:$acin))]; + list<Register> Defs = [DSPCtrl]; + string Constraints = "$acin = $ac"; } -class MTHLIP_DESC_BASE<string instr_asm> { - dag OutOperandList = (outs ACRegs:$ac); - dag InOperandList = (ins CPURegs:$rs); +class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { + dag OutOperandList = (outs ACRegsDSP:$ac); + dag InOperandList = (ins CPURegs:$rs, ACRegsDSP:$acin); string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); + list<dag> Pattern = [(set ACRegsDSP:$ac, + (OpNode CPURegs:$rs, ACRegsDSP:$acin))]; + list<Register> Uses = [DSPCtrl]; + string Constraints = "$acin = $ac"; } class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -454,35 +449,37 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, list<Register> Defs = [DSPCtrl]; } -class DPA_W_PH_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, - Instruction realinst> : - PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt), - [(OpNode CPURegs:$rs, CPURegs:$rt)]>, - PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> { - list<Register> Defs = [DSPCtrl, AC0]; - list<Register> Uses = [AC0]; - InstrItinClass Itinerary = itin; +class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { + dag OutOperandList = (outs ACRegsDSP:$ac); + dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin); + string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); + list<dag> Pattern = [(set ACRegsDSP:$ac, + (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))]; + list<Register> Defs = [DSPCtrl]; + string Constraints = "$acin = $ac"; } -class DPA_W_PH_DESC_BASE<string instr_asm> { - dag OutOperandList = (outs ACRegs:$ac); +class MULT_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs ACRegsDSP:$ac); dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt); string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); -} - -class MULT_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, - Instruction realinst> : - PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt), - [(OpNode CPURegs:$rs, CPURegs:$rt)]>, - PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> { - list<Register> Defs = [DSPCtrl, AC0]; + list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode CPURegs:$rs, CPURegs:$rt))]; InstrItinClass Itinerary = itin; + int AddedComplexity = 20; + bit isCommutable = 1; } -class MULT_DESC_BASE<string instr_asm> { - dag OutOperandList = (outs ACRegs:$ac); - dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt); +class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs ACRegsDSP:$ac); + dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); + list<dag> Pattern = [(set ACRegsDSP:$ac, + (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))]; + InstrItinClass Itinerary = itin; + int AddedComplexity = 20; + string Constraints = "$acin = $ac"; } class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> : @@ -717,44 +714,40 @@ class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph, NoItinerary, DSPRegs, DSPRegs>, IsCommutable; -class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph">; +class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph", + MipsMULSAQ_S_W_PH>; -class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl">; +class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>; -class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr">; +class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>; -class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl">; +class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>; -class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr">; +class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>; // Dot product with accumulate/subtract -class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl">; - -class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr">; - -class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl">; - -class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr">; +class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>; -class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph">; +class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr", MipsDPAU_H_QBR>; -class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph">; +class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl", MipsDPSU_H_QBL>; -class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w">; +class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr", MipsDPSU_H_QBR>; -class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w">; +class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>; -class MULT_DSP_DESC : MULT_DESC_BASE<"mult">; +class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>; -class MULTU_DSP_DESC : MULT_DESC_BASE<"multu">; +class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>; -class MADD_DSP_DESC : MULT_DESC_BASE<"madd">; +class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>; -class MADDU_DSP_DESC : MULT_DESC_BASE<"maddu">; - -class MSUB_DSP_DESC : MULT_DESC_BASE<"msub">; - -class MSUBU_DSP_DESC : MULT_DESC_BASE<"msubu">; +class MULT_DSP_DESC : MULT_DESC_BASE<"mult", MipsMult, NoItinerary>; +class MULTU_DSP_DESC : MULT_DESC_BASE<"multu", MipsMultu, NoItinerary>; +class MADD_DSP_DESC : MADD_DESC_BASE<"madd", MipsMAdd, NoItinerary>; +class MADDU_DSP_DESC : MADD_DESC_BASE<"maddu", MipsMAddu, NoItinerary>; +class MSUB_DSP_DESC : MADD_DESC_BASE<"msub", MipsMSub, NoItinerary>; +class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>; // Comparison class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb", @@ -867,11 +860,11 @@ class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H, class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H, NoItinerary>; -class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo">; +class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo", MipsSHILO>; -class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov">; +class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov", MipsSHILO>; -class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip">; +class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>; class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>; @@ -975,23 +968,25 @@ class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph, IsCommutable; // Dot product with accumulate/subtract -class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph">; +class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph", MipsDPA_W_PH>; -class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph">; +class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph", MipsDPS_W_PH>; -class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph">; +class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>; -class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph">; +class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph", + MipsDPAQX_SA_W_PH>; -class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph">; +class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph", MipsDPAX_W_PH>; -class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph">; +class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph", MipsDPSX_W_PH>; -class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph">; +class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>; -class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph">; +class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph", + MipsDPSQX_SA_W_PH>; -class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph">; +class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>; // Precision reduce/expand class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph", @@ -1206,71 +1201,14 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC; } // Pseudos. -def MULSAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSAQ_S_W_PH, NoItinerary, - MULSAQ_S_W_PH>; -def MAQ_S_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHL, NoItinerary, - MAQ_S_W_PHL>; -def MAQ_S_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHR, NoItinerary, - MAQ_S_W_PHR>; -def MAQ_SA_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHL, NoItinerary, - MAQ_SA_W_PHL>; -def MAQ_SA_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHR, NoItinerary, - MAQ_SA_W_PHR>; -def DPAU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBL, NoItinerary, - DPAU_H_QBL>; -def DPAU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBR, NoItinerary, - DPAU_H_QBR>; -def DPSU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBL, NoItinerary, - DPSU_H_QBL>; -def DPSU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBR, NoItinerary, - DPSU_H_QBR>; -def DPAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_S_W_PH, NoItinerary, - DPAQ_S_W_PH>; -def DPSQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_S_W_PH, NoItinerary, - DPSQ_S_W_PH>; -def DPAQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_SA_L_W, NoItinerary, - DPAQ_SA_L_W>; -def DPSQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_SA_L_W, NoItinerary, - DPSQ_SA_L_W>; - -def MULT_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULT, NoItinerary, MULT_DSP>, - IsCommutable; -def MULTU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULTU, NoItinerary, MULTU_DSP>, - IsCommutable; -def MADD_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADD_DSP, NoItinerary, MADD_DSP>, - IsCommutable, UseAC; -def MADDU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADDU_DSP, NoItinerary, MADDU_DSP>, - IsCommutable, UseAC; -def MSUB_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUB_DSP, NoItinerary, MSUB_DSP>, - UseAC; -def MSUBU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUBU_DSP, NoItinerary, MSUBU_DSP>, - UseAC; - -def SHILO_PSEUDO : SHILO_R1_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILO>; -def SHILOV_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILOV>; -def MTHLIP_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsMTHLIP, NoItinerary, MTHLIP>; - -let Predicates = [HasDSPR2] in { - -def DPA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPA_W_PH, NoItinerary, DPA_W_PH>; -def DPS_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPS_W_PH, NoItinerary, DPS_W_PH>; -def DPAQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_S_W_PH, NoItinerary, - DPAQX_S_W_PH>; -def DPAQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_SA_W_PH, NoItinerary, - DPAQX_SA_W_PH>; -def DPAX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAX_W_PH, NoItinerary, - DPAX_W_PH>; -def DPSX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSX_W_PH, NoItinerary, - DPSX_W_PH>; -def DPSQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_S_W_PH, NoItinerary, - DPSQX_S_W_PH>; -def DPSQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_SA_W_PH, NoItinerary, - DPSQX_SA_W_PH>; -def MULSA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSA_W_PH, NoItinerary, - MULSA_W_PH>; - +/// Pseudo instructions for loading, storing and copying accumulator registers. +let isPseudo = 1 in { + defm LOAD_AC_DSP : LoadM<"load_ac_dsp", ACRegsDSP>; + defm STORE_AC_DSP : StoreM<"store_ac_dsp", ACRegsDSP>; } +def COPY_AC_DSP : PseudoSE<(outs ACRegsDSP:$dst), (ins ACRegsDSP:$src), []>; + // Patterns. class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> : Pat<pattern, result>, Requires<[pred]>; @@ -1296,10 +1234,12 @@ def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a), // Extr patterns. class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> : - DSPPat<(i32 (OpNode CPURegs:$rs)), (Instr AC0, CPURegs:$rs)>; + DSPPat<(i32 (OpNode CPURegs:$rs, ACRegsDSP:$ac)), + (Instr ACRegsDSP:$ac, CPURegs:$rs)>; class EXTR_W_TY1_R1_Pat<SDPatternOperator OpNode, Instruction Instr> : - DSPPat<(i32 (OpNode immZExt5:$shift)), (Instr AC0, immZExt5:$shift)>; + DSPPat<(i32 (OpNode immZExt5:$shift, ACRegsDSP:$ac)), + (Instr ACRegsDSP:$ac, immZExt5:$shift)>; def : EXTR_W_TY1_R1_Pat<MipsEXTP, EXTP>; def : EXTR_W_TY1_R2_Pat<MipsEXTP, EXTPV>; @@ -1313,3 +1253,19 @@ def : EXTR_W_TY1_R1_Pat<MipsEXTR_RS_W, EXTR_RS_W>; def : EXTR_W_TY1_R2_Pat<MipsEXTR_RS_W, EXTRV_RS_W>; def : EXTR_W_TY1_R1_Pat<MipsEXTR_S_H, EXTR_S_H>; def : EXTR_W_TY1_R2_Pat<MipsEXTR_S_H, EXTRV_S_H>; + +// mflo/hi patterns. +let AddedComplexity = 20 in +def : DSPPat<(i32 (ExtractLOHI ACRegsDSP:$ac, imm:$lohi_idx)), + (EXTRACT_SUBREG ACRegsDSP:$ac, imm:$lohi_idx)>; + +// Indexed load patterns. +class IndexedLoadPat<SDPatternOperator LoadNode, Instruction Instr> : + DSPPat<(i32 (LoadNode (add i32:$base, i32:$index))), + (Instr i32:$base, i32:$index)>; + +let AddedComplexity = 20 in { + def : IndexedLoadPat<zextloadi8, LBUX>; + def : IndexedLoadPat<sextloadi16, LHX>; + def : IndexedLoadPat<load, LWX>; +} diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index e265590141..d07a595af3 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -220,9 +220,9 @@ namespace { /// that can be moved to the delay slot. Returns true on success. bool searchForward(MachineBasicBlock &MBB, Iter Slot) const; - /// This function searches MBB's successor blocks for an instruction that - /// can be moved to the delay slot and inserts clones of the instruction - /// into the successor blocks. + /// This function searches one of MBB's successor blocks for an instruction + /// that can be moved to the delay slot and inserts clones of the + /// instruction into the successor's predecessor blocks. bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const; /// Pick a successor block of MBB. Return NULL if MBB doesn't have a diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index 14268d2130..6a5f79d0df 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -26,9 +26,8 @@ protected: const MipsSubtarget &STI; public: - explicit MipsFrameLowering(const MipsSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0, - sti.hasMips64() ? 16 : 8), STI(sti) {} + explicit MipsFrameLowering(const MipsSubtarget &sti, unsigned Alignment) + : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) {} static const MipsFrameLowering *create(MipsTargetMachine &TM, const MipsSubtarget &ST); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 6dff548505..77b08cb11e 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -12,19 +12,19 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-isel" +#include "MipsISelDAGToDAG.h" +#include "Mips16ISelDAGToDAG.h" +#include "MipsSEISelDAGToDAG.h" #include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsAnalyzeImmediate.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" -#include "MipsSubtarget.h" -#include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" @@ -45,270 +45,11 @@ using namespace llvm; // MipsDAGToDAGISel - MIPS specific code to select MIPS machine // instructions for SelectionDAG operations. //===----------------------------------------------------------------------===// -namespace { - -class MipsDAGToDAGISel : public SelectionDAGISel { - - /// TM - Keep a reference to MipsTargetMachine. - MipsTargetMachine &TM; - - /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can - /// make the right decision when generating code for different targets. - const MipsSubtarget &Subtarget; - -public: - explicit MipsDAGToDAGISel(MipsTargetMachine &tm) : - SelectionDAGISel(tm), - TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {} - - // Pass Name - virtual const char *getPassName() const { - return "MIPS DAG->DAG Pattern Instruction Selection"; - } - - virtual bool runOnMachineFunction(MachineFunction &MF); - -private: - // Include the pieces autogenerated from the target description. - #include "MipsGenDAGISel.inc" - - /// getTargetMachine - Return a reference to the TargetMachine, casted - /// to the target-specific type. - const MipsTargetMachine &getTargetMachine() { - return static_cast<const MipsTargetMachine &>(TM); - } - - /// getInstrInfo - Return a reference to the TargetInstrInfo, casted - /// to the target-specific type. - const MipsInstrInfo *getInstrInfo() { - return getTargetMachine().getInstrInfo(); - } - - SDNode *getGlobalBaseReg(); - - SDValue getMips16SPAliasReg(); - - void getMips16SPRefReg(SDNode *parent, SDValue &AliasReg); - - std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, - EVT Ty, bool HasLo, bool HasHi); - - SDNode *Select(SDNode *N); - - // Complex Pattern. - /// (reg + imm). - bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - /// Fall back on this function if all else fails. - bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - /// Match integer address pattern. - bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Alias); - - // getImm - Return a target constant with the specified value. - inline SDValue getImm(const SDNode *Node, uint64_t Imm) { - return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); - } - - void ProcessFunctionAfterISel(MachineFunction &MF); - bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); - void InitGlobalBaseReg(MachineFunction &MF); - void InitMips16SPAliasReg(MachineFunction &MF); - - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps); -}; - -} - -// Insert instructions to initialize the global base register in the -// first MBB of the function. When the ABI is O32 and the relocation model is -// PIC, the necessary instructions are emitted later to prevent optimization -// passes from moving them. -void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - - if (!MipsFI->globalBaseRegSet()) - return; - - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator I = MBB.begin(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); - const TargetRegisterClass *RC; - - if (Subtarget.isABI_N64()) - RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; - else if (Subtarget.inMips16Mode()) - RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass; - else - RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass; - - V0 = RegInfo.createVirtualRegister(RC); - V1 = RegInfo.createVirtualRegister(RC); - V2 = RegInfo.createVirtualRegister(RC); - - if (Subtarget.isABI_N64()) { - MF.getRegInfo().addLiveIn(Mips::T9_64); - MBB.addLiveIn(Mips::T9_64); - - // lui $v0, %hi(%neg(%gp_rel(fname))) - // daddu $v1, $v0, $t9 - // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) - const GlobalValue *FName = MF.getFunction(); - BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); - BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0) - .addReg(Mips::T9_64); - BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - return; - } - - if (Subtarget.inMips16Mode()) { - BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); - BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); - BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16); - BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg) - .addReg(V1).addReg(V2); - return; - } - - if (MF.getTarget().getRelocationModel() == Reloc::Static) { - // Set global register to __gnu_local_gp. - // - // lui $v0, %hi(__gnu_local_gp) - // addiu $globalbasereg, $v0, %lo(__gnu_local_gp) - BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) - .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); - BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) - .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); - return; - } - - MF.getRegInfo().addLiveIn(Mips::T9); - MBB.addLiveIn(Mips::T9); - - if (Subtarget.isABI_N32()) { - // lui $v0, %hi(%neg(%gp_rel(fname))) - // addu $v1, $v0, $t9 - // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) - const GlobalValue *FName = MF.getFunction(); - BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); - BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); - BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - return; - } - - assert(Subtarget.isABI_O32()); - - // For O32 ABI, the following instruction sequence is emitted to initialize - // the global base register: - // - // 0. lui $2, %hi(_gp_disp) - // 1. addiu $2, $2, %lo(_gp_disp) - // 2. addu $globalbasereg, $2, $t9 - // - // We emit only the last instruction here. - // - // GNU linker requires that the first two instructions appear at the beginning - // of a function and no instructions be inserted before or between them. - // The two instructions are emitted during lowering to MC layer in order to - // avoid any reordering. - // - // Register $2 (Mips::V0) is added to the list of live-in registers to ensure - // the value instruction 1 (addiu) defines is valid when instruction 2 (addu) - // reads it. - MF.getRegInfo().addLiveIn(Mips::V0); - MBB.addLiveIn(Mips::V0); - BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) - .addReg(Mips::V0).addReg(Mips::T9); -} - -// Insert instructions to initialize the Mips16 SP Alias register in the -// first MBB of the function. -// -void MipsDAGToDAGISel::InitMips16SPAliasReg(MachineFunction &MF) { - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - - if (!MipsFI->mips16SPAliasRegSet()) - return; - - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator I = MBB.begin(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg(); - - BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg) - .addReg(Mips::SP); -} - - -bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, - const MachineInstr& MI) { - unsigned DstReg = 0, ZeroReg = 0; - - // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0". - if ((MI.getOpcode() == Mips::ADDiu) && - (MI.getOperand(1).getReg() == Mips::ZERO) && - (MI.getOperand(2).getImm() == 0)) { - DstReg = MI.getOperand(0).getReg(); - ZeroReg = Mips::ZERO; - } else if ((MI.getOpcode() == Mips::DADDiu) && - (MI.getOperand(1).getReg() == Mips::ZERO_64) && - (MI.getOperand(2).getImm() == 0)) { - DstReg = MI.getOperand(0).getReg(); - ZeroReg = Mips::ZERO_64; - } - - if (!DstReg) - return false; - - // Replace uses with ZeroReg. - for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), - E = MRI->use_end(); U != E;) { - MachineOperand &MO = U.getOperand(); - unsigned OpNo = U.getOperandNo(); - MachineInstr *MI = MO.getParent(); - ++U; - - // Do not replace if it is a phi's operand or is tied to def operand. - if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) - continue; - - MO.setReg(ZeroReg); - } - - return true; -} - -void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) { - InitGlobalBaseReg(MF); - InitMips16SPAliasReg(MF); - - MachineRegisterInfo *MRI = &MF.getRegInfo(); - - for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE; - ++MFI) - for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) - ReplaceUsesWithZeroReg(MRI, *I); -} bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { bool Ret = SelectionDAGISel::runOnMachineFunction(MF); - ProcessFunctionAfterISel(MF); + processFunctionAfterISel(MF); return Ret; } @@ -320,233 +61,36 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } -/// getMips16SPAliasReg - Output the instructions required to put the -/// SP into a Mips16 accessible aliased register. -SDValue MipsDAGToDAGISel::getMips16SPAliasReg() { - unsigned Mips16SPAliasReg = - MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg(); - return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy()); -} - /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const { - EVT ValTy = Addr.getValueType(); - - // if Address is FI, get the TargetFrameIndex. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - Offset = CurDAG->getTargetConstant(0, ValTy); - return true; - } - - // on PIC code Load GA - if (Addr.getOpcode() == MipsISD::Wrapper) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; - } - - if (TM.getRelocationModel() != Reloc::PIC_) { - if ((Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress)) - return false; - } - - // Addresses of the form FI+const or FI|const - if (CurDAG->isBaseWithConstantOffset(Addr)) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); - if (isInt<16>(CN->getSExtValue())) { - - // If the first operand is a FI, get the TargetFI Node - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> - (Addr.getOperand(0))) - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - else - Base = Addr.getOperand(0); - - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); - return true; - } - } - - // Operand is a result from an ADD. - if (Addr.getOpcode() == ISD::ADD) { - // When loading from constant pools, load the lower address part in - // the instruction itself. Example, instead of: - // lui $2, %hi($CPI1_0) - // addiu $2, $2, %lo($CPI1_0) - // lwc1 $f0, 0($2) - // Generate: - // lui $2, %hi($CPI1_0) - // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || - Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { - SDValue Opnd0 = Addr.getOperand(1).getOperand(0); - if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || - isa<JumpTableSDNode>(Opnd0)) { - Base = Addr.getOperand(0); - Offset = Opnd0; - return true; - } - } - } - + llvm_unreachable("Unimplemented function."); return false; } bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const { - Base = Addr; - Offset = CurDAG->getTargetConstant(0, Addr.getValueType()); - return true; + llvm_unreachable("Unimplemented function."); + return false; } bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const { - return selectAddrRegImm(Addr, Base, Offset) || - selectAddrDefault(Addr, Base, Offset); -} - -void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { - SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy()); - if (Parent) { - switch (Parent->getOpcode()) { - case ISD::LOAD: { - LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent); - switch (SD->getMemoryVT().getSizeInBits()) { - case 8: - case 16: - AliasReg = TM.getFrameLowering()->hasFP(*MF)? - AliasFPReg: getMips16SPAliasReg(); - return; - } - break; - } - case ISD::STORE: { - StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent); - switch (SD->getMemoryVT().getSizeInBits()) { - case 8: - case 16: - AliasReg = TM.getFrameLowering()->hasFP(*MF)? - AliasFPReg: getMips16SPAliasReg(); - return; - } - break; - } - } - } - AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy()); - return; - -} -bool MipsDAGToDAGISel::SelectAddr16( - SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset, - SDValue &Alias) { - EVT ValTy = Addr.getValueType(); - - Alias = CurDAG->getTargetConstant(0, ValTy); - - // if Address is FI, get the TargetFrameIndex. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - Offset = CurDAG->getTargetConstant(0, ValTy); - getMips16SPRefReg(Parent, Alias); - return true; - } - // on PIC code Load GA - if (Addr.getOpcode() == MipsISD::Wrapper) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; - } - if (TM.getRelocationModel() != Reloc::PIC_) { - if ((Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress)) - return false; - } - // Addresses of the form FI+const or FI|const - if (CurDAG->isBaseWithConstantOffset(Addr)) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); - if (isInt<16>(CN->getSExtValue())) { - - // If the first operand is a FI, get the TargetFI Node - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> - (Addr.getOperand(0))) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - getMips16SPRefReg(Parent, Alias); - } - else - Base = Addr.getOperand(0); - - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); - return true; - } - } - // Operand is a result from an ADD. - if (Addr.getOpcode() == ISD::ADD) { - // When loading from constant pools, load the lower address part in - // the instruction itself. Example, instead of: - // lui $2, %hi($CPI1_0) - // addiu $2, $2, %lo($CPI1_0) - // lwc1 $f0, 0($2) - // Generate: - // lui $2, %hi($CPI1_0) - // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || - Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { - SDValue Opnd0 = Addr.getOperand(1).getOperand(0); - if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || - isa<JumpTableSDNode>(Opnd0)) { - Base = Addr.getOperand(0); - Offset = Opnd0; - return true; - } - } - - // If an indexed floating point load/store can be emitted, return false. - const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); - - if (LS && - (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && - Subtarget.hasFPIdx()) - return false; - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, ValTy); - return true; + llvm_unreachable("Unimplemented function."); + return false; } -/// Select multiply instructions. -std::pair<SDNode*, SDNode*> -MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty, - bool HasLo, bool HasHi) { - SDNode *Lo = 0, *Hi = 0; - SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N->getOperand(0), - N->getOperand(1)); - SDValue InFlag = SDValue(Mul, 0); - - if (HasLo) { - unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mflo16 : - (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); - Lo = CurDAG->getMachineNode(Opcode, dl, Ty, MVT::Glue, InFlag); - InFlag = SDValue(Lo, 1); - } - if (HasHi) { - unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mfhi16 : - (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64); - Hi = CurDAG->getMachineNode(Opcode, dl, Ty, InFlag); - } - return std::make_pair(Lo, Hi); +bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias) { + llvm_unreachable("Unimplemented function."); + return false; } - /// Select instructions not customized! Used for /// expanded, promoted and normal instructions SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); - DebugLoc dl = Node->getDebugLoc(); // Dump information about the Node being selected DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); @@ -557,167 +101,19 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { return NULL; } - /// - // Instruction Selection not handled by the auto-generated - // tablegen selection should be handled here. - /// - EVT NodeTy = Node->getValueType(0); - unsigned MultOpc; + // See if subclasses can handle this node. + std::pair<bool, SDNode*> Ret = selectNode(Node); + + if (Ret.first) + return Ret.second; switch(Opcode) { default: break; - case ISD::SUBE: - case ISD::ADDE: { - bool inMips16Mode = Subtarget.inMips16Mode(); - SDValue InFlag = Node->getOperand(2), CmpLHS; - unsigned Opc = InFlag.getOpcode(); (void)Opc; - assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || - (Opc == ISD::SUBC || Opc == ISD::SUBE)) && - "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); - - unsigned MOp; - if (Opcode == ISD::ADDE) { - CmpLHS = InFlag.getValue(0); - if (inMips16Mode) - MOp = Mips::AdduRxRyRz16; - else - MOp = Mips::ADDu; - } else { - CmpLHS = InFlag.getOperand(0); - if (inMips16Mode) - MOp = Mips::SubuRxRyRz16; - else - MOp = Mips::SUBu; - } - - SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; - - SDValue LHS = Node->getOperand(0); - SDValue RHS = Node->getOperand(1); - - EVT VT = LHS.getValueType(); - - unsigned Sltu_op = inMips16Mode? Mips::SltuRxRyRz16: Mips::SLTu; - SDNode *Carry = CurDAG->getMachineNode(Sltu_op, dl, VT, Ops, 2); - unsigned Addu_op = inMips16Mode? Mips::AdduRxRyRz16 : Mips::ADDu; - SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, dl, VT, - SDValue(Carry,0), RHS); - - return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, - LHS, SDValue(AddCarry,0)); - } - - /// Mul with two results - case ISD::SMUL_LOHI: - case ISD::UMUL_LOHI: { - if (NodeTy == MVT::i32) { - if (Subtarget.inMips16Mode()) - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : - Mips::MultRxRy16); - else - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); - } - else - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT); - - std::pair<SDNode*, SDNode*> LoHi = SelectMULT(Node, MultOpc, dl, NodeTy, - true, true); - - if (!SDValue(Node, 0).use_empty()) - ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); - - if (!SDValue(Node, 1).use_empty()) - ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); - - return NULL; - } - - /// Special Muls - case ISD::MUL: { - // Mips32 has a 32-bit three operand mul instruction. - if (Subtarget.hasMips32() && NodeTy == MVT::i32) - break; - return SelectMULT(Node, NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT, - dl, NodeTy, true, false).first; - } - case ISD::MULHS: - case ISD::MULHU: { - if (NodeTy == MVT::i32) { - if (Subtarget.inMips16Mode()) - MultOpc = (Opcode == ISD::MULHU ? - Mips::MultuRxRy16 : Mips::MultRxRy16); - else - MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); - } - else - MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT); - - return SelectMULT(Node, MultOpc, dl, NodeTy, false, true).second; - } - // Get target GOT address. case ISD::GLOBAL_OFFSET_TABLE: return getGlobalBaseReg(); - case ISD::ConstantFP: { - ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); - if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { - if (Subtarget.hasMips64()) { - SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - Mips::ZERO_64, MVT::i64); - return CurDAG->getMachineNode(Mips::DMTC1, dl, MVT::f64, Zero); - } - - SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - Mips::ZERO, MVT::i32); - return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero, - Zero); - } - break; - } - - case ISD::Constant: { - const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node); - unsigned Size = CN->getValueSizeInBits(0); - - if (Size == 32) - break; - - MipsAnalyzeImmediate AnalyzeImm; - int64_t Imm = CN->getSExtValue(); - - const MipsAnalyzeImmediate::InstSeq &Seq = - AnalyzeImm.Analyze(Imm, Size, false); - - MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); - DebugLoc DL = CN->getDebugLoc(); - SDNode *RegOpnd; - SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), - MVT::i64); - - // The first instruction can be a LUi which is different from other - // instructions (ADDiu, ORI and SLL) in that it does not have a register - // operand. - if (Inst->Opc == Mips::LUi64) - RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd); - else - RegOpnd = - CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, - CurDAG->getRegister(Mips::ZERO_64, MVT::i64), - ImmOpnd); - - // The remaining instructions in the sequence are handled here. - for (++Inst; Inst != Seq.end(); ++Inst) { - ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), - MVT::i64); - RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, - SDValue(RegOpnd, 0), ImmOpnd); - } - - return RegOpnd; - } - #ifndef NDEBUG case ISD::LOAD: case ISD::STORE: @@ -726,31 +122,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { "Unexpected unaligned loads/stores."); break; #endif - - case MipsISD::ThreadPointer: { - EVT PtrVT = TLI.getPointerTy(); - unsigned RdhwrOpc, SrcReg, DestReg; - - if (PtrVT == MVT::i32) { - RdhwrOpc = Mips::RDHWR; - SrcReg = Mips::HWR29; - DestReg = Mips::V1; - } else { - RdhwrOpc = Mips::RDHWR64; - SrcReg = Mips::HWR29_64; - DestReg = Mips::V1_64; - } - - SDNode *Rdhwr = - CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(), - Node->getValueType(0), - CurDAG->getRegister(SrcReg, PtrVT)); - SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg, - SDValue(Rdhwr, 0)); - SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT); - ReplaceUses(SDValue(Node, 0), ResNode); - return ResNode.getNode(); - } } // Select the default instruction @@ -776,5 +147,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, /// createMipsISelDag - This pass converts a legalized DAG into a /// MIPS-specific DAG, ready for instruction scheduling. FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) { - return new MipsDAGToDAGISel(TM); + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16ISelDag(TM); + + return llvm::createMipsSEISelDag(TM); } diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h new file mode 100644 index 0000000000..cf0f9c58aa --- /dev/null +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -0,0 +1,93 @@ +//===---- MipsISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the MIPS target. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSISELDAGTODAG_H +#define MIPSISELDAGTODAG_H + +#include "Mips.h" +#include "MipsSubtarget.h" +#include "MipsTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MipsDAGToDAGISel - MIPS specific code to select MIPS machine +// instructions for SelectionDAG operations. +//===----------------------------------------------------------------------===// +namespace llvm { + +class MipsDAGToDAGISel : public SelectionDAGISel { +public: + explicit MipsDAGToDAGISel(MipsTargetMachine &TM) + : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {} + + // Pass Name + virtual const char *getPassName() const { + return "MIPS DAG->DAG Pattern Instruction Selection"; + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + +protected: + SDNode *getGlobalBaseReg(); + + /// Keep a pointer to the MipsSubtarget around so that we can make the right + /// decision when generating code for different targets. + const MipsSubtarget &Subtarget; + +private: + // Include the pieces autogenerated from the target description. + #include "MipsGenDAGISel.inc" + + // Complex Pattern. + /// (reg + imm). + virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Fall back on this function if all else fails. + virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Match integer address pattern. + virtual bool selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias); + + virtual SDNode *Select(SDNode *N); + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0; + + // getImm - Return a target constant with the specified value. + inline SDValue getImm(const SDNode *Node, uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); + } + + virtual void processFunctionAfterISel(MachineFunction &MF) = 0; + + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); +}; + +/// createMipsISelDag - This pass converts a legalized DAG into a +/// MIPS-specific DAG, ready for instruction scheduling. +FunctionPass *createMipsISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index c452dee8d0..e2219f257e 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-lower" -#include <set> #include "MipsISelLowering.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" @@ -42,26 +41,9 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt<bool> -EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, - cl::desc("MIPS: Enable tail calls."), cl::init(false)); - -static cl::opt<bool> LargeGOT("mxgot", cl::Hidden, cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false)); -static cl::opt<bool> -Mips16HardFloat("mips16-hard-float", cl::NotHidden, - cl::desc("MIPS: mips16 hard float enable."), - cl::init(false)); - -static cl::opt<bool> DontExpandCondPseudos16( - "mips16-dont-expand-cond-pseudo", - cl::init(false), - cl::desc("Dont expand conditional move related " - "pseudos for Mips 16"), - cl::Hidden); - - static const uint16_t O32IntRegs[4] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; @@ -88,7 +70,7 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { return true; } -static SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) { +SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const { MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>(); return DAG.getRegister(FI->getGlobalBaseReg(), Ty); } @@ -123,7 +105,8 @@ static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) { DAG.getNode(MipsISD::Lo, DL, Ty, Lo)); } -static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) { +SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG, + bool HasMips64) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; @@ -137,7 +120,8 @@ static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) { return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo); } -static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) { +SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG, + unsigned Flag) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), @@ -146,8 +130,9 @@ static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) { MachinePointerInfo::getGOT(), false, false, false, 0); } -static SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, - unsigned HiFlag, unsigned LoFlag) { +SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, + unsigned HiFlag, + unsigned LoFlag) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag)); @@ -173,12 +158,18 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T"; case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F"; case MipsISD::FPRound: return "MipsISD::FPRound"; + case MipsISD::ExtractLOHI: return "MipsISD::ExtractLOHI"; + case MipsISD::InsertLOHI: return "MipsISD::InsertLOHI"; + case MipsISD::Mult: return "MipsISD::Mult"; + case MipsISD::Multu: return "MipsISD::Multu"; case MipsISD::MAdd: return "MipsISD::MAdd"; case MipsISD::MAddu: return "MipsISD::MAddu"; case MipsISD::MSub: return "MipsISD::MSub"; case MipsISD::MSubu: return "MipsISD::MSubu"; case MipsISD::DivRem: return "MipsISD::DivRem"; case MipsISD::DivRemU: return "MipsISD::DivRemU"; + case MipsISD::DivRem16: return "MipsISD::DivRem16"; + case MipsISD::DivRemU16: return "MipsISD::DivRemU16"; case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64"; case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64"; case MipsISD::Wrapper: return "MipsISD::Wrapper"; @@ -211,110 +202,17 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { } } -namespace { - struct LTStr { - bool operator()(const char *S1, const char *S2) const - { - return strcmp(S1, S2) < 0; - } - }; - - std::set<const char*, LTStr> NoHelperNeeded; -} - -void MipsTargetLowering::setMips16LibcallName - (RTLIB::Libcall L, const char *Name) { - setLibcallName(L, Name); - NoHelperNeeded.insert(Name); -} - -void MipsTargetLowering::setMips16HardFloatLibCalls() { - setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); - setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); - setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); - setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); - setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); - setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); - setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); - setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); - setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); - setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); - setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); - setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); - setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); - setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); - setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); - setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); - setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); - setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); - setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); - setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); - setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); - setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); - setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); - setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); - setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); - setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); - setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); - setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); - setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); - setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); - setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2"); - setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2"); -} - MipsTargetLowering:: MipsTargetLowering(MipsTargetMachine &TM) : TargetLowering(TM, new MipsTargetObjectFile()), Subtarget(&TM.getSubtarget<MipsSubtarget>()), HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()), IsO32(Subtarget->isABI_O32()) { - // Mips does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? - // Set up the register classes - addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); - - if (HasMips64) - addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass); - - if (Subtarget->inMips16Mode()) { - addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass); - if (Mips16HardFloat) - setMips16HardFloatLibCalls(); - } - - if (Subtarget->hasDSP()) { - MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; - - for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { - addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass); - - // Expand all builtin opcodes. - for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) - setOperationAction(Opc, VecTys[i], Expand); - - setOperationAction(ISD::LOAD, VecTys[i], Legal); - setOperationAction(ISD::STORE, VecTys[i], Legal); - setOperationAction(ISD::BITCAST, VecTys[i], Legal); - } - } - - if (!TM.Options.UseSoftFloat) { - addRegisterClass(MVT::f32, &Mips::FGR32RegClass); - - // When dealing with single precision only, use libcalls - if (!Subtarget->isSingleFloat()) { - if (HasMips64) - addRegisterClass(MVT::f64, &Mips::FGR64RegClass); - else - addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); - } - } - // Load extented operations for i1 types must be promoted setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); @@ -348,18 +246,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); - if (Subtarget->inMips16Mode()) { - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); - } - else { - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - } - if (!Subtarget->inMips16Mode()) { - setOperationAction(ISD::LOAD, MVT::i32, Custom); - setOperationAction(ISD::STORE, MVT::i32, Custom); - } if (!TM.Options.NoNaNsFPMath) { setOperationAction(ISD::FABS, MVT::f32, Custom); @@ -472,21 +358,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); - if (Subtarget->inMips16Mode()) { - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); - } - setInsertFencesForAtomic(true); if (!Subtarget->hasSEInReg()) { @@ -511,8 +382,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setTruncStoreAction(MVT::i64, MVT::i32, Custom); } - setTargetDAGCombine(ISD::ADDE); - setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::SDIVREM); setTargetDAGCombine(ISD::UDIVREM); setTargetDAGCombine(ISD::SELECT); @@ -523,7 +392,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setMinFunctionAlignment(HasMips64 ? 3 : 2); setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP); - computeRegisterProperties(); setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0); setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1); @@ -531,22 +399,11 @@ MipsTargetLowering(MipsTargetMachine &TM) MaxStoresPerMemcpy = 16; } -bool -MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { - MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; - - if (Subtarget->inMips16Mode()) - return false; +const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) { + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16TargetLowering(TM); - switch (SVT) { - case MVT::i64: - case MVT::i32: - if (Fast) - *Fast = true; - return true; - default: - return false; - } + return llvm::createMipsSETargetLowering(TM); } EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { @@ -555,178 +412,6 @@ EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { return VT.changeVectorElementTypeToInteger(); } -// selectMADD - -// Transforms a subgraph in CurDAG if the following pattern is found: -// (addc multLo, Lo0), (adde multHi, Hi0), -// where, -// multHi/Lo: product of multiplication -// Lo0: initial value of Lo register -// Hi0: initial value of Hi register -// Return true if pattern matching was successful. -static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { - // ADDENode's second operand must be a flag output of an ADDC node in order - // for the matching to be successful. - SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); - - if (ADDCNode->getOpcode() != ISD::ADDC) - return false; - - SDValue MultHi = ADDENode->getOperand(0); - SDValue MultLo = ADDCNode->getOperand(0); - SDNode *MultNode = MultHi.getNode(); - unsigned MultOpc = MultHi.getOpcode(); - - // MultHi and MultLo must be generated by the same node, - if (MultLo.getNode() != MultNode) - return false; - - // and it must be a multiplication. - if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) - return false; - - // MultLo amd MultHi must be the first and second output of MultNode - // respectively. - if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) - return false; - - // Transform this to a MADD only if ADDENode and ADDCNode are the only users - // of the values of MultNode, in which case MultNode will be removed in later - // phases. - // If there exist users other than ADDENode or ADDCNode, this function returns - // here, which will result in MultNode being mapped to a single MULT - // instruction node rather than a pair of MULT and MADD instructions being - // produced. - if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) - return false; - - SDValue Chain = CurDAG->getEntryNode(); - DebugLoc DL = ADDENode->getDebugLoc(); - - // create MipsMAdd(u) node - MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd; - - SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Glue, - MultNode->getOperand(0),// Factor 0 - MultNode->getOperand(1),// Factor 1 - ADDCNode->getOperand(1),// Lo0 - ADDENode->getOperand(1));// Hi0 - - // create CopyFromReg nodes - SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, DL, Mips::LO, MVT::i32, - MAdd); - SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), DL, - Mips::HI, MVT::i32, - CopyFromLo.getValue(2)); - - // replace uses of adde and addc here - if (!SDValue(ADDCNode, 0).use_empty()) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), CopyFromLo); - - if (!SDValue(ADDENode, 0).use_empty()) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), CopyFromHi); - - return true; -} - -// selectMSUB - -// Transforms a subgraph in CurDAG if the following pattern is found: -// (addc Lo0, multLo), (sube Hi0, multHi), -// where, -// multHi/Lo: product of multiplication -// Lo0: initial value of Lo register -// Hi0: initial value of Hi register -// Return true if pattern matching was successful. -static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { - // SUBENode's second operand must be a flag output of an SUBC node in order - // for the matching to be successful. - SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); - - if (SUBCNode->getOpcode() != ISD::SUBC) - return false; - - SDValue MultHi = SUBENode->getOperand(1); - SDValue MultLo = SUBCNode->getOperand(1); - SDNode *MultNode = MultHi.getNode(); - unsigned MultOpc = MultHi.getOpcode(); - - // MultHi and MultLo must be generated by the same node, - if (MultLo.getNode() != MultNode) - return false; - - // and it must be a multiplication. - if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) - return false; - - // MultLo amd MultHi must be the first and second output of MultNode - // respectively. - if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) - return false; - - // Transform this to a MSUB only if SUBENode and SUBCNode are the only users - // of the values of MultNode, in which case MultNode will be removed in later - // phases. - // If there exist users other than SUBENode or SUBCNode, this function returns - // here, which will result in MultNode being mapped to a single MULT - // instruction node rather than a pair of MULT and MSUB instructions being - // produced. - if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) - return false; - - SDValue Chain = CurDAG->getEntryNode(); - DebugLoc DL = SUBENode->getDebugLoc(); - - // create MipsSub(u) node - MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub; - - SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, - MultNode->getOperand(0),// Factor 0 - MultNode->getOperand(1),// Factor 1 - SUBCNode->getOperand(0),// Lo0 - SUBENode->getOperand(0));// Hi0 - - // create CopyFromReg nodes - SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, DL, Mips::LO, MVT::i32, - MSub); - SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), DL, - Mips::HI, MVT::i32, - CopyFromLo.getValue(2)); - - // replace uses of sube and subc here - if (!SDValue(SUBCNode, 0).use_empty()) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), CopyFromLo); - - if (!SDValue(SUBENode, 0).use_empty()) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), CopyFromHi); - - return true; -} - -static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { - if (DCI.isBeforeLegalize()) - return SDValue(); - - if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && - selectMADD(N, &DAG)) - return SDValue(N, 0); - - return SDValue(); -} - -static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { - if (DCI.isBeforeLegalize()) - return SDValue(); - - if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && - selectMSUB(N, &DAG)) - return SDValue(N, 0); - - return SDValue(); -} - static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { @@ -736,8 +421,8 @@ static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG, EVT Ty = N->getValueType(0); unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64; unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64; - unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem : - MipsISD::DivRemU; + unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem16 : + MipsISD::DivRemU16; DebugLoc DL = N->getDebugLoc(); SDValue DivRem = DAG.getNode(Opc, DL, MVT::Glue, @@ -791,8 +476,9 @@ static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { } -// Returns true if condition code has to be inverted. -static bool invertFPCondCode(Mips::CondCode CC) { +/// This function returns true if the floating point conditional branches and +/// conditional moves which use condition code CC should be inverted. +static bool invertFPCondCodeUser(Mips::CondCode CC) { if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT) return false; @@ -828,9 +514,8 @@ static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) { // Creates and returns a CMovFPT/F node. static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True, SDValue False, DebugLoc DL) { - bool invert = invertFPCondCode((Mips::CondCode) - cast<ConstantSDNode>(Cond.getOperand(2)) - ->getSExtValue()); + ConstantSDNode *CC = cast<ConstantSDNode>(Cond.getOperand(2)); + bool invert = invertFPCondCodeUser((Mips::CondCode)CC->getSExtValue()); return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL, True.getValueType(), True, False, Cond); @@ -997,10 +682,6 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) switch (Opc) { default: break; - case ISD::ADDE: - return performADDECombine(N, DAG, DCI, Subtarget); - case ISD::SUBE: - return performSUBECombine(N, DAG, DCI, Subtarget); case ISD::SDIVREM: case ISD::UDIVREM: return performDivRemCombine(N, DAG, DCI, Subtarget); @@ -1042,32 +723,32 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - case ISD::BR_JT: return lowerBR_JT(Op, DAG); - case ISD::BRCOND: return lowerBRCOND(Op, DAG); - case ISD::ConstantPool: return lowerConstantPool(Op, DAG); - case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG); - case ISD::BlockAddress: return lowerBlockAddress(Op, DAG); - case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG); - case ISD::JumpTable: return lowerJumpTable(Op, DAG); - case ISD::SELECT: return lowerSELECT(Op, DAG); - case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG); - case ISD::SETCC: return lowerSETCC(Op, DAG); - case ISD::VASTART: return lowerVASTART(Op, DAG); - case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG); - case ISD::FABS: return lowerFABS(Op, DAG); - case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG); - case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); - case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG); - case ISD::MEMBARRIER: return lowerMEMBARRIER(Op, DAG); - case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG); - case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG); - case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true); - case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false); - case ISD::LOAD: return lowerLOAD(Op, DAG); - case ISD::STORE: return lowerSTORE(Op, DAG); - case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); - case ISD::ADD: return lowerADD(Op, DAG); + case ISD::BR_JT: return lowerBR_JT(Op, DAG); + case ISD::BRCOND: return lowerBRCOND(Op, DAG); + case ISD::ConstantPool: return lowerConstantPool(Op, DAG); + case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG); + case ISD::BlockAddress: return lowerBlockAddress(Op, DAG); + case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG); + case ISD::JumpTable: return lowerJumpTable(Op, DAG); + case ISD::SELECT: return lowerSELECT(Op, DAG); + case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG); + case ISD::SETCC: return lowerSETCC(Op, DAG); + case ISD::VASTART: return lowerVASTART(Op, DAG); + case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG); + case ISD::FABS: return lowerFABS(Op, DAG); + case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG); + case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); + case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG); + case ISD::MEMBARRIER: return lowerMEMBARRIER(Op, DAG); + case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG); + case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG); + case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true); + case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false); + case ISD::LOAD: return lowerLOAD(Op, DAG); + case ISD::STORE: return lowerSTORE(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::ADD: return lowerADD(Op, DAG); } return SDValue(); } @@ -1087,358 +768,6 @@ addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC) return VReg; } -// Get fp branch code (not opcode) from condition code. -static Mips::FPBranchCode getFPBranchCodeFromCond(Mips::CondCode CC) { - if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT) - return Mips::BRANCH_T; - - assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) && - "Invalid CondCode."); - - return Mips::BRANCH_F; -} - -MachineBasicBlock * -MipsTargetLowering::emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ - // $bb: - // bposge32_pseudo $vr0 - // => - // $bb: - // bposge32 $tbb - // $fbb: - // li $vr2, 0 - // b $sink - // $tbb: - // li $vr1, 1 - // $sink: - // $vr0 = phi($vr2, $fbb, $vr1, $tbb) - - MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const TargetRegisterClass *RC = &Mips::CPURegsRegClass; - DebugLoc DL = MI->getDebugLoc(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); - MachineFunction *F = BB->getParent(); - MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, FBB); - F->insert(It, TBB); - F->insert(It, Sink); - - // Transfer the remainder of BB and its successor edges to Sink. - Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - Sink->transferSuccessorsAndUpdatePHIs(BB); - - // Add successors. - BB->addSuccessor(FBB); - BB->addSuccessor(TBB); - FBB->addSuccessor(Sink); - TBB->addSuccessor(Sink); - - // Insert the real bposge32 instruction to $BB. - BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); - - // Fill $FBB. - unsigned VR2 = RegInfo.createVirtualRegister(RC); - BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) - .addReg(Mips::ZERO).addImm(0); - BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); - - // Fill $TBB. - unsigned VR1 = RegInfo.createVirtualRegister(RC); - BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) - .addReg(Mips::ZERO).addImm(1); - - // Insert phi function to $Sink. - BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), - MI->getOperand(0).getReg()) - .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return Sink; -} - -MachineBasicBlock *MipsTargetLowering::emitSel16(unsigned Opc, MachineInstr *MI, - MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg()) - .addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), DL, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} - -MachineBasicBlock *MipsTargetLowering::emitSelT16 - (unsigned Opc1, unsigned Opc2, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) - .addReg(MI->getOperand(4).getReg()); - BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), DL, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - -} - - -MachineBasicBlock *MipsTargetLowering::emitSeliT16 - (unsigned Opc1, unsigned Opc2, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) - .addImm(MI->getOperand(4).getImm()); - BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), DL, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - -} - - -MachineBasicBlock - *MipsTargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, - MachineInstr *MI, - MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned regX = MI->getOperand(0).getReg(); - unsigned regY = MI->getOperand(1).getReg(); - MachineBasicBlock *target = MI->getOperand(2).getMBB(); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} - - -MachineBasicBlock *MipsTargetLowering::emitFEXT_T8I8I16_ins( - unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned regX = MI->getOperand(0).getReg(); - int64_t imm = MI->getOperand(1).getImm(); - MachineBasicBlock *target = MI->getOperand(2).getMBB(); - unsigned CmpOpc; - if (isUInt<8>(imm)) - CmpOpc = CmpiOpc; - else if (isUInt<16>(imm)) - CmpOpc = CmpiXOpc; - else - llvm_unreachable("immediate field not usable"); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} - - -static unsigned Mips16WhichOp8uOr16simm - (unsigned shortOp, unsigned longOp, int64_t Imm) { - if (isUInt<8>(Imm)) - return shortOp; - else if (isInt<16>(Imm)) - return longOp; - else - llvm_unreachable("immediate field not usable"); -} - -MachineBasicBlock *MipsTargetLowering::emitFEXT_CCRX16_ins( - unsigned SltOpc, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned CC = MI->getOperand(0).getReg(); - unsigned regX = MI->getOperand(1).getReg(); - unsigned regY = MI->getOperand(2).getReg(); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(SltOpc)).addReg(regX).addReg(regY); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} -MachineBasicBlock *MipsTargetLowering::emitFEXT_CCRXI16_ins( - unsigned SltiOpc, unsigned SltiXOpc, - MachineInstr *MI, MachineBasicBlock *BB )const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned CC = MI->getOperand(0).getReg(); - unsigned regX = MI->getOperand(1).getReg(); - int64_t Imm = MI->getOperand(2).getImm(); - unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(SltOpc)).addReg(regX).addImm(Imm); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - -} MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -1548,77 +877,6 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case Mips::ATOMIC_CMP_SWAP_I64: case Mips::ATOMIC_CMP_SWAP_I64_P8: return emitAtomicCmpSwap(MI, BB, 8); - case Mips::BPOSGE32_PSEUDO: - return emitBPOSGE32(MI, BB); - case Mips::SelBeqZ: - return emitSel16(Mips::BeqzRxImm16, MI, BB); - case Mips::SelBneZ: - return emitSel16(Mips::BnezRxImm16, MI, BB); - case Mips::SelTBteqZCmpi: - return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB); - case Mips::SelTBteqZSlti: - return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB); - case Mips::SelTBteqZSltiu: - return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SelTBtneZCmpi: - return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB); - case Mips::SelTBtneZSlti: - return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); - case Mips::SelTBtneZSltiu: - return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SelTBteqZCmp: - return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); - case Mips::SelTBteqZSlt: - return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); - case Mips::SelTBteqZSltu: - return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); - case Mips::SelTBtneZCmp: - return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); - case Mips::SelTBtneZSlt: - return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); - case Mips::SelTBtneZSltu: - return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); - case Mips::BteqzT8CmpX16: - return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); - case Mips::BteqzT8SltX16: - return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); - case Mips::BteqzT8SltuX16: - // TBD: figure out a way to get this or remove the instruction - // altogether. - return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); - case Mips::BtnezT8CmpX16: - return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); - case Mips::BtnezT8SltX16: - return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); - case Mips::BtnezT8SltuX16: - // TBD: figure out a way to get this or remove the instruction - // altogether. - return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); - case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); - case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); - case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - break; - case Mips::SltCCRxRy16: - return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB); - break; - case Mips::SltiCCRxImmX16: - return emitFEXT_CCRXI16_ins - (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::SltiuCCRxImmX16: - return emitFEXT_CCRXI16_ins - (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SltuCCRxRy16: - return emitFEXT_CCRX16_ins - (Mips::SltuRxRy16, MI, BB); } } @@ -2140,8 +1398,8 @@ lowerBRCOND(SDValue Op, SelectionDAG &DAG) const SDValue CCNode = CondRes.getOperand(2); Mips::CondCode CC = (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue(); - SDValue BrCode = DAG.getConstant(getFPBranchCodeFromCond(CC), MVT::i32); - + unsigned Opc = invertFPCondCodeUser(CC) ? Mips::BRANCH_F : Mips::BRANCH_T; + SDValue BrCode = DAG.getConstant(Opc, MVT::i32); return DAG.getNode(MipsISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode, Dest, CondRes); } @@ -2792,6 +2050,22 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7); } +static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) { + SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, + DAG.getConstant(0, MVT::i32)); + SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi); +} + +static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) { + SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, + DAG.getConstant(Mips::sub_lo, MVT::i32)); + SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, + DAG.getConstant(Mips::sub_hi, MVT::i32)); + return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); +} + // This function expands mips intrinsic nodes which have 64-bit input operands // or output values. // @@ -2804,48 +2078,51 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { // v1 = copy hi // out64 = merge-values (v0, v1) // -static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, - unsigned Opc, bool HasI64In, bool HasI64Out) { +static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { DebugLoc DL = Op.getDebugLoc(); bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; - SDValue Chain = HasChainIn ? Op->getOperand(0) : DAG.getEntryNode(); SmallVector<SDValue, 3> Ops; + unsigned OpNo = 0; - if (HasI64In) { - SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, - Op->getOperand(1 + HasChainIn), - DAG.getConstant(0, MVT::i32)); - SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, - Op->getOperand(1 + HasChainIn), - DAG.getConstant(1, MVT::i32)); + // See if Op has a chain input. + if (HasChainIn) + Ops.push_back(Op->getOperand(OpNo++)); - Chain = DAG.getCopyToReg(Chain, DL, Mips::LO, InLo, SDValue()); - Chain = DAG.getCopyToReg(Chain, DL, Mips::HI, InHi, Chain.getValue(1)); + // The next operand is the intrinsic opcode. + assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); - Ops.push_back(Chain); - Ops.append(Op->op_begin() + HasChainIn + 2, Op->op_end()); - Ops.push_back(Chain.getValue(1)); - } else { - Ops.push_back(Chain); - Ops.append(Op->op_begin() + HasChainIn + 1, Op->op_end()); - } + // See if the next operand has type i64. + SDValue Opnd = Op->getOperand(++OpNo), In64; + + if (Opnd.getValueType() == MVT::i64) + In64 = initAccumulator(Opnd, DL, DAG); + else + Ops.push_back(Opnd); + + // Push the remaining operands. + for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) + Ops.push_back(Op->getOperand(OpNo)); + + // Add In64 to the end of the list. + if (In64.getNode()) + Ops.push_back(In64); - if (!HasI64Out) - return DAG.getNode(Opc, DL, Op->value_begin(), Op->getNumValues(), - Ops.begin(), Ops.size()); + // Scan output. + SmallVector<EVT, 2> ResTys; - SDValue Intr = DAG.getNode(Opc, DL, DAG.getVTList(MVT::Other, MVT::Glue), - Ops.begin(), Ops.size()); - SDValue OutLo = DAG.getCopyFromReg(Intr.getValue(0), DL, Mips::LO, MVT::i32, - Intr.getValue(1)); - SDValue OutHi = DAG.getCopyFromReg(OutLo.getValue(1), DL, Mips::HI, MVT::i32, - OutLo.getValue(2)); - SDValue Out = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, OutLo, OutHi); + for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end(); + I != E; ++I) + ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I); + + // Create node. + SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size()); + SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val; if (!HasChainIn) return Out; - SDValue Vals[] = { Out, OutHi.getValue(1) }; + assert(Val->getValueType(1) == MVT::Other); + SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; return DAG.getMergeValues(Vals, 2, DL); } @@ -2855,37 +2132,37 @@ SDValue MipsTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, default: return SDValue(); case Intrinsic::mips_shilo: - return lowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::SHILO); case Intrinsic::mips_dpau_h_qbl: - return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); case Intrinsic::mips_dpau_h_qbr: - return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); case Intrinsic::mips_dpsu_h_qbl: - return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); case Intrinsic::mips_dpsu_h_qbr: - return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); case Intrinsic::mips_dpa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); case Intrinsic::mips_dps_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH); case Intrinsic::mips_dpax_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); case Intrinsic::mips_dpsx_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); case Intrinsic::mips_mulsa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); case Intrinsic::mips_mult: - return lowerDSPIntr(Op, DAG, MipsISD::MULT, false, true); + return lowerDSPIntr(Op, DAG, MipsISD::Mult); case Intrinsic::mips_multu: - return lowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true); + return lowerDSPIntr(Op, DAG, MipsISD::Multu); case Intrinsic::mips_madd: - return lowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAdd); case Intrinsic::mips_maddu: - return lowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAddu); case Intrinsic::mips_msub: - return lowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MSub); case Intrinsic::mips_msubu: - return lowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MSubu); } } @@ -2895,45 +2172,45 @@ SDValue MipsTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, default: return SDValue(); case Intrinsic::mips_extp: - return lowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTP); case Intrinsic::mips_extpdp: - return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP); case Intrinsic::mips_extr_w: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W); case Intrinsic::mips_extr_r_w: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W); case Intrinsic::mips_extr_rs_w: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W); case Intrinsic::mips_extr_s_h: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H); case Intrinsic::mips_mthlip: - return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP); case Intrinsic::mips_mulsaq_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH); case Intrinsic::mips_maq_s_w_phl: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL); case Intrinsic::mips_maq_s_w_phr: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR); case Intrinsic::mips_maq_sa_w_phl: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL); case Intrinsic::mips_maq_sa_w_phr: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR); case Intrinsic::mips_dpaq_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH); case Intrinsic::mips_dpsq_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH); case Intrinsic::mips_dpaq_sa_l_w: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W); case Intrinsic::mips_dpsq_sa_l_w: - return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W); case Intrinsic::mips_dpaqx_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH); case Intrinsic::mips_dpaqx_sa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH); case Intrinsic::mips_dpsqx_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); case Intrinsic::mips_dpsqx_sa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); } } @@ -3074,28 +2351,6 @@ static unsigned getNextIntArgReg(unsigned Reg) { return (Reg == Mips::A0) ? Mips::A1 : Mips::A3; } -/// isEligibleForTailCallOptimization - Check whether the call is eligible -/// for tail call optimization. -bool MipsTargetLowering:: -isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, - unsigned NextStackOffset, - const MipsFunctionInfo& FI) const { - if (!EnableMipsTailCalls) - return false; - - // No tail call optimization for mips16. - if (Subtarget->inMips16Mode()) - return false; - - // Return false if either the callee or caller has a byval argument. - if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) - return false; - - // Return true if the callee's argument area is no larger than the - // caller's. - return NextStackOffset <= FI.getIncomingArgSize(); -} - SDValue MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, SDValue Arg, DebugLoc DL, @@ -3114,161 +2369,48 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, /*isVolatile=*/ true, false, 0); } -// -// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much -// cleaner way to do all of this but it will have to wait until the traditional -// gcc mechanism is completed. -// -// For Pic, in order for Mips16 code to call Mips32 code which according the abi -// have either arguments or returned values placed in floating point registers, -// we use a set of helper functions. (This includes functions which return type -// complex which on Mips are returned in a pair of floating point registers). -// -// This is an encoding that we inherited from gcc. -// In Mips traditional O32, N32 ABI, floating point numbers are passed in -// floating point argument registers 1,2 only when the first and optionally -// the second arguments are float (sf) or double (df). -// For Mips16 we are only concerned with the situations where floating point -// arguments are being passed in floating point registers by the ABI, because -// Mips16 mode code cannot execute floating point instructions to load those -// values and hence helper functions are needed. -// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df) -// the helper function suffixs for these are: -// 0, 1, 5, 9, 2, 6, 10 -// this suffix can then be calculated as follows: -// for a given argument Arg: -// Arg1x, Arg2x = 1 : Arg is sf -// 2 : Arg is df -// 0: Arg is neither sf or df -// So this stub is the string for number Arg1x + Arg2x*4. -// However not all numbers between 0 and 10 are possible, we check anyway and -// assert if the impossible exists. -// - -unsigned int MipsTargetLowering::getMips16HelperFunctionStubNumber - (ArgListTy &Args) const { - unsigned int resultNum = 0; - if (Args.size() >= 1) { - Type *t = Args[0].Ty; - if (t->isFloatTy()) { - resultNum = 1; - } - else if (t->isDoubleTy()) { - resultNum = 2; - } - } - if (resultNum) { - if (Args.size() >=2) { - Type *t = Args[1].Ty; - if (t->isFloatTy()) { - resultNum += 4; - } - else if (t->isDoubleTy()) { - resultNum += 8; - } - } +void MipsTargetLowering:: +getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + // Insert node "GP copy globalreg" before call to function. + // + // R_MIPS_CALL* operators (emitted when non-internal functions are called + // in PIC mode) allow symbols to be resolved via lazy binding. + // The lazy binding stub requires GP to point to the GOT. + if (IsPICCall && !InternalLinkage) { + unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP; + EVT Ty = IsN64 ? MVT::i64 : MVT::i32; + RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty))); } - return resultNum; -} -// -// prefixs are attached to stub numbers depending on the return type . -// return type: float sf_ -// double df_ -// single complex sc_ -// double complext dc_ -// others NO PREFIX -// -// -// The full name of a helper function is__mips16_call_stub + -// return type dependent prefix + stub number -// -// -// This is something that probably should be in a different source file and -// perhaps done differently but my main purpose is to not waste runtime -// on something that we can enumerate in the source. Another possibility is -// to have a python script to generate these mapping tables. This will do -// for now. There are a whole series of helper function mapping arrays, one -// for each return type class as outlined above. There there are 11 possible -// entries. Ones with 0 are ones which should never be selected -// -// All the arrays are similar except for ones which return neither -// sf, df, sc, dc, in which only care about ones which have sf or df as a -// first parameter. -// -#define P_ "__mips16_call_stub_" -#define MAX_STUB_NUMBER 10 -#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10" -#define T P "0" , T1 -#define P P_ -static char const * vMips16Helper[MAX_STUB_NUMBER+1] = - {0, T1 }; -#undef P -#define P P_ "sf_" -static char const * sfMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#define P P_ "df_" -static char const * dfMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#define P P_ "sc_" -static char const * scMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#define P P_ "dc_" -static char const * dcMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#undef P_ - - -const char* MipsTargetLowering:: - getMips16HelperFunction - (Type* RetTy, ArgListTy &Args, bool &needHelper) const { - const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args); -#ifndef NDEBUG - const unsigned int maxStubNum = 10; - assert(stubNum <= maxStubNum); - const bool validStubNum[maxStubNum+1] = - {true, true, true, false, false, true, true, false, false, true, true}; - assert(validStubNum[stubNum]); -#endif - const char *result; - if (RetTy->isFloatTy()) { - result = sfMips16Helper[stubNum]; - } - else if (RetTy ->isDoubleTy()) { - result = dfMips16Helper[stubNum]; - } - else if (RetTy->isStructTy()) { - // check if it's complex - if (RetTy->getNumContainedTypes() == 2) { - if ((RetTy->getContainedType(0)->isFloatTy()) && - (RetTy->getContainedType(1)->isFloatTy())) { - result = scMips16Helper[stubNum]; - } - else if ((RetTy->getContainedType(0)->isDoubleTy()) && - (RetTy->getContainedType(1)->isDoubleTy())) { - result = dcMips16Helper[stubNum]; - } - else { - llvm_unreachable("Uncovered condition"); - } - } - else { - llvm_unreachable("Uncovered condition"); - } - } - else { - if (stubNum == 0) { - needHelper = false; - return ""; - } - result = vMips16Helper[stubNum]; + // Build a sequence of copy-to-reg nodes chained together with token + // chain and flag operands which copy the outgoing args into registers. + // The InFlag in necessary since all emitted instructions must be + // stuck together. + SDValue InFlag; + + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); } - needHelper = true; - return result; + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(CLI.DAG.getRegisterMask(Mask)); + + if (InFlag.getNode()) + Ops.push_back(InFlag); } /// LowerCall - functions arguments are copied from virtual regs to @@ -3287,26 +2429,6 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; - const char* Mips16HelperFunction = 0; - bool NeedMips16Helper = false; - - if (Subtarget->inMips16Mode() && getTargetMachine().Options.UseSoftFloat && - Mips16HardFloat) { - // - // currently we don't have symbols tagged with the mips16 or mips32 - // qualifier so we will assume that we don't know what kind it is. - // and generate the helper - // - bool LookupHelper = true; - if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) { - LookupHelper = false; - } - } - if (LookupHelper) Mips16HelperFunction = - getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper); - - } MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering(); @@ -3466,80 +2588,17 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, GlobalOrExternal = true; } - SDValue JumpTarget = Callee; - - // T9 should contain the address of the callee function if - // -reloction-model=pic or it is an indirect call. - if (IsPICCall || !GlobalOrExternal) { - unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; - unsigned V0Reg = Mips::V0; - if (NeedMips16Helper) { - RegsToPass.push_front(std::make_pair(V0Reg, Callee)); - JumpTarget = DAG.getExternalSymbol( - Mips16HelperFunction, getPointerTy()); - JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT); - } - else { - RegsToPass.push_front(std::make_pair(T9Reg, Callee)); - - if (!Subtarget->inMips16Mode()) - JumpTarget = SDValue(); - } - } - - // Insert node "GP copy globalreg" before call to function. - // - // R_MIPS_CALL* operators (emitted when non-internal functions are called - // in PIC mode) allow symbols to be resolved via lazy binding. - // The lazy binding stub requires GP to point to the GOT. - if (IsPICCall && !InternalLinkage) { - unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP; - EVT Ty = IsN64 ? MVT::i64 : MVT::i32; - RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(DAG, Ty))); - } - - // Build a sequence of copy-to-reg nodes chained together with token - // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emitted instructions must be - // stuck together. - SDValue InFlag; - - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // MipsJmpLink = #chain, #target_address, #opt_in_flags... - // = Chain, Callee, Reg#1, Reg#2, ... - // - // Returns a chain & a flag for retval copy to use. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops(1, Chain); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - if (JumpTarget.getNode()) - Ops.push_back(JumpTarget); - - // Add argument registers to the end of the list so that they are - // known live into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - // Add a register mask operand representing the call-preserved registers. - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); - - if (InFlag.getNode()) - Ops.push_back(InFlag); + getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage, + CLI, Callee, Chain); if (IsTailCall) return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, &Ops[0], Ops.size()); Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); + SDValue InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, @@ -4124,14 +3183,14 @@ static bool isF128SoftLibCall(const char *CallSym) { const char * const *End = LibCalls + array_lengthof(LibCalls); // Check that LibCalls is sorted alphabetically. -#ifndef NDEBUG - LTStr Comp; + MipsTargetLowering::LTStr Comp; +#ifndef NDEBUG for (const char * const *I = LibCalls; I < End - 1; ++I) assert(Comp(*I, *(I + 1))); #endif - return std::binary_search(LibCalls, End, CallSym, LTStr()); + return std::binary_search(LibCalls, End, CallSym, Comp); } /// This function returns true if Ty is fp128 or i128 which was originally a diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index de925e16ab..cab71a61e0 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -68,6 +68,16 @@ namespace llvm { EH_RETURN, + // Node used to extract integer from accumulator. + ExtractLOHI, + + // Node used to insert integers to accumulator. + InsertLOHI, + + // Mult nodes. + Mult, + Multu, + // MAdd/Sub nodes MAdd, MAddu, @@ -77,6 +87,8 @@ namespace llvm { // DivRem(u) DivRem, DivRemU, + DivRem16, + DivRemU16, BuildPairF64, ExtractElementF64, @@ -152,9 +164,9 @@ namespace llvm { public: explicit MipsTargetLowering(MipsTargetMachine &TM); - virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + static const MipsTargetLowering *create(MipsTargetMachine &TM); - virtual bool allowsUnalignedMemoryAccesses (EVT VT, bool *Fast) const; + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } virtual void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, @@ -177,17 +189,34 @@ namespace llvm { EVT getSetCCResultType(EVT VT) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - private: - void setMips16LibcallName(RTLIB::Libcall, const char *Name); + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + struct LTStr { + bool operator()(const char *S1, const char *S2) const { + return strcmp(S1, S2) < 0; + } + }; - void setMips16HardFloatLibCalls(); + protected: + SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const; - unsigned int - getMips16HelperFunctionStubNumber(ArgListTy &Args) const; + SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const; - const char *getMips16HelperFunction - (Type* RetTy, ArgListTy &Args, bool &needHelper) const; + SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const; + + SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, + unsigned HiFlag, unsigned LoFlag) const; + + /// This function fills Ops, which is the list of operands that will later + /// be used when a function call node is created. It also generates + /// copyToReg nodes to set up argument registers. + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; /// ByValArgInfo - Byval argument information. struct ByValArgInfo { @@ -283,6 +312,7 @@ namespace llvm { bool HasMips64, IsN64, IsO32; + private: // Lower Operand helpers SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, @@ -321,9 +351,10 @@ namespace llvm { /// isEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. - bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, - unsigned NextStackOffset, - const MipsFunctionInfo& FI) const; + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const = 0; /// copyByValArg - Copy argument registers which were used to pass a byval /// argument to the stack. Create a stack frame object for the byval @@ -377,10 +408,6 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - virtual MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; - // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const; @@ -419,8 +446,6 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; - MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, - MachineBasicBlock *BB) const; MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand = false) const; MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI, @@ -430,29 +455,11 @@ namespace llvm { MachineBasicBlock *BB, unsigned Size) const; MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size) const; - MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2, - MachineInstr *MI, - MachineBasicBlock *BB) const; - - MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2, - MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, - MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_T8I8I16_ins( - unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, - MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_CCRX16_ins( - unsigned SltOpc, - MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_CCRXI16_ins( - unsigned SltiOpc, unsigned SltiXOpc, - MachineInstr *MI, MachineBasicBlock *BB )const; - }; + + /// Create MipsTargetLowering objects. + const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM); + const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM); } #endif // MipsISELLOWERING_H diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 891bdc1224..6b23057c9c 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -503,32 +503,27 @@ let Predicates = [IsFP64bit, HasStdEnc] in { def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; } -// Load/Store patterns. +// Patterns for loads/stores with a reg+imm operand. let AddedComplexity = 40 in { let Predicates = [IsN64, HasStdEnc] in { - def : MipsPat<(f32 (load addrRegImm:$a)), (LWC1_P8 addrRegImm:$a)>; - def : MipsPat<(store FGR32:$v, addrRegImm:$a), - (SWC1_P8 FGR32:$v, addrRegImm:$a)>; - def : MipsPat<(f64 (load addrRegImm:$a)), (LDC164_P8 addrRegImm:$a)>; - def : MipsPat<(store FGR64:$v, addrRegImm:$a), - (SDC164_P8 FGR64:$v, addrRegImm:$a)>; + def : LoadRegImmPat<LWC1_P8, f32, load>; + def : StoreRegImmPat<SWC1_P8, f32>; + def : LoadRegImmPat<LDC164_P8, f64, load>; + def : StoreRegImmPat<SDC164_P8, f64>; } let Predicates = [NotN64, HasStdEnc] in { - def : MipsPat<(f32 (load addrRegImm:$a)), (LWC1 addrRegImm:$a)>; - def : MipsPat<(store FGR32:$v, addrRegImm:$a), - (SWC1 FGR32:$v, addrRegImm:$a)>; + def : LoadRegImmPat<LWC1, f32, load>; + def : StoreRegImmPat<SWC1, f32>; } let Predicates = [NotN64, HasMips64, HasStdEnc] in { - def : MipsPat<(f64 (load addrRegImm:$a)), (LDC164 addrRegImm:$a)>; - def : MipsPat<(store FGR64:$v, addrRegImm:$a), - (SDC164 FGR64:$v, addrRegImm:$a)>; + def : LoadRegImmPat<LDC164, f64, load>; + def : StoreRegImmPat<SDC164, f64>; } let Predicates = [NotN64, NotMips64, HasStdEnc] in { - def : MipsPat<(f64 (load addrRegImm:$a)), (LDC1 addrRegImm:$a)>; - def : MipsPat<(store AFGR64:$v, addrRegImm:$a), - (SDC1 AFGR64:$v, addrRegImm:$a)>; + def : LoadRegImmPat<LDC1, f64, load>; + def : StoreRegImmPat<SDC1, f64>; } } diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 3cd9088140..8c05d97bea 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -86,6 +86,36 @@ public: /// Return the number of bytes of code the specified instruction may be. unsigned GetInstSizeInBytes(const MachineInstr *MI) const; + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0); + } + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0); + } + + virtual void storeRegToStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const = 0; + + virtual void loadRegFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const = 0; + protected: bool isZeroImm(const MachineOperand &op) const; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 25b5d240be..3a82e81713 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -23,13 +23,16 @@ def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisInt<4>]>; def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def SDT_MipsMAddMSub : SDTypeProfile<0, 4, - [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, - SDTCisSameAs<2, 3>]>; -def SDT_MipsDivRem : SDTypeProfile<0, 2, - [SDTCisInt<0>, - SDTCisSameAs<0, 1>]>; +def SDT_ExtractLOHI : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVT<1, untyped>, + SDTCisVT<2, i32>]>; +def SDT_InsertLOHI : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, + SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; +def SDT_MipsMultDiv : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisInt<1>, + SDTCisSameAs<1, 2>]>; +def SDT_MipsMAddMSub : SDTypeProfile<1, 3, + [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>, + SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; +def SDT_MipsDivRem16 : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>; def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; @@ -82,20 +85,27 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd, [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>; +// Node used to extract integer from LO/HI register. +def ExtractLOHI : SDNode<"MipsISD::ExtractLOHI", SDT_ExtractLOHI>; + +// Node used to insert 32-bit integers to LOHI register pair. +def InsertLOHI : SDNode<"MipsISD::InsertLOHI", SDT_InsertLOHI>; + +// Mult nodes. +def MipsMult : SDNode<"MipsISD::Mult", SDT_MipsMultDiv>; +def MipsMultu : SDNode<"MipsISD::Multu", SDT_MipsMultDiv>; + // MAdd*/MSub* nodes -def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub, - [SDNPOptInGlue, SDNPOutGlue]>; -def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub, - [SDNPOptInGlue, SDNPOutGlue]>; -def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub, - [SDNPOptInGlue, SDNPOutGlue]>; -def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub, - [SDNPOptInGlue, SDNPOutGlue]>; +def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub>; +def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub>; +def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub>; +def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub>; // DivRem(u) nodes -def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsDivRem, - [SDNPOutGlue]>; -def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem, +def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsMultDiv>; +def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsMultDiv>; +def MipsDivRem16 : SDNode<"MipsISD::DivRem16", SDT_MipsDivRem16, [SDNPOutGlue]>; +def MipsDivRemU16 : SDNode<"MipsISD::DivRemU16", SDT_MipsDivRem16, [SDNPOutGlue]>; // Target constant nodes that are not part of any isel patterns and remain @@ -256,6 +266,7 @@ def mem : Operand<i32> { let MIOperandInfo = (ops CPURegs, simm16); let EncoderMethod = "getMemEncoding"; let ParserMatchClass = MipsMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } def mem64 : Operand<i64> { @@ -263,18 +274,21 @@ def mem64 : Operand<i64> { let MIOperandInfo = (ops CPU64Regs, simm16_64); let EncoderMethod = "getMemEncoding"; let ParserMatchClass = MipsMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } def mem_ea : Operand<i32> { let PrintMethod = "printMemOperandEA"; let MIOperandInfo = (ops CPURegs, simm16); let EncoderMethod = "getMemEncoding"; + let OperandType = "OPERAND_MEMORY"; } def mem_ea_64 : Operand<i64> { let PrintMethod = "printMemOperandEA"; let MIOperandInfo = (ops CPU64Regs, simm16_64); let EncoderMethod = "getMemEncoding"; + let OperandType = "OPERAND_MEMORY"; } // size operand of ext instruction @@ -378,10 +392,9 @@ class ArithLogicI<string opstr, Operand Od, RegisterOperand RO, } // Arithmetic Multiply ADD/SUB -class MArithR<string opstr, SDPatternOperator op = null_frag, bit isComm = 0> : +class MArithR<string opstr, bit isComm = 0> : InstSE<(outs), (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt), - !strconcat(opstr, "\t$rs, $rt"), - [(op CPURegsOpnd:$rs, CPURegsOpnd:$rt, LO, HI)], IIImul, FrmR> { + !strconcat(opstr, "\t$rs, $rt"), [], IIImul, FrmR> { let Defs = [HI, LO]; let Uses = [HI, LO]; let isCommutable = isComm; @@ -427,33 +440,39 @@ class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, // Memory Load/Store class Load<string opstr, SDPatternOperator OpNode, RegisterClass RC, - Operand MemOpnd> : + Operand MemOpnd, ComplexPattern Addr> : InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(set RC:$rt, (OpNode addr:$addr))], NoItinerary, FrmI> { + [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI> { let DecoderMethod = "DecodeMem"; let canFoldAsLoad = 1; + let mayLoad = 1; } class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC, - Operand MemOpnd> : + Operand MemOpnd, ComplexPattern Addr> : InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(OpNode RC:$rt, addr:$addr)], NoItinerary, FrmI> { + [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI> { let DecoderMethod = "DecodeMem"; + let mayStore = 1; } multiclass LoadM<string opstr, RegisterClass RC, - SDPatternOperator OpNode = null_frag> { - def NAME : Load<opstr, OpNode, RC, mem>, Requires<[NotN64, HasStdEnc]>; - def _P8 : Load<opstr, OpNode, RC, mem64>, Requires<[IsN64, HasStdEnc]> { + SDPatternOperator OpNode = null_frag, + ComplexPattern Addr = addr> { + def NAME : Load<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>; + def _P8 : Load<opstr, OpNode, RC, mem64, Addr>, + Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; } } multiclass StoreM<string opstr, RegisterClass RC, - SDPatternOperator OpNode = null_frag> { - def NAME : Store<opstr, OpNode, RC, mem>, Requires<[NotN64, HasStdEnc]>; - def _P8 : Store<opstr, OpNode, RC, mem64>, Requires<[IsN64, HasStdEnc]> { + SDPatternOperator OpNode = null_frag, + ComplexPattern Addr = addr> { + def NAME : Store<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>; + def _P8 : Store<opstr, OpNode, RC, mem64, Addr>, + Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; } @@ -623,11 +642,34 @@ class Mult<string opstr, InstrItinClass itin, RegisterOperand RO, let neverHasSideEffects = 1; } -class Div<SDNode op, string opstr, InstrItinClass itin, RegisterOperand RO, +// Pseudo multiply/divide instruction with explicit accumulator register +// operands. +class MultDivPseudo<Instruction RealInst, RegisterClass R0, RegisterOperand R1, + SDPatternOperator OpNode, InstrItinClass Itin, + bit IsComm = 1, bit HasSideEffects = 0> : + PseudoSE<(outs R0:$ac), (ins R1:$rs, R1:$rt), + [(set R0:$ac, (OpNode R1:$rs, R1:$rt))], Itin>, + PseudoInstExpansion<(RealInst R1:$rs, R1:$rt)> { + let isCommutable = IsComm; + let hasSideEffects = HasSideEffects; +} + +// Pseudo multiply add/sub instruction with explicit accumulator register +// operands. +class MAddSubPseudo<Instruction RealInst, SDPatternOperator OpNode> + : PseudoSE<(outs ACRegs:$ac), + (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin), + [(set ACRegs:$ac, + (OpNode CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin))], + IIImul>, + PseudoInstExpansion<(RealInst CPURegsOpnd:$rs, CPURegsOpnd:$rt)> { + string Constraints = "$acin = $ac"; +} + +class Div<string opstr, InstrItinClass itin, RegisterOperand RO, list<Register> DefRegs> : - InstSE<(outs), (ins RO:$rs, RO:$rt), - !strconcat(opstr, "\t$$zero, $rs, $rt"), [(op RO:$rs, RO:$rt)], itin, - FrmR> { + InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$$zero, $rs, $rt"), + [], itin, FrmR> { let Defs = DefRegs; } @@ -790,6 +832,14 @@ let usesCustomInserter = 1 in { defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32>; } +/// Pseudo instructions for loading, storing and copying accumulator registers. +let isPseudo = 1 in { + defm LOAD_AC64 : LoadM<"load_ac64", ACRegs>; + defm STORE_AC64 : StoreM<"store_ac64", ACRegs>; +} + +def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>; + //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// @@ -845,10 +895,10 @@ let Predicates = [HasMips32r2, HasStdEnc] in { /// Load and Store Instructions /// aligned defm LB : LoadM<"lb", CPURegs, sextloadi8>, LW_FM<0x20>; -defm LBu : LoadM<"lbu", CPURegs, zextloadi8>, LW_FM<0x24>; -defm LH : LoadM<"lh", CPURegs, sextloadi16>, LW_FM<0x21>; +defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, LW_FM<0x24>; +defm LH : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, LW_FM<0x21>; defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, LW_FM<0x25>; -defm LW : LoadM<"lw", CPURegs, load>, LW_FM<0x23>; +defm LW : LoadM<"lw", CPURegs, load, addrDefault>, LW_FM<0x23>; defm SB : StoreM<"sb", CPURegs, truncstorei8>, LW_FM<0x28>; defm SH : StoreM<"sh", CPURegs, truncstorei16>, LW_FM<0x29>; defm SW : StoreM<"sw", CPURegs, store>, LW_FM<0x2b>; @@ -920,10 +970,13 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in { /// Multiply and Divide Instructions. def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>; def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>; -def SDIV : Div<MipsDivRem, "div", IIIdiv, CPURegsOpnd, [HI, LO]>, - MULT_FM<0, 0x1a>; -def UDIV : Div<MipsDivRemU, "divu", IIIdiv, CPURegsOpnd, [HI, LO]>, - MULT_FM<0, 0x1b>; +def PseudoMULT : MultDivPseudo<MULT, ACRegs, CPURegsOpnd, MipsMult, IIImul>; +def PseudoMULTu : MultDivPseudo<MULTu, ACRegs, CPURegsOpnd, MipsMultu, IIImul>; +def SDIV : Div<"div", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1a>; +def UDIV : Div<"divu", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1b>; +def PseudoSDIV : MultDivPseudo<SDIV, ACRegs, CPURegsOpnd, MipsDivRem, IIIdiv, 0>; +def PseudoUDIV : MultDivPseudo<UDIV, ACRegs, CPURegsOpnd, MipsDivRemU, IIIdiv, + 0>; def MTHI : MoveToLOHI<"mthi", CPURegs, [HI]>, MTLO_FM<0x11>; def MTLO : MoveToLOHI<"mtlo", CPURegs, [LO]>, MTLO_FM<0x13>; @@ -951,10 +1004,14 @@ def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>; def LEA_ADDiu : EffectiveAddress<"addiu", CPURegs, mem_ea>, LW_FM<9>; // MADD*/MSUB* -def MADD : MArithR<"madd", MipsMAdd, 1>, MULT_FM<0x1c, 0>; -def MADDU : MArithR<"maddu", MipsMAddu, 1>, MULT_FM<0x1c, 1>; -def MSUB : MArithR<"msub", MipsMSub>, MULT_FM<0x1c, 4>; -def MSUBU : MArithR<"msubu", MipsMSubu>, MULT_FM<0x1c, 5>; +def MADD : MArithR<"madd", 1>, MULT_FM<0x1c, 0>; +def MADDU : MArithR<"maddu", 1>, MULT_FM<0x1c, 1>; +def MSUB : MArithR<"msub">, MULT_FM<0x1c, 4>; +def MSUBU : MArithR<"msubu">, MULT_FM<0x1c, 5>; +def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd>; +def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu>; +def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub>; +def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu>; def RDHWR : ReadHardware<CPURegs, HWRegsOpnd>, RDHWR_FM; @@ -997,6 +1054,9 @@ def : InstAlias<"and $rs, $rt, $imm", def : InstAlias<"j $rs", (JR CPURegs:$rs), 0>, Requires<[NotMips64]>; def : InstAlias<"jalr $rs", (JALR RA, CPURegs:$rs)>, Requires<[NotMips64]>; +def : InstAlias<"jal $rs", (JALR RA, CPURegs:$rs), 0>, Requires<[NotMips64]>; +def : InstAlias<"jal $rd,$rs", (JALR CPURegs:$rd, CPURegs:$rs), 0>, + Requires<[NotMips64]>; def : InstAlias<"not $rt, $rs", (NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO), 1>; def : InstAlias<"neg $rt, $rs", @@ -1008,6 +1068,9 @@ def : InstAlias<"slt $rs, $rt, $imm", def : InstAlias<"xor $rs, $rt, $imm", (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>, Requires<[NotMips64]>; +def : InstAlias<"or $rs, $rt, $imm", + (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>, + Requires<[NotMips64]>; def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>; def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>; @@ -1043,6 +1106,13 @@ def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegsOpnd>; // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// +// Load/store pattern templates. +class LoadRegImmPat<Instruction LoadInst, ValueType ValTy, PatFrag Node> : + MipsPat<(ValTy (Node addrRegImm:$a)), (LoadInst addrRegImm:$a)>; + +class StoreRegImmPat<Instruction StoreInst, ValueType ValTy> : + MipsPat<(store ValTy:$v, addrRegImm:$a), (StoreInst ValTy:$v, addrRegImm:$a)>; + // Small immediates def : MipsPat<(i32 immSExt16:$in), (ADDiu ZERO, imm:$in)>; @@ -1220,6 +1290,24 @@ defm : SetgeImmPats<CPURegs, SLTi, SLTiu>; // bswap pattern def : MipsPat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>; +// mflo/hi patterns. +def : MipsPat<(i32 (ExtractLOHI ACRegs:$ac, imm:$lohi_idx)), + (EXTRACT_SUBREG ACRegs:$ac, imm:$lohi_idx)>; + +// Load halfword/word patterns. +let AddedComplexity = 40 in { + let Predicates = [NotN64, HasStdEnc] in { + def : LoadRegImmPat<LBu, i32, zextloadi8>; + def : LoadRegImmPat<LH, i32, sextloadi16>; + def : LoadRegImmPat<LW, i32, load>; + } + let Predicates = [IsN64, HasStdEnc] in { + def : LoadRegImmPat<LBu_P8, i32, zextloadi8>; + def : LoadRegImmPat<LH_P8, i32, sextloadi16>; + def : LoadRegImmPat<LW_P8, i32, load>; + } +} + //===----------------------------------------------------------------------===// // Floating Point Support //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 13b2a6ac17..5ed5124139 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -58,7 +58,8 @@ public: int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; /// Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; @@ -67,6 +68,9 @@ public: unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; + /// \brief Return GPR register class. + virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0; + private: virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index f93dd86c17..64458bcef7 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -18,6 +18,10 @@ def sub_lo : SubRegIndex; def sub_hi : SubRegIndex; } +class Unallocatable { + bit isAllocatable = 0; +} + // We have banks of 32 registers each. class MipsReg<bits<16> Enc, string n> : Register<n> { let HWEncoding = Enc; @@ -54,6 +58,13 @@ class AFPR64<bits<16> Enc, string n, list<Register> subregs> let SubRegIndices = [sub_32]; } +// Accumulator Registers +class ACC<bits<16> Enc, string n, list<Register> subregs> + : MipsRegWithSubRegs<Enc, n, subregs> { + let SubRegIndices = [sub_lo, sub_hi]; + let CoveredBySubRegs = 1; +} + // Mips Hardware Registers class HWR<bits<16> Enc, string n> : MipsReg<Enc, n>; @@ -219,7 +230,13 @@ let Namespace = "Mips" in { // Hi/Lo registers def HI : Register<"hi">, DwarfRegNum<[64]>; + def HI1 : Register<"hi1">, DwarfRegNum<[176]>; + def HI2 : Register<"hi2">, DwarfRegNum<[178]>; + def HI3 : Register<"hi3">, DwarfRegNum<[180]>; def LO : Register<"lo">, DwarfRegNum<[65]>; + def LO1 : Register<"lo1">, DwarfRegNum<[177]>; + def LO2 : Register<"lo2">, DwarfRegNum<[179]>; + def LO3 : Register<"lo3">, DwarfRegNum<[181]>; let SubRegIndices = [sub_32] in { def HI64 : RegisterWithSubRegs<"hi", [HI]>; @@ -240,11 +257,12 @@ let Namespace = "Mips" in { def HWR29_64 : MipsReg<29, "29">; // Accum registers - let SubRegIndices = [sub_lo, sub_hi] in - def AC0 : MipsRegWithSubRegs<0, "ac0", [LO, HI]>; - def AC1 : MipsReg<1, "ac1">; - def AC2 : MipsReg<2, "ac2">; - def AC3 : MipsReg<3, "ac3">; + def AC0 : ACC<0, "ac0", [LO, HI]>; + def AC1 : ACC<1, "ac1", [LO1, HI1]>; + def AC2 : ACC<2, "ac2", [LO2, HI2]>; + def AC3 : ACC<3, "ac3", [LO3, HI3]>; + + def AC0_64 : ACC<0, "ac0", [LO64, HI64]>; def DSPCtrl : Register<"dspctrl">; } @@ -291,9 +309,9 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add // Callee save S0, S1)>; -def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>; +def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>, Unallocatable; -def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>; +def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable; // 64bit fp: // * FGR64 - 32 64-bit registers @@ -319,18 +337,28 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>; // Condition Register for floating point operations -def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>; +def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable; // Hi/Lo Registers -def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>; -def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>; +def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable; +def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable; // Hardware registers -def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>; -def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>; +def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable; +def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable; // Accumulator Registers -def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>; +def ACRegs : RegisterClass<"Mips", [untyped], 64, (add AC0)> { + let Size = 64; +} + +def ACRegs128 : RegisterClass<"Mips", [untyped], 128, (add AC0_64)> { + let Size = 128; +} + +def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> { + let Size = 64; +} def CPURegsAsmOperand : AsmOperandClass { let Name = "CPURegsAsm"; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 0dd671376f..68ec921888 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -29,6 +29,155 @@ using namespace llvm; +namespace { +typedef MachineBasicBlock::iterator Iter; + +/// Helper class to expand accumulator pseudos. +class ExpandACCPseudo { +public: + ExpandACCPseudo(MachineFunction &MF); + bool expand(); + +private: + bool expandInstr(MachineBasicBlock &MBB, Iter I); + void expandLoad(MachineBasicBlock &MBB, Iter I, unsigned RegSize); + void expandStore(MachineBasicBlock &MBB, Iter I, unsigned RegSize); + void expandCopy(MachineBasicBlock &MBB, Iter I, unsigned RegSize); + + MachineFunction &MF; + const MipsSEInstrInfo &TII; + const MipsRegisterInfo &RegInfo; + MachineRegisterInfo &MRI; +}; +} + +ExpandACCPseudo::ExpandACCPseudo(MachineFunction &MF_) + : MF(MF_), + TII(*static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo())), + RegInfo(TII.getRegisterInfo()), MRI(MF.getRegInfo()) {} + +bool ExpandACCPseudo::expand() { + bool Expanded = false; + + for (MachineFunction::iterator BB = MF.begin(), BBEnd = MF.end(); + BB != BBEnd; ++BB) + for (Iter I = BB->begin(), End = BB->end(); I != End;) + Expanded |= expandInstr(*BB, I++); + + return Expanded; +} + +bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) { + switch(I->getOpcode()) { + case Mips::LOAD_AC64: + case Mips::LOAD_AC64_P8: + case Mips::LOAD_AC_DSP: + case Mips::LOAD_AC_DSP_P8: + expandLoad(MBB, I, 4); + break; + case Mips::LOAD_AC128: + case Mips::LOAD_AC128_P8: + expandLoad(MBB, I, 8); + break; + case Mips::STORE_AC64: + case Mips::STORE_AC64_P8: + case Mips::STORE_AC_DSP: + case Mips::STORE_AC_DSP_P8: + expandStore(MBB, I, 4); + break; + case Mips::STORE_AC128: + case Mips::STORE_AC128_P8: + expandStore(MBB, I, 8); + break; + case Mips::COPY_AC64: + case Mips::COPY_AC_DSP: + expandCopy(MBB, I, 4); + break; + case Mips::COPY_AC128: + expandCopy(MBB, I, 8); + break; + default: + return false; + } + + MBB.erase(I); + return true; +} + +void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I, + unsigned RegSize) { + // load $vr0, FI + // copy lo, $vr0 + // load $vr1, FI + 4 + // copy hi, $vr1 + + assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); + + const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize); + unsigned VR0 = MRI.createVirtualRegister(RC); + unsigned VR1 = MRI.createVirtualRegister(RC); + unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + unsigned Lo = RegInfo.getSubReg(Dst, Mips::sub_lo); + unsigned Hi = RegInfo.getSubReg(Dst, Mips::sub_hi); + DebugLoc DL = I->getDebugLoc(); + const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY); + + TII.loadRegFromStack(MBB, I, VR0, FI, RC, &RegInfo, 0); + BuildMI(MBB, I, DL, Desc, Lo).addReg(VR0, RegState::Kill); + TII.loadRegFromStack(MBB, I, VR1, FI, RC, &RegInfo, RegSize); + BuildMI(MBB, I, DL, Desc, Hi).addReg(VR1, RegState::Kill); +} + +void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I, + unsigned RegSize) { + // copy $vr0, lo + // store $vr0, FI + // copy $vr1, hi + // store $vr1, FI + 4 + + assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); + + const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize); + unsigned VR0 = MRI.createVirtualRegister(RC); + unsigned VR1 = MRI.createVirtualRegister(RC); + unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + unsigned SrcKill = getKillRegState(I->getOperand(0).isKill()); + unsigned Lo = RegInfo.getSubReg(Src, Mips::sub_lo); + unsigned Hi = RegInfo.getSubReg(Src, Mips::sub_hi); + DebugLoc DL = I->getDebugLoc(); + + BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(Lo, SrcKill); + TII.storeRegToStack(MBB, I, VR0, true, FI, RC, &RegInfo, 0); + BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(Hi, SrcKill); + TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize); +} + +void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I, + unsigned RegSize) { + // copy $vr0, src_lo + // copy dst_lo, $vr0 + // copy $vr1, src_hi + // copy dst_hi, $vr1 + + const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize); + unsigned VR0 = MRI.createVirtualRegister(RC); + unsigned VR1 = MRI.createVirtualRegister(RC); + unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg(); + unsigned SrcKill = getKillRegState(I->getOperand(1).isKill()); + unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo); + unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi); + unsigned SrcLo = RegInfo.getSubReg(Src, Mips::sub_lo); + unsigned SrcHi = RegInfo.getSubReg(Src, Mips::sub_hi); + DebugLoc DL = I->getDebugLoc(); + + BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(SrcLo, SrcKill); + BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstLo) + .addReg(VR0, RegState::Kill); + BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(SrcHi, SrcKill); + BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstHi) + .addReg(VR1, RegState::Kill); +} + unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const { static const unsigned EhDataReg[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 @@ -246,7 +395,10 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { // Reserve call frame if the size of the maximum call frame fits into 16-bit // immediate field and there are no variable sized objects on the stack. - return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects(); + // Make sure the second register scavenger spill slot can be accessed with one + // instruction. + return isInt<16>(MFI->getMaxCallFrameSize() + getStackAlignment()) && + !MFI->hasVarSizedObjects(); } // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions @@ -284,6 +436,18 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (MipsFI->callsEhReturn()) MipsFI->createEhDataRegsFI(); + // Expand pseudo instructions which load, store or copy accumulators. + // Add an emergency spill slot if a pseudo was expanded. + if (ExpandACCPseudo(MF).expand()) { + // The spill slot should be half the size of the accumulator. If target is + // mips64, it should be 64-bit, otherwise it should be 32-bt. + const TargetRegisterClass *RC = STI.hasMips64() ? + &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass; + int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(), + RC->getAlignment(), false); + RS->addScavengingFrameIndex(FI); + } + // Set scavenging frame index if necessary. uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() + estimateStackSize(MF); @@ -295,7 +459,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass; int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(), RC->getAlignment(), false); - RS->setScavengingFrameIndex(FI); + RS->addScavengingFrameIndex(FI); } const MipsFrameLowering * diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index 7becd25455..193a66cc65 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -21,7 +21,7 @@ namespace llvm { class MipsSEFrameLowering : public MipsFrameLowering { public: explicit MipsSEFrameLowering(const MipsSubtarget &STI) - : MipsFrameLowering(STI) {} + : MipsFrameLowering(STI, STI.hasMips64() ? 16 : 8) {} /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp new file mode 100644 index 0000000000..d6d220750c --- /dev/null +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -0,0 +1,473 @@ +//===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips32/64. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-isel" +#include "MipsSEISelDAGToDAG.h" +#include "Mips.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsMachineFunction.h" +#include "MipsRegisterInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + + +bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, + const MachineInstr& MI) { + unsigned DstReg = 0, ZeroReg = 0; + + // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0". + if ((MI.getOpcode() == Mips::ADDiu) && + (MI.getOperand(1).getReg() == Mips::ZERO) && + (MI.getOperand(2).getImm() == 0)) { + DstReg = MI.getOperand(0).getReg(); + ZeroReg = Mips::ZERO; + } else if ((MI.getOpcode() == Mips::DADDiu) && + (MI.getOperand(1).getReg() == Mips::ZERO_64) && + (MI.getOperand(2).getImm() == 0)) { + DstReg = MI.getOperand(0).getReg(); + ZeroReg = Mips::ZERO_64; + } + + if (!DstReg) + return false; + + // Replace uses with ZeroReg. + for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), + E = MRI->use_end(); U != E;) { + MachineOperand &MO = U.getOperand(); + unsigned OpNo = U.getOperandNo(); + MachineInstr *MI = MO.getParent(); + ++U; + + // Do not replace if it is a phi's operand or is tied to def operand. + if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) + continue; + + MO.setReg(ZeroReg); + } + + return true; +} + +void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->globalBaseRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg(); + const TargetRegisterClass *RC; + + if (Subtarget.isABI_N64()) + RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; + else + RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass; + + V0 = RegInfo.createVirtualRegister(RC); + V1 = RegInfo.createVirtualRegister(RC); + + if (Subtarget.isABI_N64()) { + MF.getRegInfo().addLiveIn(Mips::T9_64); + MBB.addLiveIn(Mips::T9_64); + + // lui $v0, %hi(%neg(%gp_rel(fname))) + // daddu $v1, $v0, $t9 + // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0) + .addReg(Mips::T9_64); + BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + return; + } + + if (MF.getTarget().getRelocationModel() == Reloc::Static) { + // Set global register to __gnu_local_gp. + // + // lui $v0, %hi(__gnu_local_gp) + // addiu $globalbasereg, $v0, %lo(__gnu_local_gp) + BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) + .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) + .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); + return; + } + + MF.getRegInfo().addLiveIn(Mips::T9); + MBB.addLiveIn(Mips::T9); + + if (Subtarget.isABI_N32()) { + // lui $v0, %hi(%neg(%gp_rel(fname))) + // addu $v1, $v0, $t9 + // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); + BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + return; + } + + assert(Subtarget.isABI_O32()); + + // For O32 ABI, the following instruction sequence is emitted to initialize + // the global base register: + // + // 0. lui $2, %hi(_gp_disp) + // 1. addiu $2, $2, %lo(_gp_disp) + // 2. addu $globalbasereg, $2, $t9 + // + // We emit only the last instruction here. + // + // GNU linker requires that the first two instructions appear at the beginning + // of a function and no instructions be inserted before or between them. + // The two instructions are emitted during lowering to MC layer in order to + // avoid any reordering. + // + // Register $2 (Mips::V0) is added to the list of live-in registers to ensure + // the value instruction 1 (addiu) defines is valid when instruction 2 (addu) + // reads it. + MF.getRegInfo().addLiveIn(Mips::V0); + MBB.addLiveIn(Mips::V0); + BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) + .addReg(Mips::V0).addReg(Mips::T9); +} + +void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { + initGlobalBaseReg(MF); + + MachineRegisterInfo *MRI = &MF.getRegInfo(); + + for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE; + ++MFI) + for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) + replaceUsesWithZeroReg(MRI, *I); +} + +/// Select multiply instructions. +std::pair<SDNode*, SDNode*> +MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty, + bool HasLo, bool HasHi) { + SDNode *Lo = 0, *Hi = 0; + SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0), + N->getOperand(1)); + SDValue InFlag = SDValue(Mul, 0); + + if (HasLo) { + unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); + Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag); + InFlag = SDValue(Lo, 1); + } + if (HasHi) { + unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64); + Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag); + } + return std::make_pair(Lo, Hi); +} + +SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, + SDValue CmpLHS, DebugLoc DL, + SDNode *Node) const { + unsigned Opc = InFlag.getOpcode(); (void)Opc; + + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); + + SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; + SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); + EVT VT = LHS.getValueType(); + + SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2); + SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT, + SDValue(Carry, 0), RHS); + return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, + SDValue(AddCarry, 0)); +} + +/// ComplexPattern used on MipsInstrInfo +/// Used on Mips Load/Store instructions +bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + EVT ValTy = Addr.getValueType(); + + // if Address is FI, get the TargetFrameIndex. + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, ValTy); + return true; + } + + // on PIC code Load GA + if (Addr.getOpcode() == MipsISD::Wrapper) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + + if (TM.getRelocationModel() != Reloc::PIC_) { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + + // Addresses of the form FI+const or FI|const + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + if (isInt<16>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> + (Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + else + Base = Addr.getOperand(0); + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); + return true; + } + } + + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + // When loading from constant pools, load the lower address part in + // the instruction itself. Example, instead of: + // lui $2, %hi($CPI1_0) + // addiu $2, $2, %lo($CPI1_0) + // lwc1 $f0, 0($2) + // Generate: + // lui $2, %hi($CPI1_0) + // lwc1 $f0, %lo($CPI1_0)($2) + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || + Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { + SDValue Opnd0 = Addr.getOperand(1).getOperand(0); + if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || + isa<JumpTableSDNode>(Opnd0)) { + Base = Addr.getOperand(0); + Offset = Opnd0; + return true; + } + } + } + + return false; +} + +bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType()); + return true; +} + +bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + return selectAddrRegImm(Addr, Base, Offset) || + selectAddrDefault(Addr, Base, Offset); +} + +std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + DebugLoc DL = Node->getDebugLoc(); + + /// + // Instruction Selection not handled by the auto-generated + // tablegen selection should be handled here. + /// + EVT NodeTy = Node->getValueType(0); + SDNode *Result; + unsigned MultOpc; + + switch(Opcode) { + default: break; + + case ISD::SUBE: { + SDValue InFlag = Node->getOperand(2); + Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node); + return std::make_pair(true, Result); + } + + case ISD::ADDE: { + SDValue InFlag = Node->getOperand(2); + Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node); + return std::make_pair(true, Result); + } + + /// Mul with two results + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: { + if (NodeTy == MVT::i32) + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); + else + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT); + + std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy, + true, true); + + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); + + if (!SDValue(Node, 1).use_empty()) + ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); + + return std::make_pair(true, (SDNode*)NULL); + } + + /// Special Muls + case ISD::MUL: { + // Mips32 has a 32-bit three operand mul instruction. + if (Subtarget.hasMips32() && NodeTy == MVT::i32) + break; + MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT; + Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first; + return std::make_pair(true, Result); + } + case ISD::MULHS: + case ISD::MULHU: { + if (NodeTy == MVT::i32) + MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); + else + MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT); + + Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second; + return std::make_pair(true, Result); + } + + case ISD::ConstantFP: { + ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); + if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { + if (Subtarget.hasMips64()) { + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, + Mips::ZERO_64, MVT::i64); + Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero); + } else { + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, + Mips::ZERO, MVT::i32); + Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero, + Zero); + } + + return std::make_pair(true, Result); + } + break; + } + + case ISD::Constant: { + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node); + unsigned Size = CN->getValueSizeInBits(0); + + if (Size == 32) + break; + + MipsAnalyzeImmediate AnalyzeImm; + int64_t Imm = CN->getSExtValue(); + + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Imm, Size, false); + + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); + DebugLoc DL = CN->getDebugLoc(); + SDNode *RegOpnd; + SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), + MVT::i64); + + // The first instruction can be a LUi which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. + if (Inst->Opc == Mips::LUi64) + RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd); + else + RegOpnd = + CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, + CurDAG->getRegister(Mips::ZERO_64, MVT::i64), + ImmOpnd); + + // The remaining instructions in the sequence are handled here. + for (++Inst; Inst != Seq.end(); ++Inst) { + ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), + MVT::i64); + RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, + SDValue(RegOpnd, 0), ImmOpnd); + } + + return std::make_pair(true, RegOpnd); + } + + case MipsISD::ThreadPointer: { + EVT PtrVT = TLI.getPointerTy(); + unsigned RdhwrOpc, SrcReg, DestReg; + + if (PtrVT == MVT::i32) { + RdhwrOpc = Mips::RDHWR; + SrcReg = Mips::HWR29; + DestReg = Mips::V1; + } else { + RdhwrOpc = Mips::RDHWR64; + SrcReg = Mips::HWR29_64; + DestReg = Mips::V1_64; + } + + SDNode *Rdhwr = + CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(), + Node->getValueType(0), + CurDAG->getRegister(SrcReg, PtrVT)); + SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg, + SDValue(Rdhwr, 0)); + SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT); + ReplaceUses(SDValue(Node, 0), ResNode); + return std::make_pair(true, ResNode.getNode()); + } + + case MipsISD::InsertLOHI: { + unsigned RCID = Subtarget.hasDSP() ? Mips::ACRegsDSPRegClassID : + Mips::ACRegsRegClassID; + SDValue RegClass = CurDAG->getTargetConstant(RCID, MVT::i32); + SDValue LoIdx = CurDAG->getTargetConstant(Mips::sub_lo, MVT::i32); + SDValue HiIdx = CurDAG->getTargetConstant(Mips::sub_hi, MVT::i32); + const SDValue Ops[] = { RegClass, Node->getOperand(0), LoIdx, + Node->getOperand(1), HiIdx }; + SDNode *Res = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, + MVT::Untyped, Ops, 5); + return std::make_pair(true, Res); + } + } + + return std::make_pair(false, (SDNode*)NULL); +} + +FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) { + return new MipsSEDAGToDAGISel(TM); +} diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h new file mode 100644 index 0000000000..6137ab040b --- /dev/null +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -0,0 +1,57 @@ +//===-- MipsSEISelDAGToDAG.h - A Dag to Dag Inst Selector for MipsSE -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips32/64. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSEISELDAGTODAG_H +#define MIPSSEISELDAGTODAG_H + +#include "MipsISelDAGToDAG.h" + +namespace llvm { + +class MipsSEDAGToDAGISel : public MipsDAGToDAGISel { + +public: + explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {} + +private: + bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); + + std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl, + EVT Ty, bool HasLo, bool HasHi); + + SDNode *selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS, + DebugLoc DL, SDNode *Node) const; + + virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node); + + virtual void processFunctionAfterISel(MachineFunction &MF); + + // Insert instructions to initialize the global base register in the + // first MBB of the function. + void initGlobalBaseReg(MachineFunction &MF); +}; + +FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp new file mode 100644 index 0000000000..4f219218d3 --- /dev/null +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -0,0 +1,442 @@ +//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips32/64. +// +//===----------------------------------------------------------------------===// +#include "MipsSEISelLowering.h" +#include "MipsRegisterInfo.h" +#include "MipsTargetMachine.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +static cl::opt<bool> +EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, + cl::desc("MIPS: Enable tail calls."), cl::init(false)); + +MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) + : MipsTargetLowering(TM) { + // Set up the register classes + addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); + + if (HasMips64) + addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass); + + if (Subtarget->hasDSP()) { + MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; + + for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { + addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass); + + // Expand all builtin opcodes. + for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, VecTys[i], Expand); + + setOperationAction(ISD::LOAD, VecTys[i], Legal); + setOperationAction(ISD::STORE, VecTys[i], Legal); + setOperationAction(ISD::BITCAST, VecTys[i], Legal); + } + } + + if (!TM.Options.UseSoftFloat) { + addRegisterClass(MVT::f32, &Mips::FGR32RegClass); + + // When dealing with single precision only, use libcalls + if (!Subtarget->isSingleFloat()) { + if (HasMips64) + addRegisterClass(MVT::f64, &Mips::FGR64RegClass); + else + addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); + } + } + + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); + setOperationAction(ISD::MULHS, MVT::i32, Custom); + setOperationAction(ISD::MULHU, MVT::i32, Custom); + + if (HasMips64) + setOperationAction(ISD::MUL, MVT::i64, Custom); + + setOperationAction(ISD::SDIVREM, MVT::i32, Custom); + setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + setOperationAction(ISD::SDIVREM, MVT::i64, Custom); + setOperationAction(ISD::UDIVREM, MVT::i64, Custom); + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i32, Custom); + + setTargetDAGCombine(ISD::ADDE); + setTargetDAGCombine(ISD::SUBE); + + computeRegisterProperties(); +} + +const MipsTargetLowering * +llvm::createMipsSETargetLowering(MipsTargetMachine &TM) { + return new MipsSETargetLowering(TM); +} + + +bool +MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { + MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; + + switch (SVT) { + case MVT::i64: + case MVT::i32: + if (Fast) + *Fast = true; + return true; + default: + return false; + } +} + +SDValue MipsSETargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch(Op.getOpcode()) { + case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG); + case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG); + case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG); + case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG); + case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG); + case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG); + case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, DAG); + } + + return MipsTargetLowering::LowerOperation(Op, DAG); +} + +// selectMADD - +// Transforms a subgraph in CurDAG if the following pattern is found: +// (addc multLo, Lo0), (adde multHi, Hi0), +// where, +// multHi/Lo: product of multiplication +// Lo0: initial value of Lo register +// Hi0: initial value of Hi register +// Return true if pattern matching was successful. +static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { + // ADDENode's second operand must be a flag output of an ADDC node in order + // for the matching to be successful. + SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); + + if (ADDCNode->getOpcode() != ISD::ADDC) + return false; + + SDValue MultHi = ADDENode->getOperand(0); + SDValue MultLo = ADDCNode->getOperand(0); + SDNode *MultNode = MultHi.getNode(); + unsigned MultOpc = MultHi.getOpcode(); + + // MultHi and MultLo must be generated by the same node, + if (MultLo.getNode() != MultNode) + return false; + + // and it must be a multiplication. + if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) + return false; + + // MultLo amd MultHi must be the first and second output of MultNode + // respectively. + if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) + return false; + + // Transform this to a MADD only if ADDENode and ADDCNode are the only users + // of the values of MultNode, in which case MultNode will be removed in later + // phases. + // If there exist users other than ADDENode or ADDCNode, this function returns + // here, which will result in MultNode being mapped to a single MULT + // instruction node rather than a pair of MULT and MADD instructions being + // produced. + if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) + return false; + + DebugLoc DL = ADDENode->getDebugLoc(); + + // Initialize accumulator. + SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, + ADDCNode->getOperand(1), + ADDENode->getOperand(1)); + + // create MipsMAdd(u) node + MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd; + + SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped, + MultNode->getOperand(0),// Factor 0 + MultNode->getOperand(1),// Factor 1 + ACCIn); + + // replace uses of adde and addc here + if (!SDValue(ADDCNode, 0).use_empty()) { + SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32); + SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd, + LoIdx); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut); + } + if (!SDValue(ADDENode, 0).use_empty()) { + SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32); + SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd, + HiIdx); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut); + } + + return true; +} + +// selectMSUB - +// Transforms a subgraph in CurDAG if the following pattern is found: +// (addc Lo0, multLo), (sube Hi0, multHi), +// where, +// multHi/Lo: product of multiplication +// Lo0: initial value of Lo register +// Hi0: initial value of Hi register +// Return true if pattern matching was successful. +static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { + // SUBENode's second operand must be a flag output of an SUBC node in order + // for the matching to be successful. + SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); + + if (SUBCNode->getOpcode() != ISD::SUBC) + return false; + + SDValue MultHi = SUBENode->getOperand(1); + SDValue MultLo = SUBCNode->getOperand(1); + SDNode *MultNode = MultHi.getNode(); + unsigned MultOpc = MultHi.getOpcode(); + + // MultHi and MultLo must be generated by the same node, + if (MultLo.getNode() != MultNode) + return false; + + // and it must be a multiplication. + if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) + return false; + + // MultLo amd MultHi must be the first and second output of MultNode + // respectively. + if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) + return false; + + // Transform this to a MSUB only if SUBENode and SUBCNode are the only users + // of the values of MultNode, in which case MultNode will be removed in later + // phases. + // If there exist users other than SUBENode or SUBCNode, this function returns + // here, which will result in MultNode being mapped to a single MULT + // instruction node rather than a pair of MULT and MSUB instructions being + // produced. + if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) + return false; + + DebugLoc DL = SUBENode->getDebugLoc(); + + // Initialize accumulator. + SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, + SUBCNode->getOperand(0), + SUBENode->getOperand(0)); + + // create MipsSub(u) node + MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub; + + SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, + MultNode->getOperand(0),// Factor 0 + MultNode->getOperand(1),// Factor 1 + ACCIn); + + // replace uses of sube and subc here + if (!SDValue(SUBCNode, 0).use_empty()) { + SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32); + SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub, + LoIdx); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); + } + if (!SDValue(SUBENode, 0).use_empty()) { + SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32); + SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub, + HiIdx); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); + } + + return true; +} + +static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + if (DCI.isBeforeLegalize()) + return SDValue(); + + if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && + selectMADD(N, &DAG)) + return SDValue(N, 0); + + return SDValue(); +} + +static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + if (DCI.isBeforeLegalize()) + return SDValue(); + + if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && + selectMSUB(N, &DAG)) + return SDValue(N, 0); + + return SDValue(); +} + +SDValue +MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + switch (N->getOpcode()) { + case ISD::ADDE: + return performADDECombine(N, DAG, DCI, Subtarget); + case ISD::SUBE: + return performSUBECombine(N, DAG, DCI, Subtarget); + default: + return MipsTargetLowering::PerformDAGCombine(N, DCI); + } +} + +MachineBasicBlock * +MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + switch (MI->getOpcode()) { + default: + return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case Mips::BPOSGE32_PSEUDO: + return emitBPOSGE32(MI, BB); + } +} + +bool MipsSETargetLowering:: +isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const { + if (!EnableMipsTailCalls) + return false; + + // Return false if either the callee or caller has a byval argument. + if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) + return false; + + // Return true if the callee's argument area is no larger than the + // caller's. + return NextStackOffset <= FI.getIncomingArgSize(); +} + +void MipsSETargetLowering:: +getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + // T9 should contain the address of the callee function if + // -reloction-model=pic or it is an indirect call. + if (IsPICCall || !GlobalOrExternal) { + unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; + RegsToPass.push_front(std::make_pair(T9Reg, Callee)); + } else + Ops.push_back(Callee); + + MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, + InternalLinkage, CLI, Callee, Chain); +} + +SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, + bool HasLo, bool HasHi, + SelectionDAG &DAG) const { + EVT Ty = Op.getOperand(0).getValueType(); + DebugLoc DL = Op.getDebugLoc(); + SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, + Op.getOperand(0), Op.getOperand(1)); + SDValue Lo, Hi; + + if (HasLo) + Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult, + DAG.getConstant(Mips::sub_lo, MVT::i32)); + if (HasHi) + Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult, + DAG.getConstant(Mips::sub_hi, MVT::i32)); + + if (!HasLo || !HasHi) + return HasLo ? Lo : Hi; + + SDValue Vals[] = { Lo, Hi }; + return DAG.getMergeValues(Vals, 2, DL); +} + +MachineBasicBlock * MipsSETargetLowering:: +emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ + // $bb: + // bposge32_pseudo $vr0 + // => + // $bb: + // bposge32 $tbb + // $fbb: + // li $vr2, 0 + // b $sink + // $tbb: + // li $vr1, 1 + // $sink: + // $vr0 = phi($vr2, $fbb, $vr1, $tbb) + + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetRegisterClass *RC = &Mips::CPURegsRegClass; + DebugLoc DL = MI->getDebugLoc(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); + MachineFunction *F = BB->getParent(); + MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, FBB); + F->insert(It, TBB); + F->insert(It, Sink); + + // Transfer the remainder of BB and its successor edges to Sink. + Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + Sink->transferSuccessorsAndUpdatePHIs(BB); + + // Add successors. + BB->addSuccessor(FBB); + BB->addSuccessor(TBB); + FBB->addSuccessor(Sink); + TBB->addSuccessor(Sink); + + // Insert the real bposge32 instruction to $BB. + BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); + + // Fill $FBB. + unsigned VR2 = RegInfo.createVirtualRegister(RC); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) + .addReg(Mips::ZERO).addImm(0); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); + + // Fill $TBB. + unsigned VR1 = RegInfo.createVirtualRegister(RC); + BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) + .addReg(Mips::ZERO).addImm(1); + + // Insert phi function to $Sink. + BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), + MI->getOperand(0).getReg()) + .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return Sink; +} diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h new file mode 100644 index 0000000000..186f6a343d --- /dev/null +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -0,0 +1,62 @@ +//===-- MipsSEISelLowering.h - MipsSE DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips32/64. +// +//===----------------------------------------------------------------------===// + +#ifndef MipsSEISELLOWERING_H +#define MipsSEISELLOWERING_H + +#include "MipsISelLowering.h" +#include "MipsRegisterInfo.h" + +namespace llvm { + class MipsSETargetLowering : public MipsTargetLowering { + public: + explicit MipsSETargetLowering(MipsTargetMachine &TM); + + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; + + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { + if (VT == MVT::Untyped) + return Subtarget->hasDSP() ? &Mips::ACRegsDSPRegClass : + &Mips::ACRegsRegClass; + + return TargetLowering::getRepRegClassFor(VT); + } + + private: + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const; + + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; + + SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi, + SelectionDAG &DAG) const; + + MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, + MachineBasicBlock *BB) const; + }; +} + +#endif // MipsSEISELLOWERING_H diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index a9809ef712..9d08172081 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -136,6 +136,12 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (Mips::FGR64RegClass.contains(DestReg)) Opc = Mips::DMTC1; } + else if (Mips::ACRegsRegClass.contains(DestReg, SrcReg)) + Opc = Mips::COPY_AC64; + else if (Mips::ACRegsDSPRegClass.contains(DestReg, SrcReg)) + Opc = Mips::COPY_AC_DSP; + else if (Mips::ACRegs128RegClass.contains(DestReg, SrcReg)) + Opc = Mips::COPY_AC128; assert(Opc && "Cannot copy registers"); @@ -152,10 +158,10 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } void MipsSEInstrInfo:: -storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, + int64_t Offset) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); @@ -166,6 +172,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = IsN64 ? Mips::SW_P8 : Mips::SW; else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::SD_P8 : Mips::SD; + else if (Mips::ACRegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::STORE_AC64_P8 : Mips::STORE_AC64; + else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::STORE_AC_DSP_P8 : Mips::STORE_AC_DSP; + else if (Mips::ACRegs128RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::STORE_AC128_P8 : Mips::STORE_AC128; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) @@ -175,15 +187,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert(Opc && "Register class not handled!"); BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO); } void MipsSEInstrInfo:: -loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const -{ +loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, int64_t Offset) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); @@ -193,6 +203,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = IsN64 ? Mips::LW_P8 : Mips::LW; else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::LD_P8 : Mips::LD; + else if (Mips::ACRegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LOAD_AC64_P8 : Mips::LOAD_AC64; + else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LOAD_AC_DSP_P8 : Mips::LOAD_AC_DSP; + else if (Mips::ACRegs128RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LOAD_AC128_P8 : Mips::LOAD_AC128; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) @@ -201,7 +217,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164; assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) + BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset) .addMemOperand(MMO); } diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index 3e22b33ed7..0bf7876f0f 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -49,17 +49,19 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; + virtual void storeRegToStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const; + + virtual void loadRegFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const; virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index a39b393e4e..96967380b2 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -54,6 +54,15 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const { return true; } +const TargetRegisterClass * +MipsSERegisterInfo::intRegClass(unsigned Size) const { + if (Size == 4) + return &Mips::CPURegsRegClass; + + assert(Size == 8); + return &Mips::CPU64RegsRegClass; +} + void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h index f6827e9663..2f7c37bb46 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.h +++ b/lib/Target/Mips/MipsSERegisterInfo.h @@ -31,6 +31,8 @@ public: bool requiresFrameIndexScavenging(const MachineFunction &MF) const; + virtual const TargetRegisterClass *intRegClass(unsigned Size) const; + private: virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index fd930f0335..33363580ab 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -54,7 +54,7 @@ MipsTargetMachine(const Target &T, StringRef TT, "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")), InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), - TLInfo(*this), TSInfo(*this), JITInfo() { + TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() { } void MipsebTargetMachine::anchor() { } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index c4928c21eb..7e5f192264 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -34,7 +34,7 @@ class MipsTargetMachine : public LLVMTargetMachine { const DataLayout DL; // Calculates type size & alignment OwningPtr<const MipsInstrInfo> InstrInfo; OwningPtr<const MipsFrameLowering> FrameLowering; - MipsTargetLowering TLInfo; + OwningPtr<const MipsTargetLowering> TLInfo; MipsSelectionDAGInfo TSInfo; MipsJITInfo JITInfo; @@ -63,7 +63,7 @@ public: } virtual const MipsTargetLowering *getTargetLowering() const { - return &TLInfo; + return TLInfo.get(); } virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 47baef6696..7da2fed4cd 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -22,6 +22,7 @@ set(NVPTXCodeGen_sources NVPTXAllocaHoisting.cpp NVPTXAsmPrinter.cpp NVPTXUtilities.cpp + NVVMReflect.cpp ) add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h index 454583850b..b3e8b5d262 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -52,25 +52,24 @@ enum PropertyAnnotation { }; const unsigned AnnotationNameLen = 8; // length of each annotation name -const char -PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { - "maxntidx", // PROPERTY_MAXNTID_X - "maxntidy", // PROPERTY_MAXNTID_Y - "maxntidz", // PROPERTY_MAXNTID_Z - "reqntidx", // PROPERTY_REQNTID_X - "reqntidy", // PROPERTY_REQNTID_Y - "reqntidz", // PROPERTY_REQNTID_Z - "minctasm", // PROPERTY_MINNCTAPERSM - "texture", // PROPERTY_ISTEXTURE - "surface", // PROPERTY_ISSURFACE - "sampler", // PROPERTY_ISSAMPLER - "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM - "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM - "kernel", // PROPERTY_ISKERNEL_FUNCTION - "align", // PROPERTY_ALIGN +const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { + "maxntidx", // PROPERTY_MAXNTID_X + "maxntidy", // PROPERTY_MAXNTID_Y + "maxntidz", // PROPERTY_MAXNTID_Z + "reqntidx", // PROPERTY_REQNTID_X + "reqntidy", // PROPERTY_REQNTID_Y + "reqntidz", // PROPERTY_REQNTID_Z + "minctasm", // PROPERTY_MINNCTAPERSM + "texture", // PROPERTY_ISTEXTURE + "surface", // PROPERTY_ISSURFACE + "sampler", // PROPERTY_ISSAMPLER + "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM + "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM + "kernel", // PROPERTY_ISKERNEL_FUNCTION + "align", // PROPERTY_ALIGN - // last property - "proplast", // PROPERTY_LAST + // last property + "proplast", // PROPERTY_LAST }; // name of named metadata used for global annotations @@ -80,9 +79,8 @@ PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { // compiling those .cpp files, hence __attribute__((unused)). __attribute__((unused)) #endif -static const char* NamedMDForAnnotations = "nvvm.annotations"; + static const char *NamedMDForAnnotations = "nvvm.annotations"; } - #endif diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index 619181994a..459cd96cb0 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -23,10 +23,9 @@ bool CompileForDebugging; // compile for debugging static cl::opt<bool, true> Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden, - cl::location(CompileForDebugging), - cl::init(false)); + cl::location(CompileForDebugging), cl::init(false)); -void NVPTXMCAsmInfo::anchor() { } +void NVPTXMCAsmInfo::anchor() {} NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) { Triple TheTriple(TT); @@ -55,7 +54,7 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) { Data32bitsDirective = " .b32 "; Data64bitsDirective = " .b64 "; PrivateGlobalPrefix = ""; - ZeroDirective = " .b8"; + ZeroDirective = " .b8"; AsciiDirective = " .b8"; AscizDirective = " .b8"; diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index 44aa01ca6e..ccd29705df 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -28,7 +28,6 @@ #define GET_REGINFO_MC_DESC #include "NVPTXGenRegisterInfo.inc" - using namespace llvm; static MCInstrInfo *createNVPTXMCInstrInfo() { @@ -44,22 +43,20 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) { return X; } -static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { +static MCSubtargetInfo * +createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { MCSubtargetInfo *X = new MCSubtargetInfo(); InitNVPTXMCSubtargetInfo(X, TT, CPU, FS); return X; } -static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { +static MCCodeGenInfo *createNVPTXMCCodeGenInfo( + StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); X->InitMCCodeGenInfo(RM, CM, OL); return X; } - // Force static initialization. extern "C" void LLVMInitializeNVPTXTargetMC() { // Register the MC asm info. diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h index b5684883fc..d6c79b5110 100644 --- a/lib/Target/NVPTX/ManagedStringPool.h +++ b/lib/Target/NVPTX/ManagedStringPool.h @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_SUPPORT_MANAGED_STRING_H #define LLVM_SUPPORT_MANAGED_STRING_H diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index b46ea881c4..6a53a443bf 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -41,18 +41,24 @@ enum CondCodes { inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) { switch (CC) { - case NVPTXCC::NE: return "ne"; - case NVPTXCC::EQ: return "eq"; - case NVPTXCC::LT: return "lt"; - case NVPTXCC::LE: return "le"; - case NVPTXCC::GT: return "gt"; - case NVPTXCC::GE: return "ge"; + case NVPTXCC::NE: + return "ne"; + case NVPTXCC::EQ: + return "eq"; + case NVPTXCC::LT: + return "lt"; + case NVPTXCC::LE: + return "le"; + case NVPTXCC::GT: + return "gt"; + case NVPTXCC::GE: + return "ge"; } llvm_unreachable("Unknown condition code"); } -FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM, - llvm::CodeGenOpt::Level OptLevel); +FunctionPass * +createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel); FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &); @@ -62,8 +68,7 @@ bool isImageOrSamplerVal(const Value *, const Module *); extern Target TheNVPTXTarget32; extern Target TheNVPTXTarget64; -namespace NVPTX -{ +namespace NVPTX { enum DrvInterface { NVCL, CUDA, @@ -102,7 +107,7 @@ enum LoadStore { }; namespace PTXLdStInstCode { -enum AddressSpace{ +enum AddressSpace { GENERIC = 0, GLOBAL = 1, CONSTANT = 2, diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td index 7aee3595c6..d78b4e81a3 100644 --- a/lib/Target/NVPTX/NVPTX.td +++ b/lib/Target/NVPTX/NVPTX.td @@ -26,14 +26,6 @@ include "NVPTXInstrInfo.td" //===----------------------------------------------------------------------===// // SM Versions -def SM10 : SubtargetFeature<"sm_10", "SmVersion", "10", - "Target SM 1.0">; -def SM11 : SubtargetFeature<"sm_11", "SmVersion", "11", - "Target SM 1.1">; -def SM12 : SubtargetFeature<"sm_12", "SmVersion", "12", - "Target SM 1.2">; -def SM13 : SubtargetFeature<"sm_13", "SmVersion", "13", - "Target SM 1.3">; def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20", "Target SM 2.0">; def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21", @@ -56,10 +48,6 @@ def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31", class Proc<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; -def : Proc<"sm_10", [SM10]>; -def : Proc<"sm_11", [SM11]>; -def : Proc<"sm_12", [SM12]>; -def : Proc<"sm_13", [SM13]>; def : Proc<"sm_20", [SM20]>; def : Proc<"sm_21", [SM21]>; def : Proc<"sm_30", [SM30]>; diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp index 60f52a46da..0f792ec682 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp @@ -19,9 +19,9 @@ namespace llvm { bool NVPTXAllocaHoisting::runOnFunction(Function &function) { - bool functionModified = false; - Function::iterator I = function.begin(); - TerminatorInst *firstTerminatorInst = (I++)->getTerminator(); + bool functionModified = false; + Function::iterator I = function.begin(); + TerminatorInst *firstTerminatorInst = (I++)->getTerminator(); for (Function::iterator E = function.end(); I != E; ++I) { for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { @@ -37,12 +37,10 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) { } char NVPTXAllocaHoisting::ID = 1; -RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting", - "Hoisting alloca instructions in non-entry " - "blocks to the entry block"); +RegisterPass<NVPTXAllocaHoisting> +X("alloca-hoisting", "Hoisting alloca instructions in non-entry " + "blocks to the entry block"); -FunctionPass *createAllocaHoisting() { - return new NVPTXAllocaHoisting(); -} +FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); } } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 0115e1f5d3..ce5d78afa3 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -47,7 +47,6 @@ #include <sstream> using namespace llvm; - #include "NVPTXGenAsmWriter.inc" bool RegAllocNilUsed = true; @@ -59,21 +58,17 @@ EmitLineNumbers("nvptx-emit-line-numbers", cl::desc("NVPTX Specific: Emit Line numbers even without -G"), cl::init(true)); -namespace llvm { -bool InterleaveSrcInPtx = false; -} - -static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src", - cl::ZeroOrMore, - cl::desc("NVPTX Specific: Emit source line in ptx file"), - cl::location(llvm::InterleaveSrcInPtx)); +namespace llvm { bool InterleaveSrcInPtx = false; } +static cl::opt<bool, true> +InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, + cl::desc("NVPTX Specific: Emit source line in ptx file"), + cl::location(llvm::InterleaveSrcInPtx)); namespace { /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V /// depends. -void DiscoverDependentGlobals(Value *V, - DenseSet<GlobalVariable*> &Globals) { +void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) Globals.insert(GV); else { @@ -88,12 +83,12 @@ void DiscoverDependentGlobals(Value *V, /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable /// instances to be emitted, but only after any dependents have been added /// first. -void VisitGlobalVariableForEmission(GlobalVariable *GV, - SmallVectorImpl<GlobalVariable*> &Order, - DenseSet<GlobalVariable*> &Visited, - DenseSet<GlobalVariable*> &Visiting) { +void VisitGlobalVariableForEmission( + GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order, + DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) { // Have we already visited this one? - if (Visited.count(GV)) return; + if (Visited.count(GV)) + return; // Do we have a circular dependency? if (Visiting.count(GV)) @@ -103,12 +98,13 @@ void VisitGlobalVariableForEmission(GlobalVariable *GV, Visiting.insert(GV); // Make sure we visit all dependents first - DenseSet<GlobalVariable*> Others; + DenseSet<GlobalVariable *> Others; for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) DiscoverDependentGlobals(GV->getOperand(i), Others); - - for (DenseSet<GlobalVariable*>::iterator I = Others.begin(), - E = Others.end(); I != E; ++I) + + for (DenseSet<GlobalVariable *>::iterator I = Others.begin(), + E = Others.end(); + I != E; ++I) VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); // Now we can visit ourself @@ -142,25 +138,23 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { if (CE == 0) llvm_unreachable("Unknown constant value to lower!"); - switch (CE->getOpcode()) { default: // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. - if (Constant *C = - ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) + if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) if (C != CE) return LowerConstant(C, AP); // Otherwise report the problem to the user. { - std::string S; - raw_string_ostream OS(S); - OS << "Unsupported expression in static initializer: "; - WriteAsOperand(OS, CE, /*PrintType=*/false, - !AP.MF ? 0 : AP.MF->getFunction()->getParent()); - report_fatal_error(OS.str()); + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + WriteAsOperand(OS, CE, /*PrintType=*/ false, + !AP.MF ? 0 : AP.MF->getFunction()->getParent()); + report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { const DataLayout &TD = *AP.TM.getDataLayout(); @@ -182,7 +176,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // expression properly. This is important for differences between // blockaddress labels. Since the two labels are in the same function, it // is reasonable to treat their delta as a 32-bit value. - // FALL THROUGH. + // FALL THROUGH. case Instruction::BitCast: return LowerConstant(CE->getOperand(0), AP); @@ -192,7 +186,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), - false/*ZExt*/); + false /*ZExt*/); return LowerConstant(Op, AP); } @@ -214,11 +208,12 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // the high bits so we are sure to get a proper truncation if the input is // a constant expr. unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); - const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); + const MCExpr *MaskExpr = + MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx); return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); } - // The MC library also has a right-shift operator, but it isn't consistently + // The MC library also has a right-shift operator, but it isn't consistently // signed or unsigned between different targets. case Instruction::Add: case Instruction::Sub: @@ -232,24 +227,32 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); switch (CE->getOpcode()) { - default: llvm_unreachable("Unknown binary operator constant cast expr"); - case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); - case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); - case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); - case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); - case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); - case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); - case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); - case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx); - case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); + default: + llvm_unreachable("Unknown binary operator constant cast expr"); + case Instruction::Add: + return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); + case Instruction::Sub: + return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); + case Instruction::Mul: + return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); + case Instruction::SDiv: + return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); + case Instruction::SRem: + return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); + case Instruction::Shl: + return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); + case Instruction::And: + return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); + case Instruction::Or: + return MCBinaryExpr::CreateOr(LHS, RHS, Ctx); + case Instruction::Xor: + return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); } } } } - -void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) -{ +void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { if (!EmitLineNumbers) return; if (ignoreLoc(MI)) @@ -268,7 +271,6 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) if (curLoc.isUnknown()) return; - const MachineFunction *MF = MI.getParent()->getParent(); //const TargetMachine &TM = MF->getTarget(); @@ -289,14 +291,13 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) if (filenameMap.find(fileName.str()) == filenameMap.end()) return; - // Emit the line from the source file. if (llvm::InterleaveSrcInPtx) this->emitSrcInText(fileName.str(), curLoc.getLine()); std::stringstream temp; - temp << "\t.loc " << filenameMap[fileName.str()] - << " " << curLoc.getLine() << " " << curLoc.getCol(); + temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine() + << " " << curLoc.getCol(); OutStreamer.EmitRawText(Twine(temp.str().c_str())); } @@ -309,9 +310,7 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitRawText(OS.str()); } -void NVPTXAsmPrinter::printReturnValStr(const Function *F, - raw_ostream &O) -{ +void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { const DataLayout *TD = TM.getDataLayout(); const TargetLowering *TLI = TM.getTargetLowering(); @@ -329,53 +328,49 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, unsigned size = 0; if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { size = ITy->getBitWidth(); - if (size < 32) size = 32; + if (size < 32) + size = 32; } else { - assert(Ty->isFloatingPointTy() && - "Floating point type expected here"); + assert(Ty->isFloatingPointTy() && "Floating point type expected here"); size = Ty->getPrimitiveSizeInBits(); } O << ".param .b" << size << " func_retval0"; - } - else if (isa<PointerType>(Ty)) { + } else if (isa<PointerType>(Ty)) { O << ".param .b" << TLI->getPointerTy().getSizeInBits() - << " func_retval0"; + << " func_retval0"; } else { - if ((Ty->getTypeID() == Type::StructTyID) || - isa<VectorType>(Ty)) { + if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*TLI, Ty, vtparts); unsigned totalsz = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { elems = vtparts[i].getVectorNumElements(); elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) sz = 8; - totalsz += sz/8; + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + totalsz += sz / 8; } } unsigned retAlignment = 0; if (!llvm::getAlign(*F, 0, retAlignment)) retAlignment = TD->getABITypeAlignment(Ty); - O << ".param .align " - << retAlignment - << " .b8 func_retval0[" - << totalsz << "]"; + O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz + << "]"; } else - assert(false && - "Unknown return type"); + assert(false && "Unknown return type"); } } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*TLI, Ty, vtparts); unsigned idx = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -383,14 +378,16 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << ".reg .b" << sz << " func_retval" << idx; - if (j<je-1) O << ", "; + if (j < je - 1) + O << ", "; ++idx; } - if (i < e-1) + if (i < e - 1) O << ", "; } } @@ -411,7 +408,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { // Set up MRI = &MF->getRegInfo(); F = MF->getFunction(); - emitLinkageDirective(F,O); + emitLinkageDirective(F, O); if (llvm::isKernelFunction(*F)) O << ".entry "; else { @@ -434,7 +431,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { void NVPTXAsmPrinter::EmitFunctionBodyStart() { const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); unsigned numRegClasses = TRI.getNumRegClasses(); - VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1]; + VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1]; OutStreamer.EmitRawText(StringRef("{\n")); setAndEmitFunctionVirtualRegisters(*MF); @@ -446,54 +443,63 @@ void NVPTXAsmPrinter::EmitFunctionBodyStart() { void NVPTXAsmPrinter::EmitFunctionBodyEnd() { OutStreamer.EmitRawText(StringRef("}\n")); - delete []VRidGlobal2LocalMap; + delete[] VRidGlobal2LocalMap; } - -void -NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F, - raw_ostream &O) const { +void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, + raw_ostream &O) const { // If the NVVM IR has some of reqntid* specified, then output // the reqntid directive, and set the unspecified ones to 1. // If none of reqntid* is specified, don't output reqntid directive. unsigned reqntidx, reqntidy, reqntidz; bool specified = false; - if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1; - else specified = true; - if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1; - else specified = true; - if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1; - else specified = true; + if (llvm::getReqNTIDx(F, reqntidx) == false) + reqntidx = 1; + else + specified = true; + if (llvm::getReqNTIDy(F, reqntidy) == false) + reqntidy = 1; + else + specified = true; + if (llvm::getReqNTIDz(F, reqntidz) == false) + reqntidz = 1; + else + specified = true; if (specified) - O << ".reqntid " << reqntidx << ", " - << reqntidy << ", " << reqntidz << "\n"; + O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz + << "\n"; // If the NVVM IR has some of maxntid* specified, then output // the maxntid directive, and set the unspecified ones to 1. // If none of maxntid* is specified, don't output maxntid directive. unsigned maxntidx, maxntidy, maxntidz; specified = false; - if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1; - else specified = true; - if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1; - else specified = true; - if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1; - else specified = true; + if (llvm::getMaxNTIDx(F, maxntidx) == false) + maxntidx = 1; + else + specified = true; + if (llvm::getMaxNTIDy(F, maxntidy) == false) + maxntidy = 1; + else + specified = true; + if (llvm::getMaxNTIDz(F, maxntidz) == false) + maxntidz = 1; + else + specified = true; if (specified) - O << ".maxntid " << maxntidx << ", " - << maxntidy << ", " << maxntidz << "\n"; + O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz + << "\n"; unsigned mincta; if (llvm::getMinCTASm(F, mincta)) O << ".minnctapersm " << mincta << "\n"; } -void -NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, - raw_ostream &O) { - const TargetRegisterClass * RC = MRI->getRegClass(vr); +void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, + raw_ostream &O) { + const TargetRegisterClass *RC = MRI->getRegClass(vr); unsigned id = RC->getID(); std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[id]; @@ -506,44 +512,38 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, report_fatal_error("Bad register!"); } -void -NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, - raw_ostream &O) { +void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, + raw_ostream &O) { getVirtualRegisterName(vr, isVec, O); } -void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO, - const char *Modifier, - raw_ostream &O) { - static const char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'}; - int Imm = (int)MO.getImm(); - if(0 == strcmp(Modifier, "vecelem")) +void NVPTXAsmPrinter::printVecModifiedImmediate( + const MachineOperand &MO, const char *Modifier, raw_ostream &O) { + static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; + int Imm = (int) MO.getImm(); + if (0 == strcmp(Modifier, "vecelem")) O << "_" << vecelem[Imm]; - else if(0 == strcmp(Modifier, "vecv4comm1")) { - if((Imm < 0) || (Imm > 3)) + else if (0 == strcmp(Modifier, "vecv4comm1")) { + if ((Imm < 0) || (Imm > 3)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv4comm2")) { - if((Imm < 4) || (Imm > 7)) + } else if (0 == strcmp(Modifier, "vecv4comm2")) { + if ((Imm < 4) || (Imm > 7)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv4pos")) { - if(Imm < 0) Imm = 0; - O << "_" << vecelem[Imm%4]; - } - else if(0 == strcmp(Modifier, "vecv2comm1")) { - if((Imm < 0) || (Imm > 1)) + } else if (0 == strcmp(Modifier, "vecv4pos")) { + if (Imm < 0) + Imm = 0; + O << "_" << vecelem[Imm % 4]; + } else if (0 == strcmp(Modifier, "vecv2comm1")) { + if ((Imm < 0) || (Imm > 1)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv2comm2")) { - if((Imm < 2) || (Imm > 3)) + } else if (0 == strcmp(Modifier, "vecv2comm2")) { + if ((Imm < 2) || (Imm > 3)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv2pos")) { - if(Imm < 0) Imm = 0; - O << "_" << vecelem[Imm%2]; - } - else + } else if (0 == strcmp(Modifier, "vecv2pos")) { + if (Imm < 0) + Imm = 0; + O << "_" << vecelem[Imm % 2]; + } else llvm_unreachable("Unknown Modifier on immediate operand"); } @@ -565,7 +565,7 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, emitVirtualRegister(MO.getReg(), true, O); else llvm_unreachable( - "Don't know how to handle the modifier on virtual register."); + "Don't know how to handle the modifier on virtual register."); } } return; @@ -576,7 +576,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, else if (strstr(Modifier, "vec") == Modifier) printVecModifiedImmediate(MO, Modifier, O); else - llvm_unreachable("Don't know how to handle modifier on immediate operand"); + llvm_unreachable( + "Don't know how to handle modifier on immediate operand"); return; case MachineOperand::MO_FPImmediate: @@ -588,18 +589,16 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, break; case MachineOperand::MO_ExternalSymbol: { - const char * symbname = MO.getSymbolName(); + const char *symbname = MO.getSymbolName(); if (strstr(symbname, ".PARAM") == symbname) { unsigned index; - sscanf(symbname+6, "%u[];", &index); + sscanf(symbname + 6, "%u[];", &index); printParamName(index, O); - } - else if (strstr(symbname, ".HLPPARAM") == symbname) { + } else if (strstr(symbname, ".HLPPARAM") == symbname) { unsigned index; - sscanf(symbname+9, "%u[];", &index); + sscanf(symbname + 9, "%u[];", &index); O << *CurrentFnSym << "_param_" << index << "_offset"; - } - else + } else O << symbname; break; } @@ -613,8 +612,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, } } -void NVPTXAsmPrinter:: -printImplicitDef(const MachineInstr *MI, raw_ostream &O) const { +void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI, + raw_ostream &O) const { #ifndef __OPTIMIZE__ O << "\t// Implicit def :"; //printOperand(MI, 0); @@ -628,32 +627,41 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, if (Modifier && !strcmp(Modifier, "add")) { O << ", "; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum + 1, O); } else { - if (MI->getOperand(opNum+1).isImm() && - MI->getOperand(opNum+1).getImm() == 0) + if (MI->getOperand(opNum + 1).isImm() && + MI->getOperand(opNum + 1).getImm() == 0) return; // don't print ',0' or '+0' O << "+"; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum + 1, O); } } void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, - raw_ostream &O, const char *Modifier) -{ + raw_ostream &O, const char *Modifier) { if (Modifier) { const MachineOperand &MO = MI->getOperand(opNum); - int Imm = (int)MO.getImm(); + int Imm = (int) MO.getImm(); if (!strcmp(Modifier, "volatile")) { if (Imm) O << ".volatile"; } else if (!strcmp(Modifier, "addsp")) { switch (Imm) { - case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break; - case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break; - case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break; - case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break; - case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break; + case NVPTX::PTXLdStInstCode::GLOBAL: + O << ".global"; + break; + case NVPTX::PTXLdStInstCode::SHARED: + O << ".shared"; + break; + case NVPTX::PTXLdStInstCode::LOCAL: + O << ".local"; + break; + case NVPTX::PTXLdStInstCode::PARAM: + O << ".param"; + break; + case NVPTX::PTXLdStInstCode::CONSTANT: + O << ".const"; + break; case NVPTX::PTXLdStInstCode::GENERIC: if (!nvptxSubtarget.hasGenericLdSt()) O << ".global"; @@ -661,31 +669,27 @@ void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, default: llvm_unreachable("Wrong Address Space"); } - } - else if (!strcmp(Modifier, "sign")) { - if (Imm==NVPTX::PTXLdStInstCode::Signed) + } else if (!strcmp(Modifier, "sign")) { + if (Imm == NVPTX::PTXLdStInstCode::Signed) O << "s"; - else if (Imm==NVPTX::PTXLdStInstCode::Unsigned) + else if (Imm == NVPTX::PTXLdStInstCode::Unsigned) O << "u"; else O << "f"; - } - else if (!strcmp(Modifier, "vec")) { - if (Imm==NVPTX::PTXLdStInstCode::V2) + } else if (!strcmp(Modifier, "vec")) { + if (Imm == NVPTX::PTXLdStInstCode::V2) O << ".v2"; - else if (Imm==NVPTX::PTXLdStInstCode::V4) + else if (Imm == NVPTX::PTXLdStInstCode::V4) O << ".v4"; - } - else + } else llvm_unreachable("Unknown Modifier"); - } - else + } else llvm_unreachable("Empty Modifier"); } -void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) { +void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { - emitLinkageDirective(F,O); + emitLinkageDirective(F, O); if (llvm::isKernelFunction(*F)) O << ".entry "; else @@ -696,8 +700,7 @@ void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) { O << ";\n"; } -static bool usedInGlobalVarDef(const Constant *C) -{ +static bool usedInGlobalVarDef(const Constant *C) { if (!C) return false; @@ -707,8 +710,8 @@ static bool usedInGlobalVarDef(const Constant *C) return true; } - for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end(); - ui!=ue; ++ui) { + for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); + ui != ue; ++ui) { const Constant *C = dyn_cast<Constant>(*ui); if (usedInGlobalVarDef(C)) return true; @@ -716,8 +719,7 @@ static bool usedInGlobalVarDef(const Constant *C) return false; } -static bool usedInOneFunc(const User *U, Function const *&oneFunc) -{ +static bool usedInOneFunc(const User *U, Function const *&oneFunc) { if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { if (othergv->getName().str() == "llvm.used") return true; @@ -730,19 +732,17 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) return false; oneFunc = curFunc; return true; - } - else + } else return false; } if (const MDNode *md = dyn_cast<MDNode>(U)) if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || - (md->getName().str() == "llvm.dbg.sp"))) + (md->getName().str() == "llvm.dbg.sp"))) return true; - - for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end(); - ui!=ue; ++ui) { + for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end(); + ui != ue; ++ui) { if (usedInOneFunc(*ui, oneFunc) == false) return false; } @@ -776,16 +776,18 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { static bool useFuncSeen(const Constant *C, llvm::DenseMap<const Function *, bool> &seenMap) { - for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end(); - ui!=ue; ++ui) { + for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); + ui != ue; ++ui) { if (const Constant *cu = dyn_cast<Constant>(*ui)) { if (useFuncSeen(cu, seenMap)) return true; } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) { const BasicBlock *bb = I->getParent(); - if (!bb) continue; + if (!bb) + continue; const Function *caller = bb->getParent(); - if (!caller) continue; + if (!caller) + continue; if (seenMap.find(caller) != seenMap.end()) return true; } @@ -793,10 +795,9 @@ static bool useFuncSeen(const Constant *C, return false; } -void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { +void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) { llvm::DenseMap<const Function *, bool> seenMap; - for (Module::const_iterator FI=M.begin(), FE=M.end(); - FI!=FE; ++FI) { + for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { const Function *F = FI; if (F->isDeclaration()) { @@ -808,8 +809,9 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { emitDeclaration(F, O); continue; } - for (Value::const_use_iterator iter=F->use_begin(), - iterEnd=F->use_end(); iter!=iterEnd; ++iter) { + for (Value::const_use_iterator iter = F->use_begin(), + iterEnd = F->use_end(); + iter != iterEnd; ++iter) { if (const Constant *C = dyn_cast<Constant>(*iter)) { if (usedInGlobalVarDef(C)) { // The use is in the initialization of a global variable @@ -828,12 +830,15 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { } } - if (!isa<Instruction>(*iter)) continue; + if (!isa<Instruction>(*iter)) + continue; const Instruction *instr = cast<Instruction>(*iter); const BasicBlock *bb = instr->getParent(); - if (!bb) continue; + if (!bb) + continue; const Function *caller = bb->getParent(); - if (!caller) continue; + if (!caller) + continue; // If a caller has already been seen, then the caller is // appearing in the module before the callee. so print out @@ -852,9 +857,10 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { DebugInfoFinder DbgFinder; DbgFinder.processModule(M); - unsigned i=1; + unsigned i = 1; for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) { + E = DbgFinder.compile_unit_end(); + I != E; ++I) { DICompileUnit DIUnit(*I); StringRef Filename(DIUnit.getFilename()); StringRef Dirname(DIUnit.getDirectory()); @@ -871,7 +877,8 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { } for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), - E = DbgFinder.subprogram_end(); I != E; ++I) { + E = DbgFinder.subprogram_end(); + I != E; ++I) { DISubprogram SP(*I); StringRef Filename(SP.getFilename()); StringRef Dirname(SP.getDirectory()); @@ -887,7 +894,7 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { } } -bool NVPTXAsmPrinter::doInitialization (Module &M) { +bool NVPTXAsmPrinter::doInitialization(Module &M) { SmallString<128> Str1; raw_svector_ostream OS1(Str1); @@ -899,8 +906,8 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { //bool Result = AsmPrinter::doInitialization(M); // Initialize TargetLoweringObjectFile. - const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) - .Initialize(OutContext, TM); + const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) + .Initialize(OutContext, TM); Mang = new Mangler(OutContext, *TM.getDataLayout()); @@ -908,11 +915,9 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { emitHeader(M, OS1); OutStreamer.EmitRawText(OS1.str()); - // Already commented out //bool Result = AsmPrinter::doInitialization(M); - if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) recordAndEmitFilenames(M); @@ -926,16 +931,16 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { // global variable in order, and ensure that we emit it *after* its dependent // globals. We use a little extra memory maintaining both a set and a list to // have fast searches while maintaining a strict ordering. - SmallVector<GlobalVariable*,8> Globals; - DenseSet<GlobalVariable*> GVVisited; - DenseSet<GlobalVariable*> GVVisiting; + SmallVector<GlobalVariable *, 8> Globals; + DenseSet<GlobalVariable *> GVVisited; + DenseSet<GlobalVariable *> GVVisiting; // Visit each global variable, in order - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; + ++I) VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); - assert(GVVisited.size() == M.getGlobalList().size() && + assert(GVVisited.size() == M.getGlobalList().size() && "Missed a global variable"); assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); @@ -946,10 +951,10 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { OS2 << '\n'; OutStreamer.EmitRawText(OS2.str()); - return false; // success + return false; // success } -void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) { +void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { O << "//\n"; O << "// Generated by LLVM NVPTX Back-End\n"; O << "//\n"; @@ -989,12 +994,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { Module::GlobalListType &global_list = M.getGlobalList(); int i, n = global_list.size(); - GlobalVariable **gv_array = new GlobalVariable* [n]; + GlobalVariable **gv_array = new GlobalVariable *[n]; // first, back-up GlobalVariable in gv_array i = 0; for (Module::global_iterator I = global_list.begin(), E = global_list.end(); - I != E; ++I) + I != E; ++I) gv_array[i++] = &*I; // second, empty global_list @@ -1005,13 +1010,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { bool ret = AsmPrinter::doFinalization(M); // now we restore global variables - for (i = 0; i < n; i ++) + for (i = 0; i < n; i++) global_list.insert(global_list.end(), gv_array[i]); delete[] gv_array; return ret; - //bool Result = AsmPrinter::doFinalization(M); // Instead of calling the parents doFinalization, we may // clone parents doFinalization and customize here. @@ -1031,8 +1035,8 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { // external without init -> .extern // appending -> not allowed, assert. -void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O) -{ +void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, + raw_ostream &O) { if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { if (V->hasExternalLinkage()) { if (isa<GlobalVariable>(V)) { @@ -1059,8 +1063,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O) } } - -void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, +void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool processDemoted) { // Skip meta data @@ -1111,30 +1114,48 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, if (Initializer) CI = dyn_cast<ConstantInt>(Initializer); if (CI) { - unsigned sample=CI->getZExtValue(); + unsigned sample = CI->getZExtValue(); O << " = { "; - for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >> - __CLK_ADDRESS_BASE) ; i < 3 ; i++) { + for (int i = 0, + addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); + i < 3; i++) { O << "addr_mode_" << i << " = "; switch (addr) { - case 0: O << "wrap"; break; - case 1: O << "clamp_to_border"; break; - case 2: O << "clamp_to_edge"; break; - case 3: O << "wrap"; break; - case 4: O << "mirror"; break; + case 0: + O << "wrap"; + break; + case 1: + O << "clamp_to_border"; + break; + case 2: + O << "clamp_to_edge"; + break; + case 3: + O << "wrap"; + break; + case 4: + O << "mirror"; + break; } - O <<", "; + O << ", "; } O << "filter_mode = "; - switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) { - case 0: O << "nearest"; break; - case 1: O << "linear"; break; - case 2: assert ( 0 && "Anisotropic filtering is not supported"); - default: O << "nearest"; break; + switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { + case 0: + O << "nearest"; + break; + case 1: + O << "linear"; + break; + case 2: + assert(0 && "Anisotropic filtering is not supported"); + default: + O << "nearest"; + break; } - if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) { + if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { O << ", force_unnormalized_coords = 1"; } O << " }"; @@ -1176,7 +1197,6 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, else O << " .align " << GVar->getAlignment(); - if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { O << " ."; O << getPTXFundamentalTypeStr(ETy, false); @@ -1186,17 +1206,17 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, // Ptx allows variable initilization only for constant and global state // spaces. if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) - && GVar->hasInitializer()) { + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && + GVar->hasInitializer()) { Constant *Initializer = GVar->getInitializer(); if (!Initializer->isNullValue()) { - O << " = " ; + O << " = "; printScalarConstant(Initializer, O); } } } else { - unsigned int ElementSize =0; + unsigned int ElementSize = 0; // Although PTX has direct support for struct type and array type and // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for @@ -1210,54 +1230,49 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, // Ptx allows variable initilization only for constant and // global state spaces. if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) - && GVar->hasInitializer()) { + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && + GVar->hasInitializer()) { Constant *Initializer = GVar->getInitializer(); - if (!isa<UndefValue>(Initializer) && - !Initializer->isNullValue()) { + if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { AggBuffer aggBuffer(ElementSize, O, *this); bufferAggregateConstant(Initializer, &aggBuffer); if (aggBuffer.numSymbols) { if (nvptxSubtarget.is64Bit()) { - O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ; - O << ElementSize/8; - } - else { - O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ; - O << ElementSize/4; + O << " .u64 " << *Mang->getSymbol(GVar) << "["; + O << ElementSize / 8; + } else { + O << " .u32 " << *Mang->getSymbol(GVar) << "["; + O << ElementSize / 4; } O << "]"; - } - else { - O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; + } else { + O << " .b8 " << *Mang->getSymbol(GVar) << "["; O << ElementSize; O << "]"; } - O << " = {" ; + O << " = {"; aggBuffer.print(); O << "}"; - } - else { - O << " .b8 " << *Mang->getSymbol(GVar) ; + } else { + O << " .b8 " << *Mang->getSymbol(GVar); if (ElementSize) { - O <<"[" ; + O << "["; O << ElementSize; O << "]"; } } - } - else { + } else { O << " .b8 " << *Mang->getSymbol(GVar); if (ElementSize) { - O <<"[" ; + O << "["; O << ElementSize; O << "]"; } } break; default: - assert( 0 && "type not supported yet"); + assert(0 && "type not supported yet"); } } @@ -1270,7 +1285,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { std::vector<GlobalVariable *> &gvars = localDecls[f]; - for (unsigned i=0, e=gvars.size(); i!=e; ++i) { + for (unsigned i = 0, e = gvars.size(); i != e; ++i) { O << "\t// demoted variable\n\t"; printModuleLevelGV(gvars[i], O, true); } @@ -1280,24 +1295,24 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const { switch (AddressSpace) { case llvm::ADDRESS_SPACE_LOCAL: - O << "local" ; + O << "local"; break; case llvm::ADDRESS_SPACE_GLOBAL: - O << "global" ; + O << "global"; break; case llvm::ADDRESS_SPACE_CONST: // This logic should be consistent with that in // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp) if (nvptxSubtarget.hasGenericLdSt()) - O << "global" ; + O << "global"; else - O << "const" ; + O << "const"; break; case llvm::ADDRESS_SPACE_CONST_NOT_GEN: - O << "const" ; + O << "const"; break; case llvm::ADDRESS_SPACE_SHARED: - O << "shared" ; + O << "shared"; break; default: report_fatal_error("Bad address space found while emitting PTX"); @@ -1305,8 +1320,8 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, } } -std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, - bool useB4PTR) const { +std::string +NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { switch (Ty->getTypeID()) { default: llvm_unreachable("unexpected type"); @@ -1330,17 +1345,20 @@ std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, return "f64"; case Type::PointerTyID: if (nvptxSubtarget.is64Bit()) - if (useB4PTR) return "b64"; - else return "u64"; + if (useB4PTR) + return "b64"; + else + return "u64"; + else if (useB4PTR) + return "b32"; else - if (useB4PTR) return "b32"; - else return "u32"; + return "u32"; } llvm_unreachable("unexpected type"); return NULL; } -void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, +void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O) { const DataLayout *TD = TM.getDataLayout(); @@ -1364,7 +1382,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, return; } - int64_t ElementSize =0; + int64_t ElementSize = 0; // Although PTX has direct support for struct type and array type and LLVM IR // is very similar to PTX, the LLVM CodeGen does not support for targets that @@ -1375,22 +1393,19 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, case Type::ArrayTyID: case Type::VectorTyID: ElementSize = TD->getTypeStoreSize(ETy); - O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; + O << " .b8 " << *Mang->getSymbol(GVar) << "["; if (ElementSize) { - O << itostr(ElementSize) ; + O << itostr(ElementSize); } O << "]"; break; default: - assert( 0 && "type not supported yet"); + assert(0 && "type not supported yet"); } - return ; + return; } - -static unsigned int -getOpenCLAlignment(const DataLayout *TD, - Type *Ty) { +static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) return TD->getPrefTypeAlignment(Ty); @@ -1404,9 +1419,9 @@ getOpenCLAlignment(const DataLayout *TD, unsigned int numE = VTy->getNumElements(); unsigned int alignE = TD->getPrefTypeAlignment(ETy); if (numE == 3) - return 4*alignE; + return 4 * alignE; else - return numE*alignE; + return numE * alignE; } const StructType *STy = dyn_cast<StructType>(Ty); @@ -1414,7 +1429,7 @@ getOpenCLAlignment(const DataLayout *TD, unsigned int alignStruct = 1; // Go through each element of the struct and find the // largest alignment. - for (unsigned i=0, e=STy->getNumElements(); i != e; i++) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { Type *ETy = STy->getElementType(i); unsigned int align = getOpenCLAlignment(TD, ETy); if (align > alignStruct) @@ -1458,7 +1473,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { } for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { - if (i==paramIndex) { + if (i == paramIndex) { printParamName(I, paramIndex, O); return; } @@ -1466,8 +1481,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { llvm_unreachable("paramIndex out of bound"); } -void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, - raw_ostream &O) { +void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { const DataLayout *TD = TM.getDataLayout(); const AttributeSet &PAL = F->getAttributes(); const TargetLowering *TLI = TM.getTargetLowering(); @@ -1481,7 +1495,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, O << "(\n"; for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { - const Type *Ty = I->getType(); + Type *Ty = I->getType(); if (!first) O << ",\n"; @@ -1496,14 +1510,28 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex; else // Default image is read_only O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex; - } - else // Should be llvm::isSampler(*I) + } else // Should be llvm::isSampler(*I) O << "\t.param .samplerref " << *CurrentFnSym << "_param_" - << paramIndex; + << paramIndex; continue; } - if (PAL.hasAttribute(paramIndex+1, Attribute::ByVal) == false) { + if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { + if (Ty->isVectorTy()) { + // Just print .param .b8 .align <a> .param[size]; + // <a> = PAL.getparamalignment + // size = typeallocsize of element type + unsigned align = PAL.getParamAlignment(paramIndex + 1); + if (align == 0) + align = TD->getABITypeAlignment(Ty); + + unsigned sz = TD->getTypeAllocSize(Ty); + O << "\t.param .align " << align << " .b8 "; + printParamName(I, paramIndex, O); + O << "[" << sz << "]"; + + continue; + } // Just a scalar const PointerType *PTy = dyn_cast<PointerType>(Ty); if (isKernelFunc) { @@ -1514,7 +1542,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { Type *ETy = PTy->getElementType(); int addrSpace = PTy->getAddressSpace(); - switch(addrSpace) { + switch (addrSpace) { default: O << ".ptr "; break; @@ -1529,15 +1557,14 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, O << ".ptr .global "; break; } - O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " "; + O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; } printParamName(I, paramIndex, O); continue; } // non-pointer scalar to kernel func - O << "\t.param ." - << getPTXFundamentalTypeStr(Ty) << " "; + O << "\t.param ." << getPTXFundamentalTypeStr(Ty) << " "; printParamName(I, paramIndex, O); continue; } @@ -1546,9 +1573,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, unsigned sz = 0; if (isa<IntegerType>(Ty)) { sz = cast<IntegerType>(Ty)->getBitWidth(); - if (sz < 32) sz = 32; - } - else if (isa<PointerType>(Ty)) + if (sz < 32) + sz = 32; + } else if (isa<PointerType>(Ty)) sz = thePointerTy.getSizeInBits(); else sz = Ty->getPrimitiveSizeInBits(); @@ -1562,21 +1589,19 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, // param has byVal attribute. So should be a pointer const PointerType *PTy = dyn_cast<PointerType>(Ty); - assert(PTy && - "Param with byval attribute should be a pointer type"); + assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); if (isABI || isKernelFunc) { // Just print .param .b8 .align <a> .param[size]; // <a> = PAL.getparamalignment // size = typeallocsize of element type - unsigned align = PAL.getParamAlignment(paramIndex+1); + unsigned align = PAL.getParamAlignment(paramIndex + 1); if (align == 0) align = TD->getABITypeAlignment(ETy); unsigned sz = TD->getTypeAllocSize(ETy); - O << "\t.param .align " << align - << " .b8 "; + O << "\t.param .align " << align << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; continue; @@ -1587,7 +1612,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, // each vector element. SmallVector<EVT, 16> vtparts; ComputeValueVTs(*TLI, ETy, vtparts); - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -1595,15 +1620,17 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0,je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << "\t.reg .b" << sz << " "; printParamName(I, paramIndex, O); - if (j<je-1) O << ",\n"; + if (j < je - 1) + O << ",\n"; ++paramIndex; } - if (i<e-1) + if (i < e - 1) O << ",\n"; } --paramIndex; @@ -1620,9 +1647,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, emitFunctionParamList(F, O); } - -void NVPTXAsmPrinter:: -setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { +void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( + const MachineFunction &MF) { SmallString<128> Str; raw_svector_ostream O(Str); @@ -1635,14 +1661,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { const MachineFrameInfo *MFI = MF.getFrameInfo(); int NumBytes = (int) MFI->getStackSize(); if (NumBytes) { - O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" - << DEPOTNAME - << getFunctionNumber() << "[" << NumBytes << "];\n"; + O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME + << getFunctionNumber() << "[" << NumBytes << "];\n"; if (nvptxSubtarget.is64Bit()) { O << "\t.reg .b64 \t%SP;\n"; O << "\t.reg .b64 \t%SPL;\n"; - } - else { + } else { O << "\t.reg .b32 \t%SP;\n"; O << "\t.reg .b32 \t%SPL;\n"; } @@ -1653,12 +1677,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { // register number and the per class virtual register number. // We use the per class virtual register number in the ptx output. unsigned int numVRs = MRI->getNumVirtRegs(); - for (unsigned i=0; i< numVRs; i++) { + for (unsigned i = 0; i < numVRs; i++) { unsigned int vr = TRI->index2VirtReg(i); const TargetRegisterClass *RC = MRI->getRegClass(vr); std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[RC->getID()]; int n = regmap.size(); - regmap.insert(std::make_pair(vr, n+1)); + regmap.insert(std::make_pair(vr, n + 1)); } // Emit register declarations @@ -1702,23 +1726,20 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { OutStreamer.EmitRawText(O.str()); } - void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { - APFloat APF = APFloat(Fp->getValueAPF()); // make a copy + APFloat APF = APFloat(Fp->getValueAPF()); // make a copy bool ignored; unsigned int numHex; const char *lead; - if (Fp->getType()->getTypeID()==Type::FloatTyID) { + if (Fp->getType()->getTypeID() == Type::FloatTyID) { numHex = 8; lead = "0f"; - APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, - &ignored); + APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { numHex = 16; lead = "0d"; - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, - &ignored); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); } else llvm_unreachable("unsupported fp type"); @@ -1760,7 +1781,6 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { llvm_unreachable("Not scalar type found in printScalarConstant()"); } - void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) { @@ -1768,7 +1788,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, if (isa<UndefValue>(CPV) || CPV->isNullValue()) { int s = TD->getTypeAllocSize(CPV->getType()); - if (s<Bytes) + if (s < Bytes) s = Bytes; aggBuffer->addZeros(s); return; @@ -1779,28 +1799,26 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, case Type::IntegerTyID: { const Type *ETy = CPV->getType(); - if ( ETy == Type::getInt8Ty(CPV->getContext()) ){ + if (ETy == Type::getInt8Ty(CPV->getContext())) { unsigned char c = (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); ptr = &c; aggBuffer->addBytes(ptr, 1, Bytes); - } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) { - short int16 = - (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); - ptr = (unsigned char*)&int16; + } else if (ETy == Type::getInt16Ty(CPV->getContext())) { + short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); + ptr = (unsigned char *)&int16; aggBuffer->addBytes(ptr, 2, Bytes); - } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) { + } else if (ETy == Type::getInt32Ty(CPV->getContext())) { if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { - int int32 =(int)(constInt->getZExtValue()); - ptr = (unsigned char*)&int32; + int int32 = (int)(constInt->getZExtValue()); + ptr = (unsigned char *)&int32; aggBuffer->addBytes(ptr, 4, Bytes); break; } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { - if (ConstantInt *constInt = - dyn_cast<ConstantInt>(ConstantFoldConstantExpression( - Cexpr, TD))) { - int int32 =(int)(constInt->getZExtValue()); - ptr = (unsigned char*)&int32; + if (ConstantInt *constInt = dyn_cast<ConstantInt>( + ConstantFoldConstantExpression(Cexpr, TD))) { + int int32 = (int)(constInt->getZExtValue()); + ptr = (unsigned char *)&int32; aggBuffer->addBytes(ptr, 4, Bytes); break; } @@ -1812,17 +1830,17 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, } } llvm_unreachable("unsupported integer const type"); - } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) { + } else if (ETy == Type::getInt64Ty(CPV->getContext())) { if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { - long long int64 =(long long)(constInt->getZExtValue()); - ptr = (unsigned char*)&int64; + long long int64 = (long long)(constInt->getZExtValue()); + ptr = (unsigned char *)&int64; aggBuffer->addBytes(ptr, 8, Bytes); break; } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (ConstantInt *constInt = dyn_cast<ConstantInt>( - ConstantFoldConstantExpression(Cexpr, TD))) { - long long int64 =(long long)(constInt->getZExtValue()); - ptr = (unsigned char*)&int64; + ConstantFoldConstantExpression(Cexpr, TD))) { + long long int64 = (long long)(constInt->getZExtValue()); + ptr = (unsigned char *)&int64; aggBuffer->addBytes(ptr, 8, Bytes); break; } @@ -1841,17 +1859,16 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, case Type::FloatTyID: case Type::DoubleTyID: { ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); - const Type* Ty = CFP->getType(); + const Type *Ty = CFP->getType(); if (Ty == Type::getFloatTy(CPV->getContext())) { - float float32 = (float)CFP->getValueAPF().convertToFloat(); - ptr = (unsigned char*)&float32; + float float32 = (float) CFP->getValueAPF().convertToFloat(); + ptr = (unsigned char *)&float32; aggBuffer->addBytes(ptr, 4, Bytes); } else if (Ty == Type::getDoubleTy(CPV->getContext())) { double float64 = CFP->getValueAPF().convertToDouble(); - ptr = (unsigned char*)&float64; + ptr = (unsigned char *)&float64; aggBuffer->addBytes(ptr, 8, Bytes); - } - else { + } else { llvm_unreachable("unsupported fp const type"); } break; @@ -1859,8 +1876,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, case Type::PointerTyID: { if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { aggBuffer->addSymbol(GVar); - } - else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { Value *v = Cexpr->stripPointerCasts(); aggBuffer->addSymbol(v); } @@ -1876,10 +1892,9 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, isa<ConstantStruct>(CPV)) { int ElementSize = TD->getTypeAllocSize(CPV->getType()); bufferAggregateConstant(CPV, aggBuffer); - if ( Bytes > ElementSize ) - aggBuffer->addZeros(Bytes-ElementSize); - } - else if (isa<ConstantAggregateZero>(CPV)) + if (Bytes > ElementSize) + aggBuffer->addZeros(Bytes - ElementSize); + } else if (isa<ConstantAggregateZero>(CPV)) aggBuffer->addZeros(Bytes); else llvm_unreachable("Unexpected Constant type"); @@ -1905,7 +1920,7 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, } if (const ConstantDataSequential *CDS = - dyn_cast<ConstantDataSequential>(CPV)) { + dyn_cast<ConstantDataSequential>(CPV)) { if (CDS->getNumElements()) for (unsigned i = 0; i < CDS->getNumElements(); ++i) bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, @@ -1913,20 +1928,18 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, return; } - if (isa<ConstantStruct>(CPV)) { if (CPV->getNumOperands()) { StructType *ST = cast<StructType>(CPV->getType()); for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { - if ( i == (e - 1)) + if (i == (e - 1)) Bytes = TD->getStructLayout(ST)->getElementOffset(0) + - TD->getTypeAllocSize(ST) - - TD->getStructLayout(ST)->getElementOffset(i); + TD->getTypeAllocSize(ST) - + TD->getStructLayout(ST)->getElementOffset(i); else - Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) - - TD->getStructLayout(ST)->getElementOffset(i); - bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, - aggBuffer); + Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - + TD->getStructLayout(ST)->getElementOffset(i); + bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); } } return; @@ -1937,15 +1950,13 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, // buildTypeNameMap - Run through symbol table looking for type names. // - bool NVPTXAsmPrinter::isImageType(const Type *Ty) { std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); - if (PI != TypeNameMap.end() && - (!PI->second.compare("struct._image1d_t") || - !PI->second.compare("struct._image2d_t") || - !PI->second.compare("struct._image3d_t"))) + if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || + !PI->second.compare("struct._image2d_t") || + !PI->second.compare("struct._image3d_t"))) return true; return false; @@ -1955,10 +1966,10 @@ bool NVPTXAsmPrinter::isImageType(const Type *Ty) { /// bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { + const char *ExtraCode, raw_ostream &O) { if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) return true; // Unknown modifier. + if (ExtraCode[1] != 0) + return true; // Unknown modifier. switch (ExtraCode[0]) { default: @@ -1974,13 +1985,11 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; } -bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, - unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { +bool NVPTXAsmPrinter::PrintAsmMemoryOperand( + const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier + return true; // Unknown modifier O << '['; printMemOperand(MI, OpNo, O); @@ -1989,41 +1998,69 @@ bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) -{ - switch(MI.getOpcode()) { +bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { + switch (MI.getOpcode()) { default: return false; - case NVPTX::CallArgBeginInst: case NVPTX::CallArgEndInst0: - case NVPTX::CallArgEndInst1: case NVPTX::CallArgF32: - case NVPTX::CallArgF64: case NVPTX::CallArgI16: - case NVPTX::CallArgI32: case NVPTX::CallArgI32imm: - case NVPTX::CallArgI64: case NVPTX::CallArgI8: - case NVPTX::CallArgParam: case NVPTX::CallVoidInst: - case NVPTX::CallVoidInstReg: case NVPTX::Callseq_End: + case NVPTX::CallArgBeginInst: + case NVPTX::CallArgEndInst0: + case NVPTX::CallArgEndInst1: + case NVPTX::CallArgF32: + case NVPTX::CallArgF64: + case NVPTX::CallArgI16: + case NVPTX::CallArgI32: + case NVPTX::CallArgI32imm: + case NVPTX::CallArgI64: + case NVPTX::CallArgI8: + case NVPTX::CallArgParam: + case NVPTX::CallVoidInst: + case NVPTX::CallVoidInstReg: + case NVPTX::Callseq_End: case NVPTX::CallVoidInstReg64: - case NVPTX::DeclareParamInst: case NVPTX::DeclareRetMemInst: - case NVPTX::DeclareRetRegInst: case NVPTX::DeclareRetScalarInst: - case NVPTX::DeclareScalarParamInst: case NVPTX::DeclareScalarRegInst: - case NVPTX::StoreParamF32: case NVPTX::StoreParamF64: - case NVPTX::StoreParamI16: case NVPTX::StoreParamI32: - case NVPTX::StoreParamI64: case NVPTX::StoreParamI8: - case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8: - case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16: - case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64: - case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32: - case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8: - case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64: - case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32: - case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64: - case NVPTX::LastCallArgI8: case NVPTX::LastCallArgParam: - case NVPTX::LoadParamMemF32: case NVPTX::LoadParamMemF64: - case NVPTX::LoadParamMemI16: case NVPTX::LoadParamMemI32: - case NVPTX::LoadParamMemI64: case NVPTX::LoadParamMemI8: - case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64: - case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32: - case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8: - case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE: + case NVPTX::DeclareParamInst: + case NVPTX::DeclareRetMemInst: + case NVPTX::DeclareRetRegInst: + case NVPTX::DeclareRetScalarInst: + case NVPTX::DeclareScalarParamInst: + case NVPTX::DeclareScalarRegInst: + case NVPTX::StoreParamF32: + case NVPTX::StoreParamF64: + case NVPTX::StoreParamI16: + case NVPTX::StoreParamI32: + case NVPTX::StoreParamI64: + case NVPTX::StoreParamI8: + case NVPTX::StoreParamS32I8: + case NVPTX::StoreParamU32I8: + case NVPTX::StoreParamS32I16: + case NVPTX::StoreParamU32I16: + case NVPTX::StoreRetvalF32: + case NVPTX::StoreRetvalF64: + case NVPTX::StoreRetvalI16: + case NVPTX::StoreRetvalI32: + case NVPTX::StoreRetvalI64: + case NVPTX::StoreRetvalI8: + case NVPTX::LastCallArgF32: + case NVPTX::LastCallArgF64: + case NVPTX::LastCallArgI16: + case NVPTX::LastCallArgI32: + case NVPTX::LastCallArgI32imm: + case NVPTX::LastCallArgI64: + case NVPTX::LastCallArgI8: + case NVPTX::LastCallArgParam: + case NVPTX::LoadParamMemF32: + case NVPTX::LoadParamMemF64: + case NVPTX::LoadParamMemI16: + case NVPTX::LoadParamMemI32: + case NVPTX::LoadParamMemI64: + case NVPTX::LoadParamMemI8: + case NVPTX::LoadParamRegF32: + case NVPTX::LoadParamRegF64: + case NVPTX::LoadParamRegI16: + case NVPTX::LoadParamRegI32: + case NVPTX::LoadParamRegI64: + case NVPTX::LoadParamRegI8: + case NVPTX::PrototypeInst: + case NVPTX::DBG_VALUE: return true; } return false; @@ -2035,10 +2072,9 @@ extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); } - void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { std::stringstream temp; - LineReader * reader = this->getReader(filename.str()); + LineReader *reader = this->getReader(filename.str()); temp << "\n//"; temp << filename.str(); temp << ":"; @@ -2049,29 +2085,26 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { this->OutStreamer.EmitRawText(Twine(temp.str())); } - LineReader *NVPTXAsmPrinter::getReader(std::string filename) { - if (reader == NULL) { - reader = new LineReader(filename); + if (reader == NULL) { + reader = new LineReader(filename); } if (reader->fileName() != filename) { delete reader; - reader = new LineReader(filename); + reader = new LineReader(filename); } return reader; } - -std::string -LineReader::readLine(unsigned lineNum) { +std::string LineReader::readLine(unsigned lineNum) { if (lineNum < theCurLine) { theCurLine = 0; - fstr.seekg(0,std::ios::beg); + fstr.seekg(0, std::ios::beg); } while (theCurLine < lineNum) { - fstr.getline(buff,500); + fstr.getline(buff, 500); theCurLine++; } return buff; diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 42498f0bf7..6dc9fc0ffe 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -43,15 +43,15 @@ // This is defined in AsmPrinter.cpp. // Used to process the constant expressions in initializers. namespace nvptx { -const llvm::MCExpr *LowerConstant(const llvm::Constant *CV, - llvm::AsmPrinter &AP) ; +const llvm::MCExpr * +LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP); } namespace llvm { class LineReader { private: - unsigned theCurLine ; + unsigned theCurLine; std::ifstream fstr; char buff[512]; std::string theFileName; @@ -63,17 +63,12 @@ public: theFileName = filename; } std::string fileName() { return theFileName; } - ~LineReader() { - fstr.close(); - } + ~LineReader() { fstr.close(); } std::string readLine(unsigned line); }; - - class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { - class AggBuffer { // Used to buffer the emitted string for initializing global // aggregates. @@ -92,7 +87,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { // Once we have this AggBuffer setup, we can choose how to print // it out. public: - unsigned size; // size of the buffer in bytes + unsigned size; // size of the buffer in bytes unsigned char *buffer; // the buffer unsigned numSymbols; // number of symbol addresses SmallVector<unsigned, 4> symbolPosInBuffer; @@ -105,33 +100,31 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { public: AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP) - :O(_O),AP(_AP) { + : O(_O), AP(_AP) { buffer = new unsigned char[_size]; size = _size; curpos = 0; numSymbols = 0; } - ~AggBuffer() { - delete [] buffer; - } + ~AggBuffer() { delete[] buffer; } unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) { - assert((curpos+Num) <= size); - assert((curpos+Bytes) <= size); - for ( int i= 0; i < Num; ++i) { + assert((curpos + Num) <= size); + assert((curpos + Bytes) <= size); + for (int i = 0; i < Num; ++i) { buffer[curpos] = Ptr[i]; - curpos ++; + curpos++; } - for ( int i=Num; i < Bytes ; ++i) { + for (int i = Num; i < Bytes; ++i) { buffer[curpos] = 0; - curpos ++; + curpos++; } return curpos; } unsigned addZeros(int Num) { - assert((curpos+Num) <= size); - for ( int i= 0; i < Num; ++i) { + assert((curpos + Num) <= size); + for (int i = 0; i < Num; ++i) { buffer[curpos] = 0; - curpos ++; + curpos++; } return curpos; } @@ -143,10 +136,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { void print() { if (numSymbols == 0) { // print out in bytes - for (unsigned i=0; i<size; i++) { + for (unsigned i = 0; i < size; i++) { if (i) O << ", "; - O << (unsigned int)buffer[i]; + O << (unsigned int) buffer[i]; } } else { // print out in 4-bytes or 8-bytes @@ -156,7 +149,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { unsigned int nBytes = 4; if (AP.nvptxSubtarget.is64Bit()) nBytes = 8; - for (pos=0; pos<size; pos+=nBytes) { + for (pos = 0; pos < size; pos += nBytes) { if (pos) O << ", "; if (pos == nextSymbolPos) { @@ -164,22 +157,19 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { MCSymbol *Name = AP.Mang->getSymbol(GVar); O << *Name; - } - else if (ConstantExpr *Cexpr = - dyn_cast<ConstantExpr>(v)) { + } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) { O << *nvptx::LowerConstant(Cexpr, AP); } else llvm_unreachable("symbol type unknown"); nSym++; if (nSym >= numSymbols) - nextSymbolPos = size+1; + nextSymbolPos = size + 1; else nextSymbolPos = symbolPosInBuffer[nSym]; - } else - if (nBytes == 4) - O << *(unsigned int*)(buffer+pos); - else - O << *(unsigned long long*)(buffer+pos); + } else if (nBytes == 4) + O << *(unsigned int *)(buffer + pos); + else + O << *(unsigned long long *)(buffer + pos); } } } @@ -189,10 +179,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { virtual void emitSrcInText(StringRef filename, unsigned line); -private : - virtual const char *getPassName() const { - return "NVPTX Assembly Printer"; - } +private: + virtual const char *getPassName() const { return "NVPTX Assembly Printer"; } const Function *F; std::string CurrentFnName; @@ -207,31 +195,28 @@ private : void printGlobalVariable(const GlobalVariable *GVar); void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier=0); + const char *Modifier = 0); void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier=0); - void printVecModifiedImmediate(const MachineOperand &MO, - const char *Modifier, raw_ostream &O); + const char *Modifier = 0); + void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier, + raw_ostream &O); void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier=0); + const char *Modifier = 0); void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const; // definition autogenerated. void printInstruction(const MachineInstr *MI, raw_ostream &O); - void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, - bool=false); + void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false); void printParamName(int paramIndex, raw_ostream &O); void printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O); void emitHeader(Module &M, raw_ostream &O); - void emitKernelFunctionDirectives(const Function& F, - raw_ostream &O) const; + void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const; void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O); void emitFunctionExternParamList(const MachineFunction &MF); void emitFunctionParamList(const Function *, raw_ostream &O); void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O); void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF); - void emitFunctionTempData(const MachineFunction &MF, - unsigned &FrameSize); + void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize); bool isImageType(const Type *Ty); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, @@ -269,17 +254,16 @@ private: void recordAndEmitFilenames(Module &); void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O); - void emitPTXAddressSpace(unsigned int AddressSpace, - raw_ostream &O) const; - std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const ; - void printScalarConstant(Constant *CPV, raw_ostream &O) ; - void printFPConstant(const ConstantFP *Fp, raw_ostream &O) ; - void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) ; - void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer) ; + void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const; + std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const; + void printScalarConstant(Constant *CPV, raw_ostream &O); + void printFPConstant(const ConstantFP *Fp, raw_ostream &O); + void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer); + void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer); void printOperandProper(const MachineOperand &MO); - void emitLinkageDirective(const GlobalValue* V, raw_ostream &O); + void emitLinkageDirective(const GlobalValue *V, raw_ostream &O); void emitDeclarations(Module &, raw_ostream &O); void emitDeclaration(const Function *, raw_ostream &O); @@ -289,10 +273,9 @@ private: LineReader *reader; LineReader *getReader(std::string); public: - NVPTXAsmPrinter(TargetMachine &TM, - MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), - nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { + NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer), + nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { CurrentBankselLabelInBasicBlock = ""; VRidGlobal2LocalMap = NULL; reader = NULL; diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp index bb2c55ceed..6533da5102 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -25,9 +25,7 @@ using namespace llvm; -bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { - return true; -} +bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; } void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { if (MF.getFrameInfo()->hasStackObjects()) { @@ -42,46 +40,39 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { // mov %SPL, %depot; // cvta.local %SP, %SPL; if (is64bit) { - MachineInstr *MI = BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64), - NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); - BuildMI(MBB, MI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal) - .addReg(NVPTX::VRDepot); + MachineInstr *MI = BuildMI( + MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64), + NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); + BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr), + NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot); } else { - MachineInstr *MI = BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::cvta_local_yes), - NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); - BuildMI(MBB, MI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal) - .addReg(NVPTX::VRDepot); + MachineInstr *MI = BuildMI( + MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes), + NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); + BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr), + NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot); } - } - else { + } else { // mov %SP, %depot; if (is64bit) - BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame) - .addReg(NVPTX::VRDepot); + BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr), + NVPTX::VRFrame).addReg(NVPTX::VRDepot); else - BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame) - .addReg(NVPTX::VRDepot); + BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr), + NVPTX::VRFrame).addReg(NVPTX::VRDepot); } } } void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { -} + MachineBasicBlock &MBB) const {} // This function eliminates ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions -void NVPTXFrameLowering:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { +void NVPTXFrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { // Simply discard ADJCALLSTACKDOWN, // ADJCALLSTACKUP instructions. MBB.erase(I); } - diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h index d34e7bec1d..819f1dd3f4 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -16,7 +16,6 @@ #include "llvm/Target/TargetFrameLowering.h" - namespace llvm { class NVPTXTargetMachine; @@ -26,13 +25,12 @@ class NVPTXFrameLowering : public TargetFrameLowering { public: explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit) - : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), - tm(_tm), is64bit(_is64bit) {} + : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm), + is64bit(_is64bit) {} virtual bool hasFP(const MachineFunction &MF) const; virtual void emitPrologue(MachineFunction &MF) const; - virtual void emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const; + virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 481f13afd1..e862988c85 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "NVPTXISelDAGToDAG.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" @@ -26,27 +25,22 @@ using namespace llvm; - -static cl::opt<bool> -UseFMADInstruction("nvptx-mad-enable", - cl::ZeroOrMore, - cl::desc("NVPTX Specific: Enable generating FMAD instructions"), - cl::init(false)); +static cl::opt<bool> UseFMADInstruction( + "nvptx-mad-enable", cl::ZeroOrMore, + cl::desc("NVPTX Specific: Enable generating FMAD instructions"), + cl::init(false)); static cl::opt<int> -FMAContractLevel("nvptx-fma-level", - cl::ZeroOrMore, +FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" - " 1: do it 2: do it aggressively"), - cl::init(2)); - + " 1: do it 2: do it aggressively"), + cl::init(2)); -static cl::opt<int> -UsePrecDivF32("nvptx-prec-divf32", - cl::ZeroOrMore, - cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" - " IEEE Compliant F32 div.rnd if avaiable."), - cl::init(2)); +static cl::opt<int> UsePrecDivF32( + "nvptx-prec-divf32", cl::ZeroOrMore, + cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" + " IEEE Compliant F32 div.rnd if avaiable."), + cl::init(2)); /// createNVPTXISelDag - This pass converts a legalized DAG into a /// NVPTX-specific DAG, ready for instruction scheduling. @@ -55,26 +49,22 @@ FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, return new NVPTXDAGToDAGISel(TM, OptLevel); } - NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel) -: SelectionDAGISel(tm, OptLevel), - Subtarget(tm.getSubtarget<NVPTXSubtarget>()) -{ + : SelectionDAGISel(tm, OptLevel), + Subtarget(tm.getSubtarget<NVPTXSubtarget>()) { // Always do fma.f32 fpcontract if the target supports the instruction. // Always do fma.f64 fpcontract if the target supports the instruction. // Do mad.f32 is nvptx-mad-enable is specified and the target does not // support fma.f32. doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32(); - doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && - (FMAContractLevel>=1); - doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && - (FMAContractLevel>=1); - doFMAF32AGG = (OptLevel > 0) && Subtarget.hasFMAF32() && - (FMAContractLevel==2); - doFMAF64AGG = (OptLevel > 0) && Subtarget.hasFMAF64() && - (FMAContractLevel==2); + doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1); + doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1); + doFMAF32AGG = + (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2); + doFMAF64AGG = + (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2); allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction; @@ -92,10 +82,10 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, /// Select - Select instructions not customized! Used for /// expanded, promoted and normal instructions. -SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) { +SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) - return NULL; // Already selected. + return NULL; // Already selected. SDNode *ResNode = NULL; switch (N->getOpcode()) { @@ -119,30 +109,34 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) { case NVPTXISD::StoreV4: ResNode = SelectStoreVector(N); break; - default: break; + default: + break; } if (ResNode) return ResNode; return SelectCode(N); } - -static unsigned int -getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget) -{ +static unsigned int getCodeAddrSpace(MemSDNode *N, + const NVPTXSubtarget &Subtarget) { const Value *Src = N->getSrcValue(); if (!Src) return NVPTX::PTXLdStInstCode::LOCAL; if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) { switch (PT->getAddressSpace()) { - case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; - case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL; - case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED; + case llvm::ADDRESS_SPACE_LOCAL: + return NVPTX::PTXLdStInstCode::LOCAL; + case llvm::ADDRESS_SPACE_GLOBAL: + return NVPTX::PTXLdStInstCode::GLOBAL; + case llvm::ADDRESS_SPACE_SHARED: + return NVPTX::PTXLdStInstCode::SHARED; case llvm::ADDRESS_SPACE_CONST_NOT_GEN: return NVPTX::PTXLdStInstCode::CONSTANT; - case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC; - case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM; + case llvm::ADDRESS_SPACE_GENERIC: + return NVPTX::PTXLdStInstCode::GENERIC; + case llvm::ADDRESS_SPACE_PARAM: + return NVPTX::PTXLdStInstCode::PARAM; case llvm::ADDRESS_SPACE_CONST: // If the arch supports generic address space, translate it to GLOBAL // for correctness. @@ -153,18 +147,18 @@ getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget) return NVPTX::PTXLdStInstCode::GLOBAL; else return NVPTX::PTXLdStInstCode::CONSTANT; - default: break; + default: + break; } } return NVPTX::PTXLdStInstCode::LOCAL; } - -SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { +SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { DebugLoc dl = N->getDebugLoc(); LoadSDNode *LD = cast<LoadSDNode>(N); EVT LoadedVT = LD->getMemoryVT(); - SDNode *NVPTXLD= NULL; + SDNode *NVPTXLD = NULL; // do not support pre/post inc/dec if (LD->isIndexed()) @@ -204,7 +198,7 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { // type is integer // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float MVT ScalarVT = SimpleVT.getScalarType(); - unsigned fromTypeWidth = ScalarVT.getSizeInBits(); + unsigned fromTypeWidth = ScalarVT.getSizeInBits(); unsigned int fromType; if ((LD->getExtensionType() == ISD::SEXTLOAD)) fromType = NVPTX::PTXLdStInstCode::Signed; @@ -223,105 +217,166 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { if (SelectDirectAddr(N1, Addr)) { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_avar; break; - case MVT::i16: Opcode = NVPTX::LD_i16_avar; break; - case MVT::i32: Opcode = NVPTX::LD_i32_avar; break; - case MVT::i64: Opcode = NVPTX::LD_i64_avar; break; - case MVT::f32: Opcode = NVPTX::LD_f32_avar; break; - case MVT::f64: Opcode = NVPTX::LD_f64_avar; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_avar; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_avar; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_avar; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_avar; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_avar; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_avar; + break; + default: + return NULL; } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - Addr, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 7); - } else if (Subtarget.is64Bit()? - SelectADDRsi64(N1.getNode(), N1, Base, Offset): - SelectADDRsi(N1.getNode(), N1, Base, Offset)) { + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(fromType), + getI32Imm(fromTypeWidth), Addr, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7); + } else if (Subtarget.is64Bit() + ? SelectADDRsi64(N1.getNode(), N1, Base, Offset) + : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_asi; break; - case MVT::i16: Opcode = NVPTX::LD_i16_asi; break; - case MVT::i32: Opcode = NVPTX::LD_i32_asi; break; - case MVT::i64: Opcode = NVPTX::LD_i64_asi; break; - case MVT::f32: Opcode = NVPTX::LD_f32_asi; break; - case MVT::f64: Opcode = NVPTX::LD_f64_asi; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_asi; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_asi; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_asi; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_asi; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_asi; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_asi; + break; + default: + return NULL; } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - Base, Offset, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 8); - } else if (Subtarget.is64Bit()? - SelectADDRri64(N1.getNode(), N1, Base, Offset): - SelectADDRri(N1.getNode(), N1, Base, Offset)) { + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(fromType), + getI32Imm(fromTypeWidth), Base, Offset, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8); + } else if (Subtarget.is64Bit() + ? SelectADDRri64(N1.getNode(), N1, Base, Offset) + : SelectADDRri(N1.getNode(), N1, Base, Offset)) { if (Subtarget.is64Bit()) { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_ari_64; break; - case MVT::i16: Opcode = NVPTX::LD_i16_ari_64; break; - case MVT::i32: Opcode = NVPTX::LD_i32_ari_64; break; - case MVT::i64: Opcode = NVPTX::LD_i64_ari_64; break; - case MVT::f32: Opcode = NVPTX::LD_f32_ari_64; break; - case MVT::f64: Opcode = NVPTX::LD_f64_ari_64; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_ari_64; + break; + default: + return NULL; } } else { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_ari; break; - case MVT::i16: Opcode = NVPTX::LD_i16_ari; break; - case MVT::i32: Opcode = NVPTX::LD_i32_ari; break; - case MVT::i64: Opcode = NVPTX::LD_i64_ari; break; - case MVT::f32: Opcode = NVPTX::LD_f32_ari; break; - case MVT::f64: Opcode = NVPTX::LD_f64_ari; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_ari; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_ari; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_ari; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_ari; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_ari; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_ari; + break; + default: + return NULL; } } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - Base, Offset, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 8); - } - else { + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(fromType), + getI32Imm(fromTypeWidth), Base, Offset, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8); + } else { if (Subtarget.is64Bit()) { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_areg_64; break; - case MVT::i16: Opcode = NVPTX::LD_i16_areg_64; break; - case MVT::i32: Opcode = NVPTX::LD_i32_areg_64; break; - case MVT::i64: Opcode = NVPTX::LD_i64_areg_64; break; - case MVT::f32: Opcode = NVPTX::LD_f32_areg_64; break; - case MVT::f64: Opcode = NVPTX::LD_f64_areg_64; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_areg_64; + break; + default: + return NULL; } } else { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_areg; break; - case MVT::i16: Opcode = NVPTX::LD_i16_areg; break; - case MVT::i32: Opcode = NVPTX::LD_i32_areg; break; - case MVT::i64: Opcode = NVPTX::LD_i64_areg; break; - case MVT::f32: Opcode = NVPTX::LD_f32_areg; break; - case MVT::f64: Opcode = NVPTX::LD_f64_areg; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_areg; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_areg; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_areg; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_areg; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_areg; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_areg; + break; + default: + return NULL; } } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - N1, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 7); + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(fromType), + getI32Imm(fromTypeWidth), N1, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7); } if (NVPTXLD != NULL) { @@ -344,9 +399,8 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { MemSDNode *MemSD = cast<MemSDNode>(N); EVT LoadedVT = MemSD->getMemoryVT(); - if (!LoadedVT.isSimple()) - return NULL; + return NULL; // Address Space Setting unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); @@ -369,11 +423,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { // type is integer // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float MVT ScalarVT = SimpleVT.getScalarType(); - unsigned FromTypeWidth = ScalarVT.getSizeInBits(); + unsigned FromTypeWidth = ScalarVT.getSizeInBits(); unsigned int FromType; // The last operand holds the original LoadSDNode::getExtensionType() value - unsigned ExtensionType = - cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue(); + unsigned ExtensionType = cast<ConstantSDNode>( + N->getOperand(N->getNumOperands() - 1))->getZExtValue(); if (ExtensionType == ISD::SEXTLOAD) FromType = NVPTX::PTXLdStInstCode::Signed; else if (ScalarVT.isFloatingPoint()) @@ -384,197 +438,328 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { unsigned VecType; switch (N->getOpcode()) { - case NVPTXISD::LoadV2: VecType = NVPTX::PTXLdStInstCode::V2; break; - case NVPTXISD::LoadV4: VecType = NVPTX::PTXLdStInstCode::V4; break; - default: return NULL; + case NVPTXISD::LoadV2: + VecType = NVPTX::PTXLdStInstCode::V2; + break; + case NVPTXISD::LoadV4: + VecType = NVPTX::PTXLdStInstCode::V4; + break; + default: + return NULL; } EVT EltVT = N->getValueType(0); if (SelectDirectAddr(Op1, Addr)) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_avar; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_avar; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_avar; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_avar; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_avar; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_avar; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_avar; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_avar; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_avar; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_avar; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_avar; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_avar; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_avar; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_avar; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_avar; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_avar; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_avar; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_avar; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_avar; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_avar; + break; } break; } - SDValue Ops[] = { getI32Imm(IsVolatile), - getI32Imm(CodeAddrSpace), - getI32Imm(VecType), - getI32Imm(FromType), - getI32Imm(FromTypeWidth), - Addr, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Addr, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); - } else if (Subtarget.is64Bit()? - SelectADDRsi64(Op1.getNode(), Op1, Base, Offset): - SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { + } else if (Subtarget.is64Bit() + ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) + : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_asi; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_asi; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_asi; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_asi; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_asi; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_asi; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_asi; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_asi; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_asi; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_asi; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_asi; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_asi; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_asi; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_asi; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_asi; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_asi; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_asi; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_asi; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_asi; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_asi; + break; } break; } - SDValue Ops[] = { getI32Imm(IsVolatile), - getI32Imm(CodeAddrSpace), - getI32Imm(VecType), - getI32Imm(FromType), - getI32Imm(FromTypeWidth), - Base, Offset, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Base, Offset, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); - } else if (Subtarget.is64Bit()? - SelectADDRri64(Op1.getNode(), Op1, Base, Offset): - SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { + } else if (Subtarget.is64Bit() + ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) + : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari_64; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari_64; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari_64; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari_64; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari_64; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_ari_64; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari_64; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari_64; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari_64; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_ari_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_ari; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_ari; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_ari; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_ari; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_ari; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_ari; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_ari; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_ari; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_ari; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_ari; + break; } break; } } - SDValue Ops[] = { getI32Imm(IsVolatile), - getI32Imm(CodeAddrSpace), - getI32Imm(VecType), - getI32Imm(FromType), - getI32Imm(FromTypeWidth), - Base, Offset, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Base, Offset, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); } else { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg_64; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg_64; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg_64; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg_64; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg_64; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_areg_64; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg_64; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg_64; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg_64; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_areg_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_areg; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_areg; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_areg; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_areg; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_areg; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_areg; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_areg; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_areg; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_areg; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_areg; + break; } break; } } - SDValue Ops[] = { getI32Imm(IsVolatile), - getI32Imm(CodeAddrSpace), - getI32Imm(VecType), - getI32Imm(FromType), - getI32Imm(FromTypeWidth), - Op1, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Op1, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); } @@ -598,89 +783,179 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { // Select opcode if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LDGV2: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break; - case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break; - case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; + break; } break; case NVPTXISD::LDGV4: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; + break; } break; case NVPTXISD::LDUV2: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break; - case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break; - case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; + break; } break; case NVPTXISD::LDUV4: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LDGV2: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break; - case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break; - case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; + break; } break; case NVPTXISD::LDGV4: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; + break; } break; case NVPTXISD::LDUV2: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break; - case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break; - case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; + break; } break; case NVPTXISD::LDUV4: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; + break; } break; } @@ -696,8 +971,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { return LD; } - -SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { +SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { DebugLoc dl = N->getDebugLoc(); StoreSDNode *ST = cast<StoreSDNode>(N); EVT StoreVT = ST->getMemoryVT(); @@ -738,7 +1012,7 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { // - for integer type, always use 'u' // MVT ScalarVT = SimpleVT.getScalarType(); - unsigned toTypeWidth = ScalarVT.getSizeInBits(); + unsigned toTypeWidth = ScalarVT.getSizeInBits(); unsigned int toType; if (ScalarVT.isFloatingPoint()) toType = NVPTX::PTXLdStInstCode::Float; @@ -757,108 +1031,166 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { if (SelectDirectAddr(N2, Addr)) { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_avar; break; - case MVT::i16: Opcode = NVPTX::ST_i16_avar; break; - case MVT::i32: Opcode = NVPTX::ST_i32_avar; break; - case MVT::i64: Opcode = NVPTX::ST_i64_avar; break; - case MVT::f32: Opcode = NVPTX::ST_f32_avar; break; - case MVT::f64: Opcode = NVPTX::ST_f64_avar; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_avar; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_avar; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_avar; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_avar; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_avar; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_avar; + break; + default: + return NULL; } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - Addr, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 8); - } else if (Subtarget.is64Bit()? - SelectADDRsi64(N2.getNode(), N2, Base, Offset): - SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), Addr, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8); + } else if (Subtarget.is64Bit() + ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) + : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_asi; break; - case MVT::i16: Opcode = NVPTX::ST_i16_asi; break; - case MVT::i32: Opcode = NVPTX::ST_i32_asi; break; - case MVT::i64: Opcode = NVPTX::ST_i64_asi; break; - case MVT::f32: Opcode = NVPTX::ST_f32_asi; break; - case MVT::f64: Opcode = NVPTX::ST_f64_asi; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_asi; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_asi; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_asi; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_asi; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_asi; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_asi; + break; + default: + return NULL; } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - Base, Offset, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 9); - } else if (Subtarget.is64Bit()? - SelectADDRri64(N2.getNode(), N2, Base, Offset): - SelectADDRri(N2.getNode(), N2, Base, Offset)) { + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), Base, Offset, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); + } else if (Subtarget.is64Bit() + ? SelectADDRri64(N2.getNode(), N2, Base, Offset) + : SelectADDRri(N2.getNode(), N2, Base, Offset)) { if (Subtarget.is64Bit()) { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_ari_64; break; - case MVT::i16: Opcode = NVPTX::ST_i16_ari_64; break; - case MVT::i32: Opcode = NVPTX::ST_i32_ari_64; break; - case MVT::i64: Opcode = NVPTX::ST_i64_ari_64; break; - case MVT::f32: Opcode = NVPTX::ST_f32_ari_64; break; - case MVT::f64: Opcode = NVPTX::ST_f64_ari_64; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_ari_64; + break; + default: + return NULL; } } else { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_ari; break; - case MVT::i16: Opcode = NVPTX::ST_i16_ari; break; - case MVT::i32: Opcode = NVPTX::ST_i32_ari; break; - case MVT::i64: Opcode = NVPTX::ST_i64_ari; break; - case MVT::f32: Opcode = NVPTX::ST_f32_ari; break; - case MVT::f64: Opcode = NVPTX::ST_f64_ari; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_ari; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_ari; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_ari; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_ari; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_ari; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_ari; + break; + default: + return NULL; } } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - Base, Offset, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 9); + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), Base, Offset, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); } else { if (Subtarget.is64Bit()) { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_areg_64; break; - case MVT::i16: Opcode = NVPTX::ST_i16_areg_64; break; - case MVT::i32: Opcode = NVPTX::ST_i32_areg_64; break; - case MVT::i64: Opcode = NVPTX::ST_i64_areg_64; break; - case MVT::f32: Opcode = NVPTX::ST_f32_areg_64; break; - case MVT::f64: Opcode = NVPTX::ST_f64_areg_64; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_areg_64; + break; + default: + return NULL; } } else { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_areg; break; - case MVT::i16: Opcode = NVPTX::ST_i16_areg; break; - case MVT::i32: Opcode = NVPTX::ST_i32_areg; break; - case MVT::i64: Opcode = NVPTX::ST_i64_areg; break; - case MVT::f32: Opcode = NVPTX::ST_f32_areg; break; - case MVT::f64: Opcode = NVPTX::ST_f64_areg; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_areg; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_areg; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_areg; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_areg; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_areg; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_areg; + break; + default: + return NULL; } } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - N2, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 8); + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), N2, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8); } if (NVPTXST != NULL) { @@ -901,14 +1233,13 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { // - for integer type, always use 'u' assert(StoreVT.isSimple() && "Store value is not simple"); MVT ScalarVT = StoreVT.getSimpleVT().getScalarType(); - unsigned ToTypeWidth = ScalarVT.getSizeInBits(); + unsigned ToTypeWidth = ScalarVT.getSizeInBits(); unsigned ToType; if (ScalarVT.isFloatingPoint()) ToType = NVPTX::PTXLdStInstCode::Float; else ToType = NVPTX::PTXLdStInstCode::Unsigned; - SmallVector<SDValue, 12> StOps; SDValue N2; unsigned VecType; @@ -928,7 +1259,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { StOps.push_back(N->getOperand(4)); N2 = N->getOperand(5); break; - default: return NULL; + default: + return NULL; } StOps.push_back(getI32Imm(IsVolatile)); @@ -939,105 +1271,197 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { if (SelectDirectAddr(N2, Addr)) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_avar; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_avar; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_avar; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_avar; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_avar; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_avar; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_avar; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_avar; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_avar; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_avar; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_avar; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_avar; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_avar; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_avar; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_avar; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_avar; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_avar; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_avar; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_avar; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_avar; + break; } break; } StOps.push_back(Addr); - } else if (Subtarget.is64Bit()? - SelectADDRsi64(N2.getNode(), N2, Base, Offset): - SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + } else if (Subtarget.is64Bit() + ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) + : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_asi; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_asi; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_asi; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_asi; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_asi; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_asi; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_asi; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_asi; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_asi; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_asi; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_asi; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_asi; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_asi; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_asi; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_asi; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_asi; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_asi; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_asi; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_asi; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_asi; + break; } break; } StOps.push_back(Base); StOps.push_back(Offset); - } else if (Subtarget.is64Bit()? - SelectADDRri64(N2.getNode(), N2, Base, Offset): - SelectADDRri(N2.getNode(), N2, Base, Offset)) { + } else if (Subtarget.is64Bit() + ? SelectADDRri64(N2.getNode(), N2, Base, Offset) + : SelectADDRri(N2.getNode(), N2, Base, Offset)) { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari_64; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari_64; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari_64; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari_64; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari_64; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_ari_64; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari_64; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari_64; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari_64; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_ari_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_ari; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_ari; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_ari; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_ari; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_ari; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_ari; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_ari; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_ari; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_ari; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_ari; + break; } break; } @@ -1047,49 +1471,95 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { } else { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg_64; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg_64; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg_64; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg_64; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg_64; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_areg_64; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg_64; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg_64; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg_64; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_areg_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_areg; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_areg; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_areg; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_areg; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_areg; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_areg; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_areg; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_areg; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_areg; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_areg; + break; } break; } @@ -1112,8 +1582,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { // A direct address could be a globaladdress or externalsymbol. bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { // Return true if TGA or ES. - if (N.getOpcode() == ISD::TargetGlobalAddress - || N.getOpcode() == ISD::TargetExternalSymbol) { + if (N.getOpcode() == ISD::TargetGlobalAddress || + N.getOpcode() == ISD::TargetExternalSymbol) { Address = N; return true; } @@ -1131,12 +1601,11 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { } // symbol+offset -bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue &Offset, - MVT mvt) { +bool NVPTXDAGToDAGISel::SelectADDRsi_imp( + SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { if (Addr.getOpcode() == ISD::ADD) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { - SDValue base=Addr.getOperand(0); + SDValue base = Addr.getOperand(0); if (SelectDirectAddr(base, Base)) { Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt); return true; @@ -1159,9 +1628,8 @@ bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr, } // register+offset -bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue &Offset, - MVT mvt) { +bool NVPTXDAGToDAGISel::SelectADDRri_imp( + SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); Offset = CurDAG->getTargetConstant(0, mvt); @@ -1169,7 +1637,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, } if (Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // direct calls. + return false; // direct calls. if (Addr.getOpcode() == ISD::ADD) { if (SelectDirectAddr(Addr.getOperand(0), Addr)) { @@ -1177,7 +1645,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, } if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { if (FrameIndexSDNode *FIN = - dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) + dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) // Constant offset from frame ref. Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); else @@ -1209,8 +1677,7 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, // (See SelectionDAGNodes.h). So we need to check for both. if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) { Src = mN->getSrcValue(); - } - else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) { + } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) { Src = mN->getSrcValue(); } if (!Src) @@ -1222,13 +1689,13 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. -bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps) { +bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { SDValue Op0, Op1; switch (ConstraintCode) { - default: return true; - case 'm': // memory + default: + return true; + case 'm': // memory if (SelectDirectAddr(Op, Op0)) { OutOps.push_back(Op0); OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); @@ -1251,10 +1718,8 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, // pattern matcher inserts a bunch of IMOVi8rr to convert // the imm to i8imm, and this causes instruction selection // to fail. -bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, - SDValue &Retval) { - if (!(N.getOpcode() == ISD::UNDEF) && - !(N.getOpcode() == ISD::Constant)) +bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) { + if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant)) return false; if (N.getOpcode() == ISD::UNDEF) diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 4ec924117a..70e8e46429 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -64,11 +64,10 @@ public: const NVPTXSubtarget &Subtarget; - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps); + virtual bool SelectInlineAsmMemoryOperand( + const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); private: - // Include the pieces autogenerated from the target description. +// Include the pieces autogenerated from the target description. #include "NVPTXGenDAGISel.inc" SDNode *Select(SDNode *N); @@ -99,7 +98,6 @@ private: bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset); - bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const; bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index e9a9fbfd04..6e01a5a820 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "NVPTXISelLowering.h" #include "NVPTX.h" #include "NVPTXTargetMachine.h" @@ -44,14 +43,14 @@ using namespace llvm; static unsigned int uniqueCallSite = 0; -static cl::opt<bool> -sched4reg("nvptx-sched4reg", - cl::desc("NVPTX Specific: schedule for register pressue"), - cl::init(false)); +static cl::opt<bool> sched4reg( + "nvptx-sched4reg", + cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); static bool IsPTXVectorType(MVT VT) { switch (VT.SimpleTy) { - default: return false; + default: + return false; case MVT::v2i8: case MVT::v4i8: case MVT::v2i16: @@ -62,22 +61,21 @@ static bool IsPTXVectorType(MVT VT) { case MVT::v2f32: case MVT::v4f32: case MVT::v2f64: - return true; + return true; } } // NVPTXTargetLowering Constructor. NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) -: TargetLowering(TM, new NVPTXTargetObjectFile()), - nvTM(&TM), - nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { + : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM), + nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { // always lower memset, memcpy, and memmove intrinsics to load/store // instructions, rather // then generating calls to memset, mempcy or memmove. - MaxStoresPerMemset = (unsigned)0xFFFFFFFF; - MaxStoresPerMemcpy = (unsigned)0xFFFFFFFF; - MaxStoresPerMemmove = (unsigned)0xFFFFFFFF; + MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; + MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; + MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; setBooleanContents(ZeroOrNegativeOneBooleanContent); @@ -100,52 +98,50 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); // Operations not directly supported by NVPTX. - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::f32, Expand); - setOperationAction(ISD::BR_CC, MVT::f64, Expand); - setOperationAction(ISD::BR_CC, MVT::i1, Expand); - setOperationAction(ISD::BR_CC, MVT::i8, Expand); - setOperationAction(ISD::BR_CC, MVT::i16, Expand); - setOperationAction(ISD::BR_CC, MVT::i32, Expand); - setOperationAction(ISD::BR_CC, MVT::i64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::i1, Expand); + setOperationAction(ISD::BR_CC, MVT::i8, Expand); + setOperationAction(ISD::BR_CC, MVT::i16, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (nvptxSubtarget.hasROT64()) { - setOperationAction(ISD::ROTL , MVT::i64, Legal); - setOperationAction(ISD::ROTR , MVT::i64, Legal); - } - else { - setOperationAction(ISD::ROTL , MVT::i64, Expand); - setOperationAction(ISD::ROTR , MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Legal); + setOperationAction(ISD::ROTR, MVT::i64, Legal); + } else { + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::ROTR, MVT::i64, Expand); } if (nvptxSubtarget.hasROT32()) { - setOperationAction(ISD::ROTL , MVT::i32, Legal); - setOperationAction(ISD::ROTR , MVT::i32, Legal); - } - else { - setOperationAction(ISD::ROTL , MVT::i32, Expand); - setOperationAction(ISD::ROTR , MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Legal); + setOperationAction(ISD::ROTR, MVT::i32, Legal); + } else { + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTR, MVT::i32, Expand); } - setOperationAction(ISD::ROTL , MVT::i16, Expand); - setOperationAction(ISD::ROTR , MVT::i16, Expand); - setOperationAction(ISD::ROTL , MVT::i8, Expand); - setOperationAction(ISD::ROTR , MVT::i8, Expand); - setOperationAction(ISD::BSWAP , MVT::i16, Expand); - setOperationAction(ISD::BSWAP , MVT::i32, Expand); - setOperationAction(ISD::BSWAP , MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i16, Expand); + setOperationAction(ISD::ROTR, MVT::i16, Expand); + setOperationAction(ISD::ROTL, MVT::i8, Expand); + setOperationAction(ISD::ROTR, MVT::i8, Expand); + setOperationAction(ISD::BSWAP, MVT::i16, Expand); + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i64, Expand); // Indirect branch is not supported. // This also disables Jump Table creation. - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BRIND, MVT::Other, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); - setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); - setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); // We want to legalize constant related memmove and memcopy // intrinsics. @@ -168,16 +164,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setTruncStoreAction(MVT::i8, MVT::i1, Expand); // This is legal in NVPTX - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // TRAP can be lowered to PTX trap - setOperationAction(ISD::TRAP, MVT::Other, Legal); + setOperationAction(ISD::TRAP, MVT::Other, Legal); // Register custom handling for vector loads/stores - for (int i = MVT::FIRST_VECTOR_VALUETYPE; - i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT VT = (MVT::SimpleValueType)i; + for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; + ++i) { + MVT VT = (MVT::SimpleValueType) i; if (IsPTXVectorType(VT)) { setOperationAction(ISD::LOAD, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); @@ -190,49 +186,86 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) computeRegisterProperties(); } - const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; - case NVPTXISD::CALL: return "NVPTXISD::CALL"; - case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG"; - case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper"; - case NVPTXISD::NVBuiltin: return "NVPTXISD::NVBuiltin"; - case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam"; + default: + return 0; + case NVPTXISD::CALL: + return "NVPTXISD::CALL"; + case NVPTXISD::RET_FLAG: + return "NVPTXISD::RET_FLAG"; + case NVPTXISD::Wrapper: + return "NVPTXISD::Wrapper"; + case NVPTXISD::NVBuiltin: + return "NVPTXISD::NVBuiltin"; + case NVPTXISD::DeclareParam: + return "NVPTXISD::DeclareParam"; case NVPTXISD::DeclareScalarParam: return "NVPTXISD::DeclareScalarParam"; - case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet"; - case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam"; - case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall"; - case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam"; - case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam"; - case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32"; - case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32"; - case NVPTXISD::MoveToParam: return "NVPTXISD::MoveToParam"; - case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin"; - case NVPTXISD::CallArg: return "NVPTXISD::CallArg"; - case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg"; - case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd"; - case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid"; - case NVPTXISD::CallVal: return "NVPTXISD::CallVal"; - case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol"; - case NVPTXISD::Prototype: return "NVPTXISD::Prototype"; - case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam"; - case NVPTXISD::MoveRetval: return "NVPTXISD::MoveRetval"; - case NVPTXISD::MoveToRetval: return "NVPTXISD::MoveToRetval"; - case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval"; - case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam"; - case NVPTXISD::RETURN: return "NVPTXISD::RETURN"; - case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin"; - case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd"; - case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2"; - case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4"; - case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2"; - case NVPTXISD::LDGV4: return "NVPTXISD::LDGV4"; - case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2"; - case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4"; - case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2"; - case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4"; + case NVPTXISD::DeclareRet: + return "NVPTXISD::DeclareRet"; + case NVPTXISD::DeclareRetParam: + return "NVPTXISD::DeclareRetParam"; + case NVPTXISD::PrintCall: + return "NVPTXISD::PrintCall"; + case NVPTXISD::LoadParam: + return "NVPTXISD::LoadParam"; + case NVPTXISD::StoreParam: + return "NVPTXISD::StoreParam"; + case NVPTXISD::StoreParamS32: + return "NVPTXISD::StoreParamS32"; + case NVPTXISD::StoreParamU32: + return "NVPTXISD::StoreParamU32"; + case NVPTXISD::MoveToParam: + return "NVPTXISD::MoveToParam"; + case NVPTXISD::CallArgBegin: + return "NVPTXISD::CallArgBegin"; + case NVPTXISD::CallArg: + return "NVPTXISD::CallArg"; + case NVPTXISD::LastCallArg: + return "NVPTXISD::LastCallArg"; + case NVPTXISD::CallArgEnd: + return "NVPTXISD::CallArgEnd"; + case NVPTXISD::CallVoid: + return "NVPTXISD::CallVoid"; + case NVPTXISD::CallVal: + return "NVPTXISD::CallVal"; + case NVPTXISD::CallSymbol: + return "NVPTXISD::CallSymbol"; + case NVPTXISD::Prototype: + return "NVPTXISD::Prototype"; + case NVPTXISD::MoveParam: + return "NVPTXISD::MoveParam"; + case NVPTXISD::MoveRetval: + return "NVPTXISD::MoveRetval"; + case NVPTXISD::MoveToRetval: + return "NVPTXISD::MoveToRetval"; + case NVPTXISD::StoreRetval: + return "NVPTXISD::StoreRetval"; + case NVPTXISD::PseudoUseParam: + return "NVPTXISD::PseudoUseParam"; + case NVPTXISD::RETURN: + return "NVPTXISD::RETURN"; + case NVPTXISD::CallSeqBegin: + return "NVPTXISD::CallSeqBegin"; + case NVPTXISD::CallSeqEnd: + return "NVPTXISD::CallSeqEnd"; + case NVPTXISD::LoadV2: + return "NVPTXISD::LoadV2"; + case NVPTXISD::LoadV4: + return "NVPTXISD::LoadV4"; + case NVPTXISD::LDGV2: + return "NVPTXISD::LDGV2"; + case NVPTXISD::LDGV4: + return "NVPTXISD::LDGV4"; + case NVPTXISD::LDUV2: + return "NVPTXISD::LDUV2"; + case NVPTXISD::LDUV4: + return "NVPTXISD::LDUV4"; + case NVPTXISD::StoreV2: + return "NVPTXISD::StoreV2"; + case NVPTXISD::StoreV4: + return "NVPTXISD::StoreV4"; } } @@ -248,10 +281,9 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); } -std::string NVPTXTargetLowering::getPrototype(Type *retTy, - const ArgListTy &Args, - const SmallVectorImpl<ISD::OutputArg> &Outs, - unsigned retAlignment) const { +std::string NVPTXTargetLowering::getPrototype( + Type *retTy, const ArgListTy &Args, + const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const { bool isABI = (nvptxSubtarget.getSmVersion() >= 20); @@ -267,54 +299,47 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, unsigned size = 0; if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) { size = ITy->getBitWidth(); - if (size < 32) size = 32; - } - else { + if (size < 32) + size = 32; + } else { assert(retTy->isFloatingPointTy() && "Floating point type expected here"); size = retTy->getPrimitiveSizeInBits(); } O << ".param .b" << size << " _"; - } - else if (isa<PointerType>(retTy)) - O << ".param .b" << getPointerTy().getSizeInBits() - << " _"; + } else if (isa<PointerType>(retTy)) + O << ".param .b" << getPointerTy().getSizeInBits() << " _"; else { if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, retTy, vtparts); unsigned totalsz = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { elems = vtparts[i].getVectorNumElements(); elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) sz = 8; - totalsz += sz/8; + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + totalsz += sz / 8; } } - O << ".param .align " - << retAlignment - << " .b8 _[" - << totalsz << "]"; - } - else { - assert(false && - "Unknown return type"); + O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]"; + } else { + assert(false && "Unknown return type"); } } - } - else { + } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, retTy, vtparts); unsigned idx = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -322,14 +347,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << ".reg .b" << sz << " _"; - if (j<je-1) O << ", "; + if (j < je - 1) + O << ", "; ++idx; } - if (i < e-1) + if (i < e - 1) O << ", "; } } @@ -340,7 +367,7 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, bool first = true; MVT thePointerTy = getPointerTy(); - for (unsigned i=0,e=Args.size(); i!=e; ++i) { + for (unsigned i = 0, e = Args.size(); i != e; ++i) { const Type *Ty = Args[i].Ty; if (!first) { O << ", "; @@ -351,9 +378,9 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, unsigned sz = 0; if (isa<IntegerType>(Ty)) { sz = cast<IntegerType>(Ty)->getBitWidth(); - if (sz < 32) sz = 32; - } - else if (isa<PointerType>(Ty)) + if (sz < 32) + sz = 32; + } else if (isa<PointerType>(Ty)) sz = thePointerTy.getSizeInBits(); else sz = Ty->getPrimitiveSizeInBits(); @@ -365,23 +392,20 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, continue; } const PointerType *PTy = dyn_cast<PointerType>(Ty); - assert(PTy && - "Param with byval attribute should be a pointer type"); + assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); if (isABI) { unsigned align = Outs[i].Flags.getByValAlign(); unsigned sz = getDataLayout()->getTypeAllocSize(ETy); - O << ".param .align " << align - << " .b8 "; + O << ".param .align " << align << " .b8 "; O << "_"; O << "[" << sz << "]"; continue; - } - else { + } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, ETy, vtparts); - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -389,14 +413,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0,je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << ".reg .b" << sz << " "; O << "_"; - if (j<je-1) O << ", "; + if (j < je - 1) + O << ", "; } - if (i<e-1) + if (i < e - 1) O << ", "; } continue; @@ -406,27 +432,25 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, return O.str(); } - -SDValue -NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const { - SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; +SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; - SmallVector<SDValue, 32> &OutVals = CLI.OutVals; - SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; - SDValue Chain = CLI.Chain; - SDValue Callee = CLI.Callee; - bool &isTailCall = CLI.IsTailCall; - ArgListTy &Args = CLI.Args; - Type *retTy = CLI.RetTy; - ImmutableCallSite *CS = CLI.CS; + SmallVector<SDValue, 32> &OutVals = CLI.OutVals; + SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + ArgListTy &Args = CLI.Args; + Type *retTy = CLI.RetTy; + ImmutableCallSite *CS = CLI.CS; bool isABI = (nvptxSubtarget.getSmVersion() >= 20); SDValue tempChain = Chain; - Chain = DAG.getCALLSEQ_START(Chain, - DAG.getIntPtrConstant(uniqueCallSite, true)); + Chain = + DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true)); SDValue InFlag = Chain.getValue(1); assert((Outs.size() == Args.size()) && @@ -434,7 +458,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned paramCount = 0; // Declare the .params or .reg need to pass values // to the function - for (unsigned i=0, e=Outs.size(); i!=e; ++i) { + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { EVT VT = Outs[i].VT; if (Outs[i].Flags.isByVal() == false) { @@ -445,19 +469,20 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isABI) isReg = 0; unsigned sz = VT.getSizeInBits(); - if (VT.isInteger() && (sz < 32)) sz = 32; + if (VT.isInteger() && (sz < 32)) + sz = 32; SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), - DAG.getConstant(isReg, MVT::i32), - InFlag }; + DAG.getConstant(isReg, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, DeclareParamOps, 5); InFlag = Chain.getValue(1); SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(0, MVT::i32), OutVals[i], InFlag }; + DAG.getConstant(0, MVT::i32), OutVals[i], + InFlag }; unsigned opcode = NVPTXISD::StoreParam; if (isReg) @@ -477,8 +502,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // struct or vector SmallVector<EVT, 16> vtparts; const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); - assert(PTy && - "Type of a byval parameter should be pointer"); + assert(PTy && "Type of a byval parameter should be pointer"); ComputeValueVTs(*this, PTy->getElementType(), vtparts); if (isABI) { @@ -488,40 +512,41 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // The ByValAlign in the Outs[i].Flags is alway set at this point, so we // don't need to // worry about natural alignment or not. See TargetLowering::LowerCallTo() - SDValue DeclareParamOps[] = { Chain, - DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32), - DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(sz, MVT::i32), - InFlag }; + SDValue DeclareParamOps[] = { + Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32), + DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), + InFlag + }; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, DeclareParamOps, 5); InFlag = Chain.getValue(1); unsigned curOffset = 0; - for (unsigned j=0,je=vtparts.size(); j!=je; ++j) { + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { unsigned elems = 1; EVT elemtype = vtparts[j]; if (vtparts[j].isVector()) { elems = vtparts[j].getVectorNumElements(); elemtype = vtparts[j].getVectorElementType(); } - for (unsigned k=0,ke=elems; k!=ke; ++k) { + for (unsigned k = 0, ke = elems; k != ke; ++k) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) sz = 8; - SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), - OutVals[i], - DAG.getConstant(curOffset, - getPointerTy())); - SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, - MachinePointerInfo(), false, false, false, 0); + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + SDValue srcAddr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], + DAG.getConstant(curOffset, getPointerTy())); + SDValue theVal = + DAG.getLoad(elemtype, dl, tempChain, srcAddr, + MachinePointerInfo(), false, false, false, 0); SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, - MVT::i32), - DAG.getConstant(curOffset, MVT::i32), - theVal, InFlag }; + SDValue CopyParamOps[] = { Chain, + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(curOffset, MVT::i32), + theVal, InFlag }; Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, 5); InFlag = Chain.getValue(1); - curOffset += sz/8; + curOffset += sz / 8; } } ++paramCount; @@ -530,30 +555,31 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Non-abi, struct or vector // Declare a bunch or .reg .b<size> .param<n> unsigned curOffset = 0; - for (unsigned j=0,je=vtparts.size(); j!=je; ++j) { + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { unsigned elems = 1; EVT elemtype = vtparts[j]; if (vtparts[j].isVector()) { elems = vtparts[j].getVectorNumElements(); elemtype = vtparts[j].getVectorElementType(); } - for (unsigned k=0,ke=elems; k!=ke; ++k) { + for (unsigned k = 0, ke = elems; k != ke; ++k) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount, - MVT::i32), - DAG.getConstant(sz, MVT::i32), - DAG.getConstant(1, MVT::i32), - InFlag }; + SDValue DeclareParamOps[] = { Chain, + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(sz, MVT::i32), + DAG.getConstant(1, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, DeclareParamOps, 5); InFlag = Chain.getValue(1); - SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], - DAG.getConstant(curOffset, - getPointerTy())); - SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, - MachinePointerInfo(), false, false, false, 0); + SDValue srcAddr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], + DAG.getConstant(curOffset, getPointerTy())); + SDValue theVal = + DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(), + false, false, false, 0); SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(0, MVT::i32), theVal, @@ -578,20 +604,21 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or // individual .reg .b<size> func_retval<0..> for non ABI unsigned resultsz = 0; - for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = resvtparts[i]; if (resvtparts[i].isVector()) { elems = resvtparts[i].getVectorNumElements(); elemtype = resvtparts[i].getVectorElementType(); } - for (unsigned j=0,je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); if (isABI == false) { - if (elemtype.isInteger() && (sz < 32)) sz = 32; - } - else { - if (elemtype.isInteger() && (sz < 8)) sz = 8; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; + } else { + if (elemtype.isInteger() && (sz < 8)) + sz = 8; } if (isABI == false) { SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); @@ -609,7 +636,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (isABI) { if (retTy->isPrimitiveType() || retTy->isIntegerTy() || - retTy->isPointerTy() ) { + retTy->isPointerTy()) { // Scalar needs to be at least 32bit wide if (resultsz < 32) resultsz = 32; @@ -620,8 +647,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, DeclareRetOps, 5); InFlag = Chain.getValue(1); - } - else { + } else { if (Func) { // direct call if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment)) retAlignment = getDataLayout()->getABITypeAlignment(retTy); @@ -631,10 +657,10 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, retAlignment = getDataLayout()->getABITypeAlignment(retTy); } SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment, - MVT::i32), - DAG.getConstant(resultsz/8, MVT::i32), - DAG.getConstant(0, MVT::i32), InFlag }; + SDValue DeclareRetOps[] = { Chain, + DAG.getConstant(retAlignment, MVT::i32), + DAG.getConstant(resultsz / 8, MVT::i32), + DAG.getConstant(0, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, DeclareRetOps, 5); InFlag = Chain.getValue(1); @@ -652,24 +678,24 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // INLINEASM SDNode. SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue); std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment); - const char *asmstr = nvTM->getManagedStrPool()-> - getManagedString(proto_string.c_str())->c_str(); - SDValue InlineAsmOps[] = { Chain, - DAG.getTargetExternalSymbol(asmstr, - getPointerTy()), - DAG.getMDNode(0), - DAG.getTargetConstant(0, MVT::i32), InFlag }; + const char *asmstr = nvTM->getManagedStrPool() + ->getManagedString(proto_string.c_str())->c_str(); + SDValue InlineAsmOps[] = { + Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()), + DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag + }; Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5); InFlag = Chain.getValue(1); } // Op to just print "call" SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue PrintCallOps[] = { Chain, - DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1) - : retCount, MVT::i32), - InFlag }; - Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl, - PrintCallVTs, PrintCallOps, 3); + SDValue PrintCallOps[] = { + Chain, + DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32), + InFlag + }; + Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), + dl, PrintCallVTs, PrintCallOps, 3); InFlag = Chain.getValue(1); // Ops to print out the function name @@ -685,31 +711,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallArgBeginOps, 2); InFlag = Chain.getValue(1); - for (unsigned i=0, e=paramCount; i!=e; ++i) { + for (unsigned i = 0, e = paramCount; i != e; ++i) { unsigned opcode; - if (i==(e-1)) + if (i == (e - 1)) opcode = NVPTXISD::LastCallArg; else opcode = NVPTXISD::CallArg; SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32), - DAG.getConstant(i, MVT::i32), - InFlag }; + DAG.getConstant(i, MVT::i32), InFlag }; Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4); InFlag = Chain.getValue(1); } SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CallArgEndOps[] = { Chain, - DAG.getConstant(Func ? 1 : 0, MVT::i32), + SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32), InFlag }; - Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, - 3); + Chain = + DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3); InFlag = Chain.getValue(1); if (!Func) { SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue PrototypeOps[] = { Chain, - DAG.getConstant(uniqueCallSite, MVT::i32), + SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3); InFlag = Chain.getValue(1); @@ -719,32 +742,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Ins.size() > 0) { if (isABI) { unsigned resoffset = 0; - for (unsigned i=0,e=Ins.size(); i!=e; ++i) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { unsigned sz = Ins[i].VT.getSizeInBits(); - if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8; + if (Ins[i].VT.isInteger() && (sz < 8)) + sz = 8; EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue }; - SDValue LoadRetOps[] = { - Chain, - DAG.getConstant(1, MVT::i32), - DAG.getConstant(resoffset, MVT::i32), - InFlag - }; + SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), + DAG.getConstant(resoffset, MVT::i32), InFlag }; SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs, LoadRetOps, array_lengthof(LoadRetOps)); Chain = retval.getValue(1); InFlag = retval.getValue(2); InVals.push_back(retval); - resoffset += sz/8; + resoffset += sz / 8; } - } - else { + } else { SmallVector<EVT, 16> resvtparts; ComputeValueVTs(*this, retTy, resvtparts); assert(Ins.size() == resvtparts.size() && "Unexpected number of return values in non-ABI case"); unsigned paramNum = 0; - for (unsigned i=0,e=Ins.size(); i!=e; ++i) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(EVT(Ins[i].VT) == resvtparts[i] && "Unexpected EVT type in non-ABI case"); unsigned numelems = 1; @@ -754,14 +773,11 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, elemtype = Ins[i].VT.getVectorElementType(); } std::vector<SDValue> tempRetVals; - for (unsigned j=0; j<numelems; ++j) { + for (unsigned j = 0; j < numelems; ++j) { EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue }; - SDValue MoveRetOps[] = { - Chain, - DAG.getConstant(0, MVT::i32), - DAG.getConstant(paramNum, MVT::i32), - InFlag - }; + SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32), + DAG.getConstant(paramNum, MVT::i32), + InFlag }; SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs, MoveRetOps, array_lengthof(MoveRetOps)); Chain = retval.getValue(1); @@ -777,9 +793,8 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } } - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(uniqueCallSite, true), - DAG.getIntPtrConstant(uniqueCallSite+1, true), + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), + DAG.getIntPtrConstant(uniqueCallSite + 1, true), InFlag); uniqueCallSite++; @@ -792,45 +807,51 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() // (see LegalizeDAG.cpp). This is slow and uses local memory. // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 -SDValue NVPTXTargetLowering:: -LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { +SDValue +NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); SmallVector<SDValue, 8> Ops; unsigned NumOperands = Node->getNumOperands(); - for (unsigned i=0; i < NumOperands; ++i) { + for (unsigned i = 0; i < NumOperands; ++i) { SDValue SubOp = Node->getOperand(i); EVT VVT = SubOp.getNode()->getValueType(0); EVT EltVT = VVT.getVectorElementType(); unsigned NumSubElem = VVT.getVectorNumElements(); - for (unsigned j=0; j < NumSubElem; ++j) { + for (unsigned j = 0; j < NumSubElem; ++j) { Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, DAG.getIntPtrConstant(j))); } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0], + Ops.size()); } -SDValue NVPTXTargetLowering:: -LowerOperation(SDValue Op, SelectionDAG &DAG) const { +SDValue +NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - case ISD::RETURNADDR: return SDValue(); - case ISD::FRAMEADDR: return SDValue(); - case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::INTRINSIC_W_CHAIN: return Op; + case ISD::RETURNADDR: + return SDValue(); + case ISD::FRAMEADDR: + return SDValue(); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return Op; case ISD::BUILD_VECTOR: case ISD::EXTRACT_SUBVECTOR: return Op; - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); - case ISD::STORE: return LowerSTORE(Op, DAG); - case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::CONCAT_VECTORS: + return LowerCONCAT_VECTORS(Op, DAG); + case ISD::STORE: + return LowerSTORE(Op, DAG); + case ISD::LOAD: + return LowerLOAD(Op, DAG); default: llvm_unreachable("Custom lowering not defined for operation"); } } - SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType() == MVT::i1) return LowerLOADi1(Op, DAG); @@ -842,24 +863,22 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { // => // v1 = ld i8* addr // v = trunc v1 to i1 -SDValue NVPTXTargetLowering:: -LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { +SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); LoadSDNode *LD = cast<LoadSDNode>(Node); DebugLoc dl = Node->getDebugLoc(); - assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ; + assert(LD->getExtensionType() == ISD::NON_EXTLOAD); assert(Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only"); - SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), - LD->getAlignment()); + SDValue newLD = + DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); // The legalizer (the caller) is expecting two values from the legalized // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() // in LegalizeDAG.cpp which also uses MergeValues. - SDValue Ops[] = {result, LD->getChain()}; + SDValue Ops[] = { result, LD->getChain() }; return DAG.getMergeValues(Ops, 2, dl); } @@ -887,7 +906,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { if (!ValVT.isSimple()) return SDValue(); switch (ValVT.getSimpleVT().SimpleTy) { - default: return SDValue(); + default: + return SDValue(); case MVT::v2i8: case MVT::v2i16: case MVT::v2i32: @@ -914,7 +934,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { NeedExt = true; switch (NumElts) { - default: return SDValue(); + default: + return SDValue(); case 2: Opcode = NVPTXISD::StoreV2; break; @@ -947,11 +968,9 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { MemSDNode *MemSD = cast<MemSDNode>(N); - SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL, - DAG.getVTList(MVT::Other), &Ops[0], - Ops.size(), MemSD->getMemoryVT(), - MemSD->getMemOperand()); - + SDValue NewSt = DAG.getMemIntrinsicNode( + Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(), + MemSD->getMemoryVT(), MemSD->getMemOperand()); //return DCI.CombineTo(N, NewSt, true); return NewSt; @@ -964,8 +983,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { // => // v1 = zxt v to i8 // st i8, addr -SDValue NVPTXTargetLowering:: -LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { +SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); StoreSDNode *ST = cast<StoreSDNode>(Node); @@ -976,18 +994,14 @@ LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, - MVT::i8, Tmp3); - SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3); + SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); return Result; } - -SDValue -NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx, - EVT v) const { +SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, + int idx, EVT v) const { std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); std::stringstream suffix; suffix << idx; @@ -1000,19 +1014,16 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { return getExtSymb(DAG, ".PARAM", idx, v); } -SDValue -NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { +SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { return getExtSymb(DAG, ".HLPPARAM", idx); } // Check to see if the kernel argument is image*_t or sampler_t bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { - static const char *const specialTypes[] = { - "struct._image2d_t", - "struct._image3d_t", - "struct._sampler_t" - }; + static const char *const specialTypes[] = { "struct._image2d_t", + "struct._image3d_t", + "struct._sampler_t" }; const Type *Ty = arg->getType(); const PointerType *PTy = dyn_cast<PointerType>(Ty); @@ -1033,12 +1044,10 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { return false; } -SDValue -NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { +SDValue NVPTXTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); const DataLayout *TD = getDataLayout(); @@ -1054,34 +1063,43 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, std::vector<Type *> argTypes; std::vector<const Argument *> theArgs; for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) { + I != E; ++I) { theArgs.push_back(I); argTypes.push_back(I->getType()); } - assert(argTypes.size() == Ins.size() && - "Ins types and function types did not match"); + //assert(argTypes.size() == Ins.size() && + // "Ins types and function types did not match"); int idx = 0; - for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) { + for (unsigned i = 0, e = argTypes.size(); i != e; ++i, ++idx) { Type *Ty = argTypes[i]; EVT ObjectVT = getValueType(Ty); - assert(ObjectVT == Ins[i].VT && - "Ins type did not match function type"); + //assert(ObjectVT == Ins[i].VT && + // "Ins type did not match function type"); // If the kernel argument is image*_t or sampler_t, convert it to // a i32 constant holding the parameter position. This can later // matched in the AsmPrinter to output the correct mangled name. - if (isImageOrSamplerVal(theArgs[i], - (theArgs[i]->getParent() ? - theArgs[i]->getParent()->getParent() : 0))) { + if (isImageOrSamplerVal( + theArgs[i], + (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() + : 0))) { assert(isKernel && "Only kernels can have image/sampler params"); - InVals.push_back(DAG.getConstant(i+1, MVT::i32)); + InVals.push_back(DAG.getConstant(i + 1, MVT::i32)); continue; } if (theArgs[i]->use_empty()) { // argument is dead - InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT)); + if (ObjectVT.isVector()) { + EVT EltVT = ObjectVT.getVectorElementType(); + unsigned NumElts = ObjectVT.getVectorNumElements(); + for (unsigned vi = 0; vi < NumElts; ++vi) { + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, EltVT)); + } + } else { + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT)); + } continue; } @@ -1089,31 +1107,52 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, // to newly created nodes. The SDNOdes for params have to // appear in the same order as their order of appearance // in the original function. "idx+1" holds that order. - if (PAL.hasAttribute(i+1, Attribute::ByVal) == false) { + if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) { + if (ObjectVT.isVector()) { + unsigned NumElts = ObjectVT.getVectorNumElements(); + EVT EltVT = ObjectVT.getVectorElementType(); + unsigned Offset = 0; + for (unsigned vi = 0; vi < NumElts; ++vi) { + SDValue A = getParamSymbol(DAG, idx, getPointerTy()); + SDValue B = DAG.getIntPtrConstant(Offset); + SDValue Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), + //getParamSymbol(DAG, idx, EltVT), + //DAG.getConstant(Offset, getPointerTy())); + A, B); + Value *SrcValue = Constant::getNullValue(PointerType::get( + EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); + SDValue Ld = DAG.getLoad( + EltVT, dl, Root, Addr, MachinePointerInfo(SrcValue), false, false, + false, + TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); + Offset += EltVT.getStoreSizeInBits() / 8; + InVals.push_back(Ld); + } + continue; + } + // A plain scalar. if (isABI || isKernel) { // If ABI, load from the param symbol SDValue Arg = getParamSymbol(DAG, idx); // Conjure up a value that we can get the address space from. // FIXME: Using a constant here is a hack. - Value *srcValue = Constant::getNullValue(PointerType::get( - ObjectVT.getTypeForEVT(F->getContext()), - llvm::ADDRESS_SPACE_PARAM)); - SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg, - MachinePointerInfo(srcValue), false, false, - false, - TD->getABITypeAlignment(ObjectVT.getTypeForEVT( - F->getContext()))); + Value *srcValue = Constant::getNullValue( + PointerType::get(ObjectVT.getTypeForEVT(F->getContext()), + llvm::ADDRESS_SPACE_PARAM)); + SDValue p = DAG.getLoad( + ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false, + false, + TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); if (p.getNode()) - DAG.AssignOrdering(p.getNode(), idx+1); + DAG.AssignOrdering(p.getNode(), idx + 1); InVals.push_back(p); - } - else { + } else { // If no ABI, just move the param symbol SDValue Arg = getParamSymbol(DAG, idx, ObjectVT); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) - DAG.AssignOrdering(p.getNode(), idx+1); + DAG.AssignOrdering(p.getNode(), idx + 1); InVals.push_back(p); } continue; @@ -1130,47 +1169,49 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) - DAG.AssignOrdering(p.getNode(), idx+1); + DAG.AssignOrdering(p.getNode(), idx + 1); if (isKernel) InVals.push_back(p); else { - SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, - DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), - p); + SDValue p2 = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, + DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p); InVals.push_back(p2); } } else { // Have to move a set of param symbols to registers and // store them locally and return the local pointer in InVals const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]); - assert(elemPtrType && - "Byval parameter should be a pointer type"); + assert(elemPtrType && "Byval parameter should be a pointer type"); Type *elemType = elemPtrType->getElementType(); // Compute the constituent parts SmallVector<EVT, 16> vtparts; SmallVector<uint64_t, 16> offsets; ComputeValueVTs(*this, elemType, vtparts, &offsets, 0); unsigned totalsize = 0; - for (unsigned j=0, je=vtparts.size(); j!=je; ++j) + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) totalsize += vtparts[j].getStoreSizeInBits(); - SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()-> - CreateStackObject(totalsize/8, 16, false), - getPointerTy()); + SDValue localcopy = DAG.getFrameIndex( + MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false), + getPointerTy()); unsigned sizesofar = 0; std::vector<SDValue> theChains; - for (unsigned j=0, je=vtparts.size(); j!=je; ++j) { + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { unsigned numElems = 1; - if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements(); - for (unsigned k=0, ke=numElems; k!=ke; ++k) { + if (vtparts[j].isVector()) + numElems = vtparts[j].getVectorNumElements(); + for (unsigned k = 0, ke = numElems; k != ke; ++k) { EVT tmpvt = vtparts[j]; - if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType(); + if (tmpvt.isVector()) + tmpvt = tmpvt.getVectorElementType(); SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt, getParamSymbol(DAG, idx, tmpvt)); - SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy, - DAG.getConstant(sizesofar, getPointerTy())); - theChains.push_back(DAG.getStore(Chain, dl, arg, addr, - MachinePointerInfo(), false, false, 0)); - sizesofar += tmpvt.getStoreSizeInBits()/8; + SDValue addr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy, + DAG.getConstant(sizesofar, getPointerTy())); + theChains.push_back(DAG.getStore( + Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0)); + sizesofar += tmpvt.getStoreSizeInBits() / 8; ++idx; } } @@ -1190,43 +1231,42 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, //} if (!OutChains.empty()) - DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &OutChains[0], OutChains.size())); + DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0], + OutChains.size())); return Chain; } -SDValue -NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { +SDValue NVPTXTargetLowering::LowerReturn( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, + SelectionDAG &DAG) const { bool isABI = (nvptxSubtarget.getSmVersion() >= 20); unsigned sizesofar = 0; unsigned idx = 0; - for (unsigned i=0, e=Outs.size(); i!=e; ++i) { + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { SDValue theVal = OutVals[i]; EVT theValType = theVal.getValueType(); unsigned numElems = 1; - if (theValType.isVector()) numElems = theValType.getVectorNumElements(); - for (unsigned j=0,je=numElems; j!=je; ++j) { + if (theValType.isVector()) + numElems = theValType.getVectorNumElements(); + for (unsigned j = 0, je = numElems; j != je; ++j) { SDValue tmpval = theVal; if (theValType.isVector()) tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - theValType.getVectorElementType(), - tmpval, DAG.getIntPtrConstant(j)); - Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval, - dl, MVT::Other, - Chain, - DAG.getConstant(isABI ? sizesofar : idx, MVT::i32), + theValType.getVectorElementType(), tmpval, + DAG.getIntPtrConstant(j)); + Chain = DAG.getNode( + isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval, dl, + MVT::Other, Chain, DAG.getConstant(isABI ? sizesofar : idx, MVT::i32), tmpval); if (theValType.isVector()) - sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8; + sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8; else - sizesofar += theValType.getStoreSizeInBits()/8; + sizesofar += theValType.getStoreSizeInBits() / 8; ++idx; } } @@ -1234,12 +1274,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); } -void -NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector<SDValue> &Ops, - SelectionDAG &DAG) const -{ +void NVPTXTargetLowering::LowerAsmOperandForConstraint( + SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { if (Constraint.length() > 1) return; else @@ -1249,8 +1286,7 @@ NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op, // NVPTX suuport vector of legal types of any length in Intrinsics because the // NVPTX specific type legalizer // will legalize them to the PTX supported length. -bool -NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { +bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { if (isTypeLegal(VT)) return true; if (VT.isVector()) { @@ -1261,15 +1297,13 @@ NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { return false; } - // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as // TgtMemIntrinsic // because we need the information that is only available in the "Value" type // of destination // pointer. In particular, the address space information. -bool -NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I, - unsigned Intrinsic) const { +bool NVPTXTargetLowering::getTgtMemIntrinsic( + IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { switch (Intrinsic) { default: return false; @@ -1325,9 +1359,8 @@ NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I, /// Used to guide target specific optimizations, like loop strength reduction /// (LoopStrengthReduce.cpp) and memory optimization for address mode /// (CodeGenPrepare.cpp) -bool -NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { +bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { // AddrMode - This represents an addressing mode of: // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg @@ -1345,10 +1378,10 @@ NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, } switch (AM.Scale) { - case 0: // "r", "r+i" or "i" is allowed + case 0: // "r", "r+i" or "i" is allowed break; case 1: - if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. + if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. return false; // Otherwise we have r+i. break; @@ -1385,8 +1418,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { return TargetLowering::getConstraintType(Constraint); } - -std::pair<unsigned, const TargetRegisterClass*> +std::pair<unsigned, const TargetRegisterClass *> NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint.size() == 1) { @@ -1409,8 +1441,6 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } - - /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { return 4; @@ -1418,7 +1448,7 @@ unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, - SmallVectorImpl<SDValue>& Results) { + SmallVectorImpl<SDValue> &Results) { EVT ResVT = N->getValueType(0); DebugLoc DL = N->getDebugLoc(); @@ -1429,7 +1459,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, // but I'm leaving that as a TODO for now. assert(ResVT.isSimple() && "Can only handle simple types"); switch (ResVT.getSimpleVT().SimpleTy) { - default: return; + default: + return; case MVT::v2i8: case MVT::v2i16: case MVT::v2i32: @@ -1460,7 +1491,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, SDVTList LdResVTs; switch (NumElts) { - default: return; + default: + return; case 2: Opcode = NVPTXISD::LoadV2; LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); @@ -1500,14 +1532,14 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, SDValue LoadChain = NewLD.getValue(NumElts); - SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); + SDValue BuildVec = + DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); Results.push_back(BuildVec); Results.push_back(LoadChain); } -static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, - SelectionDAG &DAG, +static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, SmallVectorImpl<SDValue> &Results) { SDValue Chain = N->getOperand(0); SDValue Intrin = N->getOperand(1); @@ -1515,8 +1547,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, // Get the intrinsic ID unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); - switch(IntrinNo) { - default: return; + switch (IntrinNo) { + default: + return; case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_p: @@ -1544,10 +1577,12 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SDVTList LdResVTs; switch (NumElts) { - default: return; + default: + return; case 2: - switch(IntrinNo) { - default: return; + switch (IntrinNo) { + default: + return; case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_p: @@ -1562,8 +1597,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); break; case 4: { - switch(IntrinNo) { - default: return; + switch (IntrinNo) { + default: + return; case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_p: @@ -1586,29 +1622,31 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, // Copy regular operands OtherOps.push_back(Chain); // Chain - // Skip operand 1 (intrinsic ID) - // Others + // Skip operand 1 (intrinsic ID) + // Others for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) OtherOps.push_back(N->getOperand(i)); MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); - SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0], - OtherOps.size(), MemSD->getMemoryVT(), - MemSD->getMemOperand()); + SDValue NewLD = DAG.getMemIntrinsicNode( + Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(), + MemSD->getMemoryVT(), MemSD->getMemOperand()); SmallVector<SDValue, 4> ScalarRes; for (unsigned i = 0; i < NumElts; ++i) { SDValue Res = NewLD.getValue(i); if (NeedTrunc) - Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); + Res = + DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); ScalarRes.push_back(Res); } SDValue LoadChain = NewLD.getValue(NumElts); - SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); + SDValue BuildVec = + DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); Results.push_back(BuildVec); Results.push_back(LoadChain); @@ -1629,10 +1667,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, // We make sure the memory type is i8, which will be used during isel // to select the proper instruction. - SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, - LdResVTs, &Ops[0], - Ops.size(), MVT::i8, - MemSD->getMemOperand()); + SDValue NewLD = + DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0], + Ops.size(), MVT::i8, MemSD->getMemOperand()); Results.push_back(NewLD.getValue(0)); Results.push_back(NewLD.getValue(1)); @@ -1641,11 +1678,11 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, } } -void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N, - SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const { +void NVPTXTargetLowering::ReplaceNodeResults( + SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { - default: report_fatal_error("Unhandled custom legalization"); + default: + report_fatal_error("Unhandled custom legalization"); case ISD::LOAD: ReplaceLoadVector(N, DAG, Results); return; diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 14afc148cb..3cd49d38af 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -87,7 +87,7 @@ public: bool isTypeSupportedInIntrinsic(MVT VT) const; - bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const; /// isLegalAddressingMode - Return true if the addressing mode represented @@ -107,14 +107,13 @@ public: } ConstraintType getConstraintType(const std::string &Constraint) const; - std::pair<unsigned, const TargetRegisterClass*> + std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - virtual SDValue - LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, - SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + virtual SDValue LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; @@ -136,17 +135,15 @@ public: NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts - virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { - return MVT::i32; - } + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } virtual bool shouldSplitVectorElementType(EVT VT) const; private: - const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here + const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here - SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT = - MVT::i32) const; + SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, + EVT = MVT::i32) const; SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const; SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx); @@ -159,8 +156,7 @@ private: SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; - virtual void ReplaceNodeResults(SDNode *N, - SmallVectorImpl<SDValue> &Results, + virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const; }; } // namespace llvm diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 9e73d80c28..33a63c26f4 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -23,61 +23,55 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include <cstdio> - using namespace llvm; // FIXME: Add the subtarget support on this constructor. NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm) -: NVPTXGenInstrInfo(), - TM(tm), - RegInfo(*this, *TM.getSubtargetImpl()) {} - + : NVPTXGenInstrInfo(), TM(tm), RegInfo(*this, *TM.getSubtargetImpl()) {} -void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { +void NVPTXInstrInfo::copyPhysReg( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, bool KillSrc) const { if (NVPTX::Int32RegsRegClass.contains(DestReg) && NVPTX::Int32RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int8RegsRegClass.contains(DestReg) && - NVPTX::Int8RegsRegClass.contains(SrcReg)) + NVPTX::Int8RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int1RegsRegClass.contains(DestReg) && - NVPTX::Int1RegsRegClass.contains(SrcReg)) + NVPTX::Int1RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Float32RegsRegClass.contains(DestReg) && - NVPTX::Float32RegsRegClass.contains(SrcReg)) + NVPTX::Float32RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int16RegsRegClass.contains(DestReg) && - NVPTX::Int16RegsRegClass.contains(SrcReg)) + NVPTX::Int16RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int64RegsRegClass.contains(DestReg) && - NVPTX::Int64RegsRegClass.contains(SrcReg)) + NVPTX::Int64RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Float64RegsRegClass.contains(DestReg) && - NVPTX::Float64RegsRegClass.contains(SrcReg)) + NVPTX::Float64RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else { llvm_unreachable("Don't know how to copy a register"); } } -bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, +bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DestReg) const { // Look for the appropriate part of TSFlags bool isMove = false; - unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> - NVPTX::SimpleMoveShift; + unsigned TSFlags = + (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> NVPTX::SimpleMoveShift; isMove = (TSFlags == 1); if (isMove) { @@ -94,10 +88,10 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, return false; } -bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const -{ +bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const { switch (MI.getOpcode()) { - default: return false; + default: + return false; case NVPTX::INT_PTX_SREG_NTID_X: case NVPTX::INT_PTX_SREG_NTID_Y: case NVPTX::INT_PTX_SREG_NTID_Z: @@ -115,12 +109,11 @@ bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const } } - bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const { bool isLoad = false; - unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> - NVPTX::isLoadShift; + unsigned TSFlags = + (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> NVPTX::isLoadShift; isLoad = (TSFlags == 1); if (isLoad) AddrSpace = getLdStCodeAddrSpace(MI); @@ -130,15 +123,14 @@ bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI, bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const { bool isStore = false; - unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> - NVPTX::isStoreShift; + unsigned TSFlags = + (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> NVPTX::isStoreShift; isStore = (TSFlags == 1); if (isStore) AddrSpace = getLdStCodeAddrSpace(MI); return isStore; } - bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const { unsigned addrspace = 0; if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS) @@ -152,7 +144,6 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const { return true; } - /// AnalyzeBranch - Analyze the branching code at the end of MBB, returning /// true if it cannot be understood (e.g. it's a switch dispatch or isn't /// implemented for a target). Upon success, this returns false and returns @@ -176,11 +167,9 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const { /// Note that RemoveBranch and InsertBranch must be implemented to support /// cases where this method returns success. /// -bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const { +bool NVPTXInstrInfo::AnalyzeBranch( + MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) @@ -208,14 +197,13 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineInstr *SecondLastInst = I; // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && - isUnpredicatedTerminator(--I)) + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; // If the block ends with NVPTX::GOTO and NVPTX:CBranch, handle it. if (SecondLastInst->getOpcode() == NVPTX::CBranch && LastInst->getOpcode() == NVPTX::GOTO) { - TBB = SecondLastInst->getOperand(1).getMBB(); + TBB = SecondLastInst->getOperand(1).getMBB(); Cond.push_back(SecondLastInst->getOperand(0)); FBB = LastInst->getOperand(0).getMBB(); return false; @@ -238,7 +226,8 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; + if (I == MBB.begin()) + return 0; --I; if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch) return 0; @@ -248,7 +237,8 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { I = MBB.end(); - if (I == MBB.begin()) return 1; + if (I == MBB.begin()) + return 1; --I; if (I->getOpcode() != NVPTX::CBranch) return 1; @@ -258,11 +248,9 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } -unsigned -NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const { +unsigned NVPTXInstrInfo::InsertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && @@ -270,17 +258,16 @@ NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // One-way branch. if (FBB == 0) { - if (Cond.empty()) // Unconditional branch + if (Cond.empty()) // Unconditional branch BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB); - else // Conditional branch - BuildMI(&MBB, DL, get(NVPTX::CBranch)) - .addReg(Cond[0].getReg()).addMBB(TBB); + else // Conditional branch + BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()) + .addMBB(TBB); return 1; } // Two-way Conditional Branch. - BuildMI(&MBB, DL, get(NVPTX::CBranch)) - .addReg(Cond[0].getReg()).addMBB(TBB); + BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB); BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB); return 2; } diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h index 7b8e218b05..b1972e9b72 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -23,8 +23,7 @@ namespace llvm { -class NVPTXInstrInfo : public NVPTXGenInstrInfo -{ +class NVPTXInstrInfo : public NVPTXGenInstrInfo { NVPTXTargetMachine &TM; const NVPTXRegisterInfo RegInfo; public: @@ -50,30 +49,26 @@ public: * const TargetRegisterClass *RC) const; */ - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const ; - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, + virtual void copyPhysReg( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, bool KillSrc) const; + virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DestReg) const; bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const; bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const; bool isReadSpecialReg(MachineInstr &MI) const; - virtual bool CanTailMerge(const MachineInstr *MI) const ; + virtual bool CanTailMerge(const MachineInstr *MI) const; // Branch analysis. - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const; + virtual bool AnalyzeBranch( + MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const; virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const; + virtual unsigned InsertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const { - return MI.getOperand(2).getImm(); + return MI.getOperand(2).getImm(); } }; diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index f7fa7aa61d..7c257b4c6a 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -25,18 +25,15 @@ using namespace llvm; -namespace llvm { -FunctionPass *createLowerAggrCopies(); -} +namespace llvm { FunctionPass *createLowerAggrCopies(); } char NVPTXLowerAggrCopies::ID = 0; // Lower MemTransferInst or load-store pair to loop -static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr, - Value *dstAddr, Value *len, - //unsigned numLoads, - bool srcVolatile, bool dstVolatile, - LLVMContext &Context, Function &F) { +static void convertTransferToLoop( + Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len, + //unsigned numLoads, + bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) { Type *indType = len->getType(); BasicBlock *origBB = splitAt->getParent(); @@ -48,10 +45,8 @@ static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr, // srcAddr and dstAddr are expected to be pointer types, // so no check is made here. - unsigned srcAS = - dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace(); - unsigned dstAS = - dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace(); + unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointers to (char *) srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS)); @@ -86,12 +81,11 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, origBB->getTerminator()->setSuccessor(0, loopBB); IRBuilder<> builder(origBB, origBB->getTerminator()); - unsigned dstAS = - dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointer to the type of value getting stored - dstAddr = builder.CreateBitCast(dstAddr, - PointerType::get(val->getType(), dstAS)); + dstAddr = + builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS)); IRBuilder<> loop(loopBB); PHINode *ind = loop.CreatePHI(len->getType(), 0); @@ -120,24 +114,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { //BasicBlock *bb = BI; for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; - ++II) { - if (LoadInst * load = dyn_cast<LoadInst>(II)) { + ++II) { + if (LoadInst *load = dyn_cast<LoadInst>(II)) { - if (load->hasOneUse() == false) continue; + if (load->hasOneUse() == false) + continue; - if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue; + if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) + continue; User *use = *(load->use_begin()); - if (StoreInst * store = dyn_cast<StoreInst>(use)) { + if (StoreInst *store = dyn_cast<StoreInst>(use)) { if (store->getOperand(0) != load) //getValueOperand - continue; + continue; aggrLoads.push_back(load); } - } else if (MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) { + } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) { Value *len = intr->getLength(); // If the number of elements being copied is greater // than MaxAggrCopySize, lower it to a loop - if (ConstantInt * len_int = dyn_cast < ConstantInt > (len)) { + if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) { if (len_int->getZExtValue() >= MaxAggrCopySize) { aggrMemcpys.push_back(intr); } @@ -145,9 +141,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // turn variable length memcpy/memmov into loop aggrMemcpys.push_back(intr); } - } else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) { + } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) { Value *len = memsetintr->getLength(); - if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) { + if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) { if (len_int->getZExtValue() >= MaxAggrCopySize) { aggrMemsets.push_back(memsetintr); } @@ -158,8 +154,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { } } } - if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) - && (aggrMemsets.size() == 0)) return false; + if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) && + (aggrMemsets.size() == 0)) + return false; // // Do the transformation of an aggr load/copy/set to a loop diff --git a/lib/Target/NVPTX/NVPTXNumRegisters.h b/lib/Target/NVPTX/NVPTXNumRegisters.h index b4a4dbce98..a95c16b1e6 100644 --- a/lib/Target/NVPTX/NVPTXNumRegisters.h +++ b/lib/Target/NVPTX/NVPTXNumRegisters.h @@ -11,10 +11,6 @@ #ifndef NVPTX_NUM_REGISTERS_H #define NVPTX_NUM_REGISTERS_H -namespace llvm { - -const unsigned NVPTXNumRegisters = 396; - -} +namespace llvm { const unsigned NVPTXNumRegisters = 396; } #endif diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 350a2c5551..282465359b 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -23,69 +23,54 @@ #include "llvm/MC/MachineLocation.h" #include "llvm/Target/TargetInstrInfo.h" - using namespace llvm; -namespace llvm -{ -std::string getNVPTXRegClassName (TargetRegisterClass const *RC) { +namespace llvm { +std::string getNVPTXRegClassName(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) { return ".f32"; } if (RC == &NVPTX::Float64RegsRegClass) { return ".f64"; - } - else if (RC == &NVPTX::Int64RegsRegClass) { + } else if (RC == &NVPTX::Int64RegsRegClass) { return ".s64"; - } - else if (RC == &NVPTX::Int32RegsRegClass) { + } else if (RC == &NVPTX::Int32RegsRegClass) { return ".s32"; - } - else if (RC == &NVPTX::Int16RegsRegClass) { + } else if (RC == &NVPTX::Int16RegsRegClass) { return ".s16"; } - // Int8Regs become 16-bit registers in PTX - else if (RC == &NVPTX::Int8RegsRegClass) { + // Int8Regs become 16-bit registers in PTX + else if (RC == &NVPTX::Int8RegsRegClass) { return ".s16"; - } - else if (RC == &NVPTX::Int1RegsRegClass) { + } else if (RC == &NVPTX::Int1RegsRegClass) { return ".pred"; - } - else if (RC == &NVPTX::SpecialRegsRegClass) { + } else if (RC == &NVPTX::SpecialRegsRegClass) { return "!Special!"; - } - else { + } else { return "INTERNAL"; } return ""; } -std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) { +std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) { return "%f"; } if (RC == &NVPTX::Float64RegsRegClass) { return "%fd"; - } - else if (RC == &NVPTX::Int64RegsRegClass) { + } else if (RC == &NVPTX::Int64RegsRegClass) { return "%rd"; - } - else if (RC == &NVPTX::Int32RegsRegClass) { + } else if (RC == &NVPTX::Int32RegsRegClass) { return "%r"; - } - else if (RC == &NVPTX::Int16RegsRegClass) { + } else if (RC == &NVPTX::Int16RegsRegClass) { return "%rs"; - } - else if (RC == &NVPTX::Int8RegsRegClass) { + } else if (RC == &NVPTX::Int8RegsRegClass) { return "%rc"; - } - else if (RC == &NVPTX::Int1RegsRegClass) { + } else if (RC == &NVPTX::Int1RegsRegClass) { return "%p"; - } - else if (RC == &NVPTX::SpecialRegsRegClass) { + } else if (RC == &NVPTX::SpecialRegsRegClass) { return "!Special!"; - } - else { + } else { return "INTERNAL"; } return ""; @@ -94,23 +79,22 @@ std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) { NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st) - : NVPTXGenRegisterInfo(0), - Is64Bit(st.is64Bit()) {} + : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {} #define GET_REGINFO_TARGET_DESC #include "NVPTXGenRegisterInfo.inc" /// NVPTX Callee Saved Registers -const uint16_t* NVPTXRegisterInfo:: -getCalleeSavedRegs(const MachineFunction *MF) const { +const uint16_t * +NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { static const uint16_t CalleeSavedRegs[] = { 0 }; return CalleeSavedRegs; } // NVPTX Callee Saved Reg Classes -const TargetRegisterClass* const* +const TargetRegisterClass *const * NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 }; + static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 }; return CalleeSavedRegClasses; } @@ -119,10 +103,9 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -void NVPTXRegisterInfo:: -eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, unsigned FIOperandNum, - RegScavenger *RS) const { +void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; @@ -130,15 +113,14 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MI.getParent()->getParent(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(FIOperandNum+1).getImm(); + MI.getOperand(FIOperandNum + 1).getImm(); // Using I0 as the frame pointer MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false); - MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } -int NVPTXRegisterInfo:: -getDwarfRegNum(unsigned RegNum, bool isEH) const { +int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return 0; } @@ -146,7 +128,4 @@ unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return NVPTX::VRFrame; } -unsigned NVPTXRegisterInfo::getRARegister() const { - return 0; -} - +unsigned NVPTXRegisterInfo::getRARegister() const { return 0; } diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index 69f73f213c..d406820661 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -17,7 +17,6 @@ #include "ManagedStringPool.h" #include "llvm/Target/TargetRegisterInfo.h" - #define GET_REGINFO_HEADER #include "NVPTXGenRegisterInfo.inc" #include "llvm/Target/TargetRegisterInfo.h" @@ -33,30 +32,28 @@ class NVPTXRegisterInfo : public NVPTXGenRegisterInfo { private: bool Is64Bit; // Hold Strings that can be free'd all together with NVPTXRegisterInfo - ManagedStringPool ManagedStrPool; + ManagedStringPool ManagedStrPool; public: - NVPTXRegisterInfo(const TargetInstrInfo &tii, - const NVPTXSubtarget &st); - + NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st); //------------------------------------------------------ // Pure virtual functions from TargetRegisterInfo //------------------------------------------------------ // NVPTX callee saved registers - virtual const uint16_t* + virtual const uint16_t * getCalleeSavedRegs(const MachineFunction *MF = 0) const; // NVPTX callee saved register classes - virtual const TargetRegisterClass* const * + virtual const TargetRegisterClass *const * getCalleeSavedRegClasses(const MachineFunction *MF) const; virtual BitVector getReservedRegs(const MachineFunction &MF) const; - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, unsigned FIOperandNum, - RegScavenger *RS=NULL) const; + virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = NULL) const; virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const; virtual unsigned getFrameRegister(const MachineFunction &MF) const; @@ -74,11 +71,9 @@ public: }; - -std::string getNVPTXRegClassName (const TargetRegisterClass *RC); -std::string getNVPTXRegClassStr (const TargetRegisterClass *RC); +std::string getNVPTXRegClassName(const TargetRegisterClass *RC); +std::string getNVPTXRegClassStr(const TargetRegisterClass *RC); } // end namespace llvm - #endif diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp index babe29500d..83dfe12089 100644 --- a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp +++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp @@ -21,9 +21,7 @@ using namespace llvm; -namespace llvm { -FunctionPass *createSplitBBatBarPass(); -} +namespace llvm { FunctionPass *createSplitBBatBarPass(); } char NVPTXSplitBBatBar::ID = 0; @@ -72,6 +70,4 @@ bool NVPTXSplitBBatBar::runOnFunction(Function &F) { // This interface will most likely not be necessary, because this pass will // not be invoked by the driver, but will be used as a prerequisite to // another pass. -FunctionPass *llvm::createSplitBBatBarPass() { - return new NVPTXSplitBBatBar(); -} +FunctionPass *llvm::createSplitBBatBarPass() { return new NVPTXSplitBBatBar(); } diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index 7b62cce2c6..2dcd73dcff 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -22,27 +22,23 @@ using namespace llvm; // Select Driver Interface #include "llvm/Support/CommandLine.h" namespace { -cl::opt<NVPTX::DrvInterface> -DriverInterface(cl::desc("Choose driver interface:"), - cl::values( - clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"), - clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"), - clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), - clEnumValEnd), - cl::init(NVPTX::NVCL)); +cl::opt<NVPTX::DrvInterface> DriverInterface( + cl::desc("Choose driver interface:"), + cl::values(clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"), + clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"), + clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), clEnumValEnd), + cl::init(NVPTX::NVCL)); } NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) -: NVPTXGenSubtargetInfo(TT, CPU, FS), - Is64Bit(is64Bit), - PTXVersion(0), - SmVersion(10) { + : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0), + SmVersion(20) { drvInterface = DriverInterface; // Provide the default CPU if none - std::string defCPU = "sm_10"; + std::string defCPU = "sm_20"; ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS); diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index beea77e38d..670077daaa 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -25,7 +25,7 @@ namespace llvm { class NVPTXSubtarget : public NVPTXGenSubtargetInfo { - + std::string TargetName; NVPTX::DrvInterface drvInterface; bool Is64Bit; @@ -61,13 +61,10 @@ public: bool hasLDU() const { return SmVersion >= 20; } bool hasGenericLdSt() const { return SmVersion >= 20; } inline bool hasHWROT32() const { return false; } - inline bool hasSWROT32() const { - return true; - } - inline bool hasROT32() const { return hasHWROT32() || hasSWROT32() ; } + inline bool hasSWROT32() const { return true; } + inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); } inline bool hasROT64() const { return SmVersion >= 20; } - bool is64Bit() const { return Is64Bit; } unsigned int getSmVersion() const { return SmVersion; } @@ -96,4 +93,4 @@ public: } // End llvm namespace -#endif // NVPTXSUBTARGET_H +#endif // NVPTXSUBTARGET_H diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index cd765fa8cb..67ca6b58e5 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -45,9 +45,11 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" - using namespace llvm; +namespace llvm { +void initializeNVVMReflectPass(PassRegistry&); +} extern "C" void LLVMInitializeNVPTXTarget() { // Register the target. @@ -57,52 +59,42 @@ extern "C" void LLVMInitializeNVPTXTarget() { RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32); RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64); + // FIXME: This pass is really intended to be invoked during IR optimization, + // but it's very NVPTX-specific. + initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); } -NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, - StringRef TT, - StringRef CPU, - StringRef FS, - const TargetOptions& Options, - Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL, - bool is64bit) -: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, is64bit), - DL(Subtarget.getDataLayout()), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit) -/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { -} - - +NVPTXTargetMachine::NVPTXTargetMachine( + const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64bit) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), + FrameLowering( + *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {} void NVPTXTargetMachine32::anchor() {} -NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) -: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { -} +NVPTXTargetMachine32::NVPTXTargetMachine32( + const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} void NVPTXTargetMachine64::anchor() {} -NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) -: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { -} - +NVPTXTargetMachine64::NVPTXTargetMachine64( + const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} namespace llvm { class NVPTXPassConfig : public TargetPassConfig { public: NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) {} NVPTXTargetMachine &getNVPTXTargetMachine() const { return getTM<NVPTXTargetMachine>(); @@ -126,6 +118,4 @@ bool NVPTXPassConfig::addInstSelector() { return false; } -bool NVPTXPassConfig::addPreRegAlloc() { - return false; -} +bool NVPTXPassConfig::addPreRegAlloc() { return false; } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index 1a732be1ad..5fbcf735b4 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef NVPTX_TARGETMACHINE_H #define NVPTX_TARGETMACHINE_H @@ -31,42 +30,40 @@ namespace llvm { /// NVPTXTargetMachine /// class NVPTXTargetMachine : public LLVMTargetMachine { - NVPTXSubtarget Subtarget; - const DataLayout DL; // Calculates type size & alignment - NVPTXInstrInfo InstrInfo; - NVPTXTargetLowering TLInfo; - TargetSelectionDAGInfo TSInfo; + NVPTXSubtarget Subtarget; + const DataLayout DL; // Calculates type size & alignment + NVPTXInstrInfo InstrInfo; + NVPTXTargetLowering TLInfo; + TargetSelectionDAGInfo TSInfo; // NVPTX does not have any call stack frame, but need a NVPTX specific // FrameLowering class because TargetFrameLowering is abstract. - NVPTXFrameLowering FrameLowering; + NVPTXFrameLowering FrameLowering; // Hold Strings that can be free'd all together with NVPTXTargetMachine - ManagedStringPool ManagedStrPool; + ManagedStringPool ManagedStrPool; //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, // bool DisableVerify, MCContext *&OutCtx); public: - NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OP, - bool is64bit); + NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit); virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; } - virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const DataLayout *getDataLayout() const { return &DL;} - virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;} + virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const DataLayout *getDataLayout() const { return &DL; } + virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; } virtual const NVPTXRegisterInfo *getRegisterInfo() const { return &(InstrInfo.getRegisterInfo()); } virtual NVPTXTargetLowering *getTargetLowering() const { - return const_cast<NVPTXTargetLowering*>(&TLInfo); + return const_cast<NVPTXTargetLowering *>(&TLInfo); } virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const { @@ -79,22 +76,19 @@ public: //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level); ManagedStringPool *getManagedStrPool() const { - return const_cast<ManagedStringPool*>(&ManagedStrPool); + return const_cast<ManagedStringPool *>(&ManagedStrPool); } virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); // Emission of machine code through JITCodeEmitter is not supported. - virtual bool addPassesToEmitMachineCode(PassManagerBase &, - JITCodeEmitter &, + virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &, bool = true) { return true; } // Emission of machine code through MCJIT is not supported. - virtual bool addPassesToEmitMC(PassManagerBase &, - MCContext *&, - raw_ostream &, + virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, bool = true) { return true; } @@ -119,7 +113,6 @@ public: CodeGenOpt::Level OL); }; - } // end namespace llvm #endif diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h index b5698a2fc0..6ab0e08ad0 100644 --- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -46,45 +46,43 @@ public: } virtual void Initialize(MCContext &ctx, const TargetMachine &TM) { - TextSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getText()); - DataSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getDataRel()); - BSSSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getBSS()); - ReadOnlySection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getReadOnly()); + TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText()); + DataSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel()); + BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS()); + ReadOnlySection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly()); - StaticCtorSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - StaticDtorSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - LSDASection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - EHFrameSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfLineSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfStrSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfLocSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); + StaticCtorSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + StaticDtorSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + LSDASection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + EHFrameSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfAbbrevSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfInfoSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfLineSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfFrameSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfPubTypesSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfDebugInlineSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfStrSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfLocSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfARangesSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfRangesSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfMacroInfoSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); } virtual const MCSection *getSectionForConstant(SectionKind Kind) const { @@ -93,8 +91,7 @@ public: virtual const MCSection * getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, - const TargetMachine &TM) const { + Mangler *Mang, const TargetMachine &TM) const { return DataSection; } diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp index 1ccc9f7c02..6786eb0224 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -34,7 +34,6 @@ typedef std::map<const Module *, global_val_annot_t> per_module_annot_t; ManagedStatic<per_module_annot_t> annotationCache; - static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) { assert(md && "Invalid mdnode for annotation"); assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands"); @@ -46,7 +45,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) { assert(prop && "Annotation property not a string"); // value - ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1)); + ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1)); assert(Val && "Value operand not a constant int"); std::string keyname = prop->getString().str(); @@ -120,9 +119,9 @@ bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop, bool llvm::isTexture(const llvm::Value &val) { if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { unsigned annot; - if (llvm::findOneNVVMAnnotation(gv, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE], - annot)) { + if (llvm::findOneNVVMAnnotation( + gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE], + annot)) { assert((annot == 1) && "Unexpected annotation on a texture symbol"); return true; } @@ -133,9 +132,9 @@ bool llvm::isTexture(const llvm::Value &val) { bool llvm::isSurface(const llvm::Value &val) { if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { unsigned annot; - if (llvm::findOneNVVMAnnotation(gv, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE], - annot)) { + if (llvm::findOneNVVMAnnotation( + gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE], + annot)) { assert((annot == 1) && "Unexpected annotation on a surface symbol"); return true; } @@ -146,9 +145,9 @@ bool llvm::isSurface(const llvm::Value &val) { bool llvm::isSampler(const llvm::Value &val) { if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { unsigned annot; - if (llvm::findOneNVVMAnnotation(gv, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], - annot)) { + if (llvm::findOneNVVMAnnotation( + gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], + annot)) { assert((annot == 1) && "Unexpected annotation on a sampler symbol"); return true; } @@ -156,9 +155,9 @@ bool llvm::isSampler(const llvm::Value &val) { if (const Argument *arg = dyn_cast<Argument>(&val)) { const Function *func = arg->getParent(); std::vector<unsigned> annot; - if (llvm::findAllNVVMAnnotation(func, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], - annot)) { + if (llvm::findAllNVVMAnnotation( + func, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], + annot)) { if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end()) return true; } @@ -171,8 +170,9 @@ bool llvm::isImageReadOnly(const llvm::Value &val) { const Function *func = arg->getParent(); std::vector<unsigned> annot; if (llvm::findAllNVVMAnnotation(func, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM], - annot)) { + llvm::PropertyAnnotationNames[ + llvm::PROPERTY_ISREADONLY_IMAGE_PARAM], + annot)) { if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end()) return true; } @@ -185,8 +185,9 @@ bool llvm::isImageWriteOnly(const llvm::Value &val) { const Function *func = arg->getParent(); std::vector<unsigned> annot; if (llvm::findAllNVVMAnnotation(func, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM], - annot)) { + llvm::PropertyAnnotationNames[ + llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM], + annot)) { if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end()) return true; } @@ -214,52 +215,44 @@ std::string llvm::getSamplerName(const llvm::Value &val) { } bool llvm::getMaxNTIDx(const Function &F, unsigned &x) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], - x)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], x)); } bool llvm::getMaxNTIDy(const Function &F, unsigned &y) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], - y)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], y)); } bool llvm::getMaxNTIDz(const Function &F, unsigned &z) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], - z)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], z)); } bool llvm::getReqNTIDx(const Function &F, unsigned &x) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], - x)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], x)); } bool llvm::getReqNTIDy(const Function &F, unsigned &y) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], - y)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], y)); } bool llvm::getReqNTIDz(const Function &F, unsigned &z) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], - z)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], z)); } bool llvm::getMinCTASm(const Function &F, unsigned &x) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], - x)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], x)); } bool llvm::isKernelFunction(const Function &F) { unsigned x = 0; - bool retval = llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], - x); + bool retval = llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x); if (retval == false) { // There is no NVVM metadata, check the calling convention if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel) @@ -267,20 +260,19 @@ bool llvm::isKernelFunction(const Function &F) { else return false; } - return (x==1); + return (x == 1); } bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) { std::vector<unsigned> Vs; - bool retval = llvm::findAllNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], - Vs); + bool retval = llvm::findAllNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs); if (retval == false) return false; - for (int i=0, e=Vs.size(); i<e; i++) { + for (int i = 0, e = Vs.size(); i < e; i++) { unsigned v = Vs[i]; - if ( (v >> 16) == index ) { - align = v & 0xFFFF; + if ((v >> 16) == index) { + align = v & 0xFFFF; return true; } } @@ -289,16 +281,15 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) { bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) { if (MDNode *alignNode = I.getMetadata("callalign")) { - for (int i=0, n = alignNode->getNumOperands(); - i<n; i++) { + for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) { if (const ConstantInt *CI = - dyn_cast<ConstantInt>(alignNode->getOperand(i))) { + dyn_cast<ConstantInt>(alignNode->getOperand(i))) { unsigned v = CI->getZExtValue(); - if ( (v>>16) == index ) { + if ((v >> 16) == index) { align = v & 0xFFFF; return true; } - if ( (v>>16) > index ) { + if ((v >> 16) > index) { return false; } } @@ -337,8 +328,8 @@ bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) { // consider several special intrinsics in striping pointer casts, and // provide an option to ignore GEP indicies for find out the base address only // which could be used in simple alias disambigurate. -const Value *llvm::skipPointerTransfer(const Value *V, - bool ignore_GEP_indices) { +const Value * +llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) { V = V->stripPointerCasts(); while (true) { if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) { @@ -360,8 +351,8 @@ const Value *llvm::skipPointerTransfer(const Value *V, // - ignore GEP indicies for find out the base address only, and // - tracking PHINode // which could be used in simple alias disambigurate. -const Value *llvm::skipPointerTransfer(const Value *V, - std::set<const Value *> &processed) { +const Value * +llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) { if (processed.find(V) != processed.end()) return NULL; processed.insert(V); @@ -406,7 +397,6 @@ const Value *llvm::skipPointerTransfer(const Value *V, return V; } - // The following are some useful utilities for debuggung BasicBlock *llvm::getParentBlock(Value *v) { diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h index 247e09b8bc..a208004297 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.h +++ b/lib/Target/NVPTX/NVPTXUtilities.h @@ -23,8 +23,7 @@ #include <string> #include <vector> -namespace llvm -{ +namespace llvm { #define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly" #define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly" @@ -64,8 +63,7 @@ bool isBarrierIntrinsic(llvm::Intrinsic::ID); /// to pass into type construction of CallInst ctors. This turns a null /// terminated list of pointers (or other value types) into a real live vector. /// -template<typename T> -inline std::vector<T> make_vector(T A, ...) { +template <typename T> inline std::vector<T> make_vector(T A, ...) { va_list Args; va_start(Args, A); std::vector<T> Result; @@ -78,8 +76,8 @@ inline std::vector<T> make_vector(T A, ...) { bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id); const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices); -const Value *skipPointerTransfer(const Value *V, - std::set<const Value *> &processed); +const Value * +skipPointerTransfer(const Value *V, std::set<const Value *> &processed); BasicBlock *getParentBlock(Value *v); Function *getParentFunction(Value *v); void dumpBlock(Value *v, char *blockName); diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp index 6a0e5328f6..5f074b33a2 100644 --- a/lib/Target/NVPTX/NVPTXutil.cpp +++ b/lib/Target/NVPTX/NVPTXutil.cpp @@ -18,8 +18,7 @@ using namespace llvm; namespace llvm { -bool isParamLoad(const MachineInstr *MI) -{ +bool isParamLoad(const MachineInstr *MI) { if ((MI->getOpcode() != NVPTX::LD_i32_avar) && (MI->getOpcode() != NVPTX::LD_i64_avar)) return false; @@ -30,13 +29,11 @@ bool isParamLoad(const MachineInstr *MI) return true; } -#define DATA_MASK 0x7f -#define DIGIT_WIDTH 7 -#define MORE_BYTES 0x80 +#define DATA_MASK 0x7f +#define DIGIT_WIDTH 7 +#define MORE_BYTES 0x80 -static int encode_leb128(uint64_t val, int *nbytes, - char *space, int splen) -{ +static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) { char *a; char *end = space + splen; @@ -61,29 +58,30 @@ static int encode_leb128(uint64_t val, int *nbytes, #undef DIGIT_WIDTH #undef MORE_BYTES -uint64_t encode_leb128(const char *str) -{ - union { uint64_t x; char a[8]; } temp64; +uint64_t encode_leb128(const char *str) { + union { + uint64_t x; + char a[8]; + } temp64; temp64.x = 0; - for (unsigned i=0,e=strlen(str); i!=e; ++i) - temp64.a[i] = str[e-1-i]; + for (unsigned i = 0, e = strlen(str); i != e; ++i) + temp64.a[i] = str[e - 1 - i]; char encoded[16]; int nbytes; int retval = encode_leb128(temp64.x, &nbytes, encoded, 16); - (void)retval; - assert(retval == 0 && - "Encoding to leb128 failed"); + (void) retval; + assert(retval == 0 && "Encoding to leb128 failed"); assert(nbytes <= 8 && "Cannot support register names with leb128 encoding > 8 bytes"); temp64.x = 0; - for (int i=0; i<nbytes; ++i) + for (int i = 0; i < nbytes; ++i) temp64.a[i] = encoded[i]; return temp64.x; diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp new file mode 100644 index 0000000000..3bbd1a13da --- /dev/null +++ b/lib/Target/NVPTX/NVVMReflect.cpp @@ -0,0 +1,193 @@ +//===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces occurences of __nvvm_reflect("string") with an +// integer based on -nvvm-reflect-list string=<int> option given to this pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringMap.h" +#include "llvm/Pass.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_os_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include <map> +#include <sstream> +#include <string> +#include <vector> + +#define NVVM_REFLECT_FUNCTION "__nvvm_reflect" + +using namespace llvm; + +namespace llvm { void initializeNVVMReflectPass(PassRegistry &); } + +namespace { +class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass { +private: + //std::map<std::string, int> VarMap; + StringMap<int> VarMap; + typedef std::map<std::string, int>::iterator VarMapIter; + Function *reflectFunction; + +public: + static char ID; + NVVMReflect() : ModulePass(ID) { + VarMap.clear(); + reflectFunction = 0; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } + virtual bool runOnModule(Module &); + + void setVarMap(); +}; +} + +static cl::opt<bool> +NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), + cl::desc("NVVM reflection, enabled by default")); + +char NVVMReflect::ID = 0; +INITIALIZE_PASS(NVVMReflect, "nvvm-reflect", + "Replace occurences of __nvvm_reflect() calls with 0/1", false, + false) + +static cl::list<std::string> +ReflectList("nvvm-reflect-list", cl::value_desc("name=0/1"), + cl::desc("A list of string=num assignments, where num=0 or 1"), + cl::ValueRequired); + +/// This function does the same operation as perl's split. +/// For example, calling this with ("a=1,b=2,c=0", ",") will +/// return ["a=1", "b=2", "c=0"] in the return std::vector. +static std::vector<std::string> +Tokenize(const std::string &str, const std::string &delim) { + std::vector<std::string> tokens; + + size_t p0 = 0, p1 = std::string::npos; + while (p0 != std::string::npos) { + p1 = str.find_first_of(delim, p0); + if (p1 != p0) { + std::string token = str.substr(p0, p1 - p0); + tokens.push_back(token); + } + p0 = str.find_first_not_of(delim, p1); + } + + return tokens; +} + +/// The command line can look as follows : +/// -R a=1,b=2 -R c=3,d=0 -R e=2 +/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the +/// ReflectList vector. First, each of ReflectList[i] is 'split' +/// using "," as the delimiter. Then each of this part is split +/// using "=" as the delimiter. +void NVVMReflect::setVarMap() { + for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) { + // DEBUG(dbgs() << "Option : " << ReflectList[i] << std::endl); + std::vector<std::string> nameValList = Tokenize(ReflectList[i], ","); + for (unsigned j = 0, ej = nameValList.size(); j != ej; ++j) { + std::vector<std::string> nameValPair = Tokenize(nameValList[j], "="); + assert(nameValPair.size() == 2 && "name=val expected"); + std::stringstream valstream(nameValPair[1]); + int val; + valstream >> val; + assert((!(valstream.fail())) && "integer value expected"); + VarMap[nameValPair[0]] = val; + } + } +} + +bool NVVMReflect::runOnModule(Module &M) { + if (!NVVMReflectEnabled) + return false; + + setVarMap(); + + reflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION); + + // If reflect function is not used, then there will be + // no entry in the module. + if (reflectFunction == 0) { + return false; + } + + // Validate _reflect function + assert(reflectFunction->isDeclaration() && + "_reflect function should not have a body"); + assert(reflectFunction->getReturnType()->isIntegerTy() && + "_reflect's return type should be integer"); + + std::vector<Instruction *> toRemove; + + // Go through the uses of reflectFunction in this Function. + // Each of them should a CallInst with a ConstantArray argument. + // First validate that. If the c-string corresponding to the + // ConstantArray can be found successfully, see if it can be + // found in VarMap. If so, replace the uses of CallInst with the + // value found in VarMap. If not, replace the use with value 0. + for (Value::use_iterator iter = reflectFunction->use_begin(), + iterEnd = reflectFunction->use_end(); + iter != iterEnd; ++iter) { + assert(isa<CallInst>(*iter) && "Only a call instruction can use _reflect"); + CallInst *reflect = cast<CallInst>(*iter); + + assert((reflect->getNumOperands() == 2) && + "Only one operand expect for _reflect function"); + // In cuda, we will have an extra constant-to-generic conversion of + // the string. + const Value *conv = reflect->getArgOperand(0); + assert(isa<CallInst>(conv) && "Expected a const-to-gen conversion"); + const CallInst *convcall = cast<CallInst>(conv); + const Value *str = convcall->getArgOperand(0); + assert(isa<ConstantExpr>(str) && + "Format of _reflect function not recognized"); + const ConstantExpr *gep = cast<ConstantExpr>(str); + + const Value *sym = gep->getOperand(0); + assert(isa<Constant>(sym) && "Format of _reflect function not recognized"); + + const Constant *symstr = cast<Constant>(sym); + + assert(isa<ConstantDataSequential>(symstr->getOperand(0)) && + "Format of _reflect function not recognized"); + + assert(cast<ConstantDataSequential>(symstr->getOperand(0))->isCString() && + "Format of _reflect function not recognized"); + + std::string reflectArg = + cast<ConstantDataSequential>(symstr->getOperand(0))->getAsString(); + + reflectArg = reflectArg.substr(0, reflectArg.size() - 1); + // DEBUG(dbgs() << "Arg of _reflect : " << reflectArg << std::endl); + + int reflectVal = 0; // The default value is 0 + if (VarMap.find(reflectArg) != VarMap.end()) { + reflectVal = VarMap[reflectArg]; + } + reflect->replaceAllUsesWith( + ConstantInt::get(reflect->getType(), reflectVal)); + toRemove.push_back(reflect); + } + if (toRemove.size() == 0) + return false; + + for (unsigned i = 0, e = toRemove.size(); i != e; ++i) + toRemove[i]->eraseFromParent(); + return true; +} diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp index 6c801b875e..cc7d4dc5ec 100644 --- a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp +++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp @@ -17,7 +17,7 @@ Target llvm::TheNVPTXTarget64; extern "C" void LLVMInitializeNVPTXTargetInfo() { RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx", - "NVIDIA PTX 32-bit"); + "NVIDIA PTX 32-bit"); RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64", - "NVIDIA PTX 64-bit"); + "NVIDIA PTX 64-bit"); } diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h index a7347efd78..45cc0b8b67 100644 --- a/lib/Target/NVPTX/cl_common_defines.h +++ b/lib/Target/NVPTX/cl_common_defines.h @@ -24,22 +24,21 @@ enum { CLK_LUMINANCE = 0x10B9 #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) - , + , CLK_Rx = 0x10BA, CLK_RGx = 0x10BB, CLK_RGBx = 0x10BC #endif }; - typedef enum clk_channel_type { // valid formats for float return types - CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8 - CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16 - CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8 - CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16 - CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half - CLK_FLOAT = 0x10DE, // four channel RGBA float + CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8 + CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16 + CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8 + CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16 + CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half + CLK_FLOAT = 0x10DE, // four channel RGBA float #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) CLK_UNORM_SHORT_565 = 0x10D4, @@ -48,7 +47,7 @@ typedef enum clk_channel_type { #endif // valid only for integer return types - CLK_SIGNED_INT8 = 0x10D7, + CLK_SIGNED_INT8 = 0x10D7, CLK_SIGNED_INT16 = 0x10D8, CLK_SIGNED_INT32 = 0x10D9, CLK_UNSIGNED_INT8 = 0x10DA, @@ -56,70 +55,68 @@ typedef enum clk_channel_type { CLK_UNSIGNED_INT32 = 0x10DC, // CI SPI for CPU - __CLK_UNORM_INT8888 , // four channel ARGB unorm8 - __CLK_UNORM_INT8888R, // four channel BGRA unorm8 + __CLK_UNORM_INT8888, // four channel ARGB unorm8 + __CLK_UNORM_INT8888R, // four channel BGRA unorm8 __CLK_VALID_IMAGE_TYPE_COUNT, __CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT, - __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to - // represent any image type - __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1 -}clk_channel_type; + __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to + // represent any image type + __CLK_VALID_IMAGE_TYPE_MASK = (1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS) - 1 +} clk_channel_type; typedef enum clk_sampler_type { - __CLK_ADDRESS_BASE = 0, - CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE, - CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE, - CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE, - CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE, - CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE, + __CLK_ADDRESS_BASE = 0, + CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE, #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) - CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR, + CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR, #endif - __CLK_ADDRESS_MASK = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | - CLK_ADDRESS_CLAMP_TO_EDGE | - CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR, - __CLK_ADDRESS_BITS = 3, // number of bits required to - // represent address info - - __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS, - CLK_NORMALIZED_COORDS_FALSE = 0, - CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE, - __CLK_NORMALIZED_MASK = CLK_NORMALIZED_COORDS_FALSE | - CLK_NORMALIZED_COORDS_TRUE, - __CLK_NORMALIZED_BITS = 1, // number of bits required to - // represent normalization - - __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + - __CLK_NORMALIZED_BITS, - CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE, - CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE, - CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE, - __CLK_FILTER_MASK = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | - CLK_FILTER_ANISOTROPIC, - __CLK_FILTER_BITS = 2, // number of bits required to - // represent address info - - __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS, - CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE, - CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE, - CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE, - __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | - CLK_MIP_ANISOTROPIC, - __CLK_MIP_BITS = 2, - - __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS, - __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK | - __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK, - - __CLK_ANISOTROPIC_RATIO_BITS = 5, - __CLK_ANISOTROPIC_RATIO_MASK = (int) 0x80000000 >> - (__CLK_ANISOTROPIC_RATIO_BITS-1) + __CLK_ADDRESS_MASK = + CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR, + __CLK_ADDRESS_BITS = 3, // number of bits required to + // represent address info + + __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS, + CLK_NORMALIZED_COORDS_FALSE = 0, + CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE, + __CLK_NORMALIZED_MASK = + CLK_NORMALIZED_COORDS_FALSE | CLK_NORMALIZED_COORDS_TRUE, + __CLK_NORMALIZED_BITS = 1, // number of bits required to + // represent normalization + + __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + __CLK_NORMALIZED_BITS, + CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE, + CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE, + CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE, + __CLK_FILTER_MASK = + CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | CLK_FILTER_ANISOTROPIC, + __CLK_FILTER_BITS = 2, // number of bits required to + // represent address info + + __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS, + CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE, + CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE, + CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE, + __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | CLK_MIP_ANISOTROPIC, + __CLK_MIP_BITS = 2, + + __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS, + __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK | + __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK, + + __CLK_ANISOTROPIC_RATIO_BITS = 5, + __CLK_ANISOTROPIC_RATIO_MASK = + (int) 0x80000000 >> (__CLK_ANISOTROPIC_RATIO_BITS - 1) } clk_sampler_type; // Memory synchronization -#define CLK_LOCAL_MEM_FENCE (1 << 0) -#define CLK_GLOBAL_MEM_FENCE (1 << 1) +#define CLK_LOCAL_MEM_FENCE (1 << 0) +#define CLK_GLOBAL_MEM_FENCE (1 << 1) #endif // __CL_COMMON_DEFINES_H__ diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 3d583060d1..bacc108c62 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -13,7 +13,7 @@ #define DEBUG_TYPE "asm-printer" #include "PPCInstPrinter.h" -#include "MCTargetDesc/PPCBaseInfo.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -87,35 +87,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier) { unsigned Code = MI->getOperand(OpNo).getImm(); - if (!Modifier) { - unsigned CCReg = MI->getOperand(OpNo+1).getReg(); - unsigned RegNo; - switch (CCReg) { - default: llvm_unreachable("Unknown CR register"); - case PPC::CR0: RegNo = 0; break; - case PPC::CR1: RegNo = 1; break; - case PPC::CR2: RegNo = 2; break; - case PPC::CR3: RegNo = 3; break; - case PPC::CR4: RegNo = 4; break; - case PPC::CR5: RegNo = 5; break; - case PPC::CR6: RegNo = 6; break; - case PPC::CR7: RegNo = 7; break; - } - - // Print the CR bit number. The Code is ((BI << 5) | BO) for a - // BCC, but we must have the positive form here (BO == 12) - unsigned BI = Code >> 5; - assert((Code & 0xF) == 12 && - "BO in predicate bit must have the positive form"); - - unsigned Value = 4*RegNo + BI; - O << Value; - return; - } if (StringRef(Modifier) == "cc") { switch ((PPC::Predicate)Code) { - case PPC::PRED_ALWAYS: return; // Don't print anything for always. case PPC::PRED_LT: O << "lt"; return; case PPC::PRED_LE: O << "le"; return; case PPC::PRED_EQ: O << "eq"; return; @@ -129,8 +103,6 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, assert(StringRef(Modifier) == "reg" && "Need to specify 'cc' or 'reg' as predicate op modifier!"); - // Don't print the register for 'always'. - if (Code == PPC::PRED_ALWAYS) return; printOperand(MI, OpNo+1, O); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index f24edf62ed..ec2657403e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -30,13 +30,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case FK_Data_2: case FK_Data_4: case FK_Data_8: - case PPC::fixup_ppc_toc: case PPC::fixup_ppc_tlsreg: case PPC::fixup_ppc_nofixup: return Value; - case PPC::fixup_ppc_lo14: - case PPC::fixup_ppc_toc16_ds: - return (Value & 0xffff) << 2; case PPC::fixup_ppc_brcond14: return Value & 0xfffc; case PPC::fixup_ppc_br24: @@ -48,8 +44,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case PPC::fixup_ppc_ha16: return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff; case PPC::fixup_ppc_lo16: - case PPC::fixup_ppc_toc16: return Value & 0xffff; + case PPC::fixup_ppc_lo16_ds: + return Value & 0xfffc; } } @@ -82,10 +79,7 @@ public: { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_lo16", 16, 16, 0 }, { "fixup_ppc_ha16", 16, 16, 0 }, - { "fixup_ppc_lo14", 16, 14, 0 }, - { "fixup_ppc_toc", 0, 64, 0 }, - { "fixup_ppc_toc16", 16, 16, 0 }, - { "fixup_ppc_toc16_ds", 16, 14, 0 }, + { "fixup_ppc_lo16_ds", 16, 14, 0 }, { "fixup_ppc_tlsreg", 0, 0, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h deleted file mode 100644 index 9c975c089e..0000000000 --- a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h +++ /dev/null @@ -1,70 +0,0 @@ -//===-- PPCBaseInfo.h - Top level definitions for PPC -----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains small standalone helper functions and enum definitions for -// the PPC target useful for the compiler back-end and the MC libraries. -// As such, it deliberately does not include references to LLVM core -// code gen types, passes, etc.. -// -//===----------------------------------------------------------------------===// - -#ifndef PPCBASEINFO_H -#define PPCBASEINFO_H - -#include "PPCMCTargetDesc.h" -#include "llvm/Support/ErrorHandling.h" - -namespace llvm { - -/// getPPCRegisterNumbering - Given the enum value for some register, e.g. -/// PPC::F14, return the number that it corresponds to (e.g. 14). -inline static unsigned getPPCRegisterNumbering(unsigned RegEnum) { - using namespace PPC; - switch (RegEnum) { - case 0: return 0; - case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0; - case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1; - case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2; - case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3; - case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4; - case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5; - case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6; - case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7; - case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8; - case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9; - case R10: case X10: case F10: case V10: case CR2EQ: return 10; - case R11: case X11: case F11: case V11: case CR2UN: return 11; - case R12: case X12: case F12: case V12: case CR3LT: return 12; - case R13: case X13: case F13: case V13: case CR3GT: return 13; - case R14: case X14: case F14: case V14: case CR3EQ: return 14; - case R15: case X15: case F15: case V15: case CR3UN: return 15; - case R16: case X16: case F16: case V16: case CR4LT: return 16; - case R17: case X17: case F17: case V17: case CR4GT: return 17; - case R18: case X18: case F18: case V18: case CR4EQ: return 18; - case R19: case X19: case F19: case V19: case CR4UN: return 19; - case R20: case X20: case F20: case V20: case CR5LT: return 20; - case R21: case X21: case F21: case V21: case CR5GT: return 21; - case R22: case X22: case F22: case V22: case CR5EQ: return 22; - case R23: case X23: case F23: case V23: case CR5UN: return 23; - case R24: case X24: case F24: case V24: case CR6LT: return 24; - case R25: case X25: case F25: case V25: case CR6GT: return 25; - case R26: case X26: case F26: case V26: case CR6EQ: return 26; - case R27: case X27: case F27: case V27: case CR6UN: return 27; - case R28: case X28: case F28: case V28: case CR7LT: return 28; - case R29: case X29: case F29: case V29: case CR7GT: return 29; - case R30: case X30: case F30: case V30: case CR7EQ: return 30; - case R31: case X31: case F31: case V31: case CR7UN: return 31; - default: - llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!"); - } -} - -} // end namespace llvm; - -#endif diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 61868d446f..84e4175e63 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -133,6 +133,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC_ADDR16_LO; break; + case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: + Type = ELF::R_PPC64_TOC16; + break; case MCSymbolRefExpr::VK_PPC_TOC16_LO: Type = ELF::R_PPC64_TOC16_LO; break; @@ -144,35 +147,12 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, break; } break; - case PPC::fixup_ppc_lo14: - Type = ELF::R_PPC_ADDR14; - break; - case PPC::fixup_ppc_toc: - Type = ELF::R_PPC64_TOC; - break; - case PPC::fixup_ppc_toc16: + case PPC::fixup_ppc_lo16_ds: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); - case MCSymbolRefExpr::VK_PPC_TPREL16_LO: - Type = ELF::R_PPC64_TPREL16_LO; - break; - case MCSymbolRefExpr::VK_PPC_DTPREL16_LO: - Type = ELF::R_PPC64_DTPREL16_LO; - break; case MCSymbolRefExpr::VK_None: - Type = ELF::R_PPC64_TOC16; - break; - case MCSymbolRefExpr::VK_PPC_TOC16_LO: - Type = ELF::R_PPC64_TOC16_LO; - break; - case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO: - Type = ELF::R_PPC64_GOT_TLSLD16_LO; + Type = ELF::R_PPC64_ADDR16_DS; break; - } - break; - case PPC::fixup_ppc_toc16_ds: - switch (Modifier) { - default: llvm_unreachable("Unsupported Modifier"); case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: Type = ELF::R_PPC64_TOC16_DS; break; @@ -253,8 +233,7 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { switch ((unsigned)Fixup.getKind()) { case PPC::fixup_ppc_ha16: case PPC::fixup_ppc_lo16: - case PPC::fixup_ppc_toc16: - case PPC::fixup_ppc_toc16_ds: + case PPC::fixup_ppc_lo16_ds: RelocOffset += 2; break; default: diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 7917f7736e..86c44f57a5 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -12,6 +12,8 @@ #include "llvm/MC/MCFixup.h" +#undef PPC + namespace llvm { namespace PPC { enum Fixups { @@ -31,19 +33,9 @@ enum Fixups { /// like 'lis'. fixup_ppc_ha16, - /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs - /// like 'std'. - fixup_ppc_lo14, - - /// fixup_ppc_toc - Insert value of TOC base (.TOC.). - fixup_ppc_toc, - - /// fixup_ppc_toc16 - A 16-bit signed fixup relative to the TOC base. - fixup_ppc_toc16, - - /// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with - /// implied 2 zero bits - fixup_ppc_toc16_ds, + /// fixup_ppc_lo16_ds - A 14-bit fixup corresponding to lo16(_foo) with + /// implied 2 zero bits for instrs like 'std'. + fixup_ppc_lo16_ds, /// fixup_ppc_tlsreg - Insert thread-pointer register number. fixup_ppc_tlsreg, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index d048426d43..2223cd623c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -13,10 +13,10 @@ #define DEBUG_TYPE "mccodeemitter" #include "MCTargetDesc/PPCMCTargetDesc.h" -#include "MCTargetDesc/PPCBaseInfo.h" #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -33,24 +33,17 @@ class PPCMCCodeEmitter : public MCCodeEmitter { void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCSubtargetInfo &STI; + const MCContext &CTX; Triple TT; public: PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, MCContext &ctx) - : STI(sti), TT(STI.getTargetTriple()) { + : STI(sti), CTX(ctx), TT(STI.getTargetTriple()) { } ~PPCMCCodeEmitter() {} - bool is64BitMode() const { - return (STI.getFeatureBits() & PPC::Feature64Bit) != 0; - } - - bool isSVR4ABI() const { - return TT.isMacOSX() == 0; - } - unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo, @@ -81,12 +74,11 @@ public: SmallVectorImpl<MCFixup> &Fixups) const { uint64_t Bits = getBinaryCodeForInstr(MI, Fixups); - // BL8_NOP_ELF, BLA8_NOP_ELF, etc., all have a size of 8 because of the - // following 'nop'. + // BL8_NOP etc. all have a size of 8 because of the following 'nop'. unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value! unsigned Opcode = MI.getOpcode(); - if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF || - Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD) + if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP || + Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) Size = 8; // Output the constant in big endian byte order. @@ -121,11 +113,11 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo, (MCFixupKind)PPC::fixup_ppc_br24)); // For special TLS calls, add another fixup for the symbol. Apparently - // BL8_NOP_ELF, BL8_NOP_ELF_TLSGD, and BL8_NOP_ELF_TLSLD are sufficiently + // BL8_NOP, BL8_NOP_TLSGD, and BL8_NOP_TLSLD are sufficiently // similar that TblGen will not generate a separate case for the latter // two, so this is the only way to get the extra fixup generated. unsigned Opcode = MI.getOpcode(); - if (Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD) { + if (Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) { const MCOperand &MO2 = MI.getOperand(OpNo+1); Fixups.push_back(MCFixup::Create(0, MO2.getExpr(), (MCFixupKind)PPC::fixup_ppc_nofixup)); @@ -178,12 +170,8 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits; // Add a fixup for the displacement field. - if (isSVR4ABI() && is64BitMode()) - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_toc16)); - else - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo16)); + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo16)); return RegBits; } @@ -199,13 +187,9 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, if (MO.isImm()) return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits; - // Add a fixup for the branch target. - if (isSVR4ABI() && is64BitMode()) - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_toc16_ds)); - else - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo14)); + // Add a fixup for the displacement field. + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo16_ds)); return RegBits; } @@ -220,7 +204,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, // Return the thread-pointer register's encoding. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_tlsreg)); - return getPPCRegisterNumbering(PPC::X13); + return CTX.getRegisterInfo().getEncodingValue(PPC::X13); } unsigned PPCMCCodeEmitter:: @@ -231,7 +215,7 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo, MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MTCRF8) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); - return 0x80 >> getPPCRegisterNumbering(MO.getReg()); + return 0x80 >> CTX.getRegisterInfo().getEncodingValue(MO.getReg()); } @@ -243,7 +227,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, // The GPR operand should come through here though. assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - return getPPCRegisterNumbering(MO.getReg()); + return CTX.getRegisterInfo().getEncodingValue(MO.getReg()); } assert(MO.isImm() && diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 4a420929d0..38a7420d97 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -47,6 +47,10 @@ MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, uint8_t OSABI); } // End llvm namespace +// Generated files will use "namespace PPC". To avoid symbol clash, +// undefine PPC here. PPC may be predefined on some hosts. +#undef PPC + // Defines symbolic names for PowerPC registers. This defines a mapping from // register name to register number. // diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp index 12bb0a1434..d84eb9c6aa 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp @@ -18,7 +18,6 @@ using namespace llvm; PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { switch (Opcode) { - default: llvm_unreachable("Unknown PPC branch opcode!"); case PPC::PRED_EQ: return PPC::PRED_NE; case PPC::PRED_NE: return PPC::PRED_EQ; case PPC::PRED_LT: return PPC::PRED_GE; @@ -28,4 +27,5 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { case PPC::PRED_NU: return PPC::PRED_UN; case PPC::PRED_UN: return PPC::PRED_NU; } + llvm_unreachable("Unknown PPC branch opcode!"); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index 972e13852e..ad2b018128 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -17,11 +17,14 @@ // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC +// Generated files will use "namespace PPC". To avoid symbol clash, +// undefine PPC here. PPC may be predefined on some hosts. +#undef PPC + namespace llvm { namespace PPC { /// Predicate - These are "(BI << 5) | BO" for various predicates. enum Predicate { - PRED_ALWAYS = (0 << 5) | 20, PRED_LT = (0 << 5) | 12, PRED_LE = (1 << 5) | 4, PRED_EQ = (2 << 5) | 12, diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index f71979f245..446b6854fb 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -15,7 +15,6 @@ #ifndef LLVM_TARGET_POWERPC_H #define LLVM_TARGET_POWERPC_H -#include "MCTargetDesc/PPCBaseInfo.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include <string> diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 992913602a..a1ea2297bf 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -59,8 +59,18 @@ def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", "Enable the fsqrt instruction">; def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true", "Enable the stfiwx instruction">; +def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true", + "Enable the lfiwax instruction">; +def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true", + "Enable the fri[mnpz] instructions">; +def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true", + "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">; def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; +def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true", + "Enable the popcnt[dw] instructions">; +def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true", + "Enable the ldbrx instruction">; def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", "Enable Book E instructions">; def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", @@ -71,15 +81,9 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", // // CMPB p6, p6x, p7 cmpb // DFP p6, p6x, p7 decimal floating-point instructions -// FLT_CVT p7 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz -// FPRND p5x, p6, p6x, p7 frim, frin, frip, friz // FRE p5 through p7 fre (vs. fres, available since p3) // FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3) -// LDBRX p7 load with byte reversal -// LFIWAX p6, p6x, p7 lfiwax -// LFIWZX p7 lfiwzx // POPCNTB p5 through p7 popcntb and related instructions -// POPCNTD p7 popcntd and related instructions // RECIP_PREC p6, p6x, p7 higher precision reciprocal estimates // VSX p7 vector-scalar instruction set @@ -128,16 +132,18 @@ def : ProcessorModel<"e500mc", PPCE500mcModel, def : ProcessorModel<"e5500", PPCE5500Model, [DirectiveE5500, FeatureMFOCRF, Feature64Bit, FeatureSTFIWX, FeatureBookE, FeatureISEL]>; -def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, - FeatureMFOCRF, FeatureFSqrt, - FeatureSTFIWX, FeatureISEL, - Feature64Bit - /*, Feature64BitRegs */]>; -def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE, - FeatureMFOCRF, FeatureFSqrt, - FeatureSTFIWX, FeatureISEL, - Feature64Bit /*, Feature64BitRegs */, - FeatureQPX]>; +def : Processor<"a2", PPCA2Itineraries, + [DirectiveA2, FeatureBookE, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, Feature64Bit + /*, Feature64BitRegs */]>; +def : Processor<"a2q", PPCA2Itineraries, + [DirectiveA2, FeatureBookE, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, Feature64Bit + /*, Feature64BitRegs */, FeatureQPX]>; def : Processor<"pwr3", G5Itineraries, [DirectivePwr3, FeatureAltivec, FeatureMFOCRF, FeatureSTFIWX, Feature64Bit]>; @@ -149,18 +155,23 @@ def : Processor<"pwr5", G5Itineraries, FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; def : Processor<"pwr5x", G5Itineraries, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, - FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; + FeatureFSqrt, FeatureSTFIWX, FeatureFPRND, + Feature64Bit]>; def : Processor<"pwr6", G5Itineraries, [DirectivePwr6, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, - Feature64Bit /*, Feature64BitRegs */]>; + FeatureLFIWAX, FeatureFPRND, Feature64Bit + /*, Feature64BitRegs */]>; def : Processor<"pwr6x", G5Itineraries, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, - FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; + FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, Feature64Bit]>; def : Processor<"pwr7", G5Itineraries, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, - FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>; + FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, + FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : Processor<"ppc64", G5Itineraries, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index eae9b7b7fb..74cc1bb762 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -370,7 +370,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *PICBase = MF->getPICBaseSymbol(); // Emit the 'bl'. - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL_Darwin) // Darwin vs SVR4 doesn't matter here. + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL) // FIXME: We would like an efficient form for this, so we don't have to do // a lot of extra uniquing. .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); @@ -458,11 +458,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Transform %Xd = LDtocL <ga:@sym>, %Xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - // Change the opcode to LDrs, which is a form of LD with the offset - // specified by a SymbolLo. If the global address is external, has + // Change the opcode to LD. If the global address is external, has // common linkage, or is a jump table address, then reference the // associated TOC entry. Otherwise reference the symbol directly. - TmpInst.setOpcode(PPC::LDrs); + TmpInst.setOpcode(PPC::LD); const MachineOperand &MO = MI->getOperand(1); assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) && "Invalid operand for LDtocL!"); @@ -496,10 +495,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Transform %Xd = ADDItocL %Xs, <ga:@sym> LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - // Change the opcode to ADDI8L. If the global address is external, then + // Change the opcode to ADDI8. If the global address is external, then // generate a TOC entry and reference that. Otherwise reference the // symbol directly. - TmpInst.setOpcode(PPC::ADDI8L); + TmpInst.setOpcode(PPC::ADDI8); const MachineOperand &MO = MI->getOperand(2); assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL"); MCSymbol *MOSymbol = 0; @@ -548,9 +547,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - // Change the opcode to LDrs, which is a form of LD with the offset - // specified by a SymbolLo. - TmpInst.setOpcode(PPC::LDrs); + // Change the opcode to LD. + TmpInst.setOpcode(PPC::LD); const MachineOperand &MO = MI->getOperand(1); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); @@ -579,7 +577,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::ADDItlsgdL: { // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym> - // Into: %Xd = ADDI8L %Xs, sym@got@tlsgd@l + // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); @@ -587,7 +585,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymGotTlsGD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymGotTlsGD)); @@ -595,7 +593,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::GETtlsADDR: { // Transform: %X3 = GETtlsADDR %X3, <ga:@sym> - // Into: BL8_NOP_ELF_TLSGD __tls_get_addr(sym@tlsgd) + // Into: BL8_NOP_TLSGD __tls_get_addr(sym@tlsgd) assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); StringRef Name = "__tls_get_addr"; @@ -608,7 +606,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSGD) + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSGD) .addExpr(TlsRef) .addExpr(SymVar)); return; @@ -631,7 +629,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::ADDItlsldL: { // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym> - // Into: %Xd = ADDI8L %Xs, sym@got@tlsld@l + // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); @@ -639,7 +637,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymGotTlsLD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymGotTlsLD)); @@ -647,7 +645,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::GETtlsldADDR: { // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym> - // Into: BL8_NOP_ELF_TLSLD __tls_get_addr(sym@tlsld) + // Into: BL8_NOP_TLSLD __tls_get_addr(sym@tlsld) assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); StringRef Name = "__tls_get_addr"; @@ -660,7 +658,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSLD) + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSLD) .addExpr(TlsRef) .addExpr(SymVar)); return; @@ -683,7 +681,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::ADDIdtprelL: { // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym> - // Into: %Xd = ADDI8L %Xs, sym@dtprel@l + // Into: %Xd = ADDI8 %Xs, sym@dtprel@l assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); @@ -691,7 +689,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymDtprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymDtprel)); @@ -911,18 +909,19 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { OutStreamer.EmitLabel(Stub); OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext); + // mflr r0 OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0)); - // FIXME: MCize this. - OutStreamer.EmitRawText("\tbcl 20, 31, " + Twine(AnonSymbol->getName())); + // bcl 20, 31, AnonSymbol + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCL).addExpr(Anon)); OutStreamer.EmitLabel(AnonSymbol); // mflr r11 OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); // addis r11, r11, ha16(LazyPtr - AnonSymbol) const MCExpr *Sub = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext), - MCSymbolRefExpr::Create(AnonSymbol, OutContext), - OutContext); + Anon, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS) .addReg(PPC::R11) .addReg(PPC::R11) diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index b98cc489f6..81a54d7015 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -189,12 +189,23 @@ INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", /// isCompareEquals - Returns true if the instruction is a compare equals /// instruction with an immediate operand. -static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) { - if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) { +static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp, + bool &Int64Cmp) { + if (MI->getOpcode() == PPC::CMPWI) { SignedCmp = true; + Int64Cmp = false; return true; - } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) { + } else if (MI->getOpcode() == PPC::CMPDI) { + SignedCmp = true; + Int64Cmp = true; + return true; + } else if (MI->getOpcode() == PPC::CMPLWI) { + SignedCmp = false; + Int64Cmp = false; + return true; + } else if (MI->getOpcode() == PPC::CMPLDI) { SignedCmp = false; + Int64Cmp = true; return true; } @@ -353,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); RI != RE; ++RI) { IV_Opnd = &RI.getOperand(); - bool SignedCmp; + bool SignedCmp, Int64Cmp; MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) && + if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) && MI->getOperand(0).getReg() == PredReg) { OldInsts.push_back(MI); @@ -380,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, assert(InitialValue->isReg() && "Expecting register for init value"); unsigned InitialValueReg = InitialValue->getReg(); - const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); + MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); // Here we need to look for an immediate load (an li or lis/ori pair). if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 || DefInstr->getOpcode() == PPC::ORI)) { - int64_t start = (short) DefInstr->getOperand(2).getImm(); - const MachineInstr *DefInstr2 = - MRI->getVRegDef(DefInstr->getOperand(0).getReg()); + int64_t start = DefInstr->getOperand(2).getImm(); + MachineInstr *DefInstr2 = + MRI->getVRegDef(DefInstr->getOperand(1).getReg()); if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 || DefInstr2->getOpcode() == PPC::LIS)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); @@ -399,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, if ((count % iv_value) != 0) { return 0; } - return new CountValue(count/iv_value); + + OldInsts.push_back(DefInstr); + OldInsts.push_back(DefInstr2); + + // count/iv_value, the trip count, should be positive here. If it + // is negative, that indicates that the counter will wrap. + if (Int64Cmp) + return new CountValue(count/iv_value); + else + return new CountValue(uint32_t(count/iv_value)); } } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 || DefInstr->getOpcode() == PPC::LI)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); - int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm())); + int64_t count = ImmVal - + int64_t(short(DefInstr->getOperand(1).getImm())); if ((count % iv_value) != 0) { return 0; } - return new CountValue(count/iv_value); + + OldInsts.push_back(DefInstr); + + if (Int64Cmp) + return new CountValue(count/iv_value); + else + return new CountValue(uint32_t(count/iv_value)); } else if (iv_value == 1 || iv_value == -1) { // We can't determine a constant starting value. if (ImmVal == 0) { @@ -417,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, } // FIXME: handle non-zero end value. } - // FIXME: handle non-unit increments (we might not want to introduce division - // but we can handle some 2^n cases with shifts). + // FIXME: handle non-unit increments (we might not want to introduce + // division but we can handle some 2^n cases with shifts). } } @@ -489,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI, if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (!MRI->use_nodbg_empty(Reg)) { - // This instruction has users, but if the only user is the phi node for the - // parent block, and the only use of that phi node is this instruction, then - // this instruction is dead: both it (and the phi node) can be removed. + // This instruction has users, but if the only user is the phi node for + // the parent block, and the only use of that phi node is this + // instruction, then this instruction is dead: both it (and the phi + // node) can be removed. MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg); if (llvm::next(I) == MRI->use_end() && I.getOperand().getParent()->isPHI()) { @@ -594,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { DEBUG(dbgs() << "failed to get trip count!\n"); return false; } + + if (TripCount->isImm()) { + DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n"); + + // FIXME: We currently can't form 64-bit constants + // (including 32-bit unsigned constants) + if (!isInt<32>(TripCount->getImm())) + return false; + } + // Does the loop contain any invalid instructions? if (containsInvalidInstruction(L)) { return false; @@ -647,7 +685,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { const TargetRegisterClass *SrcRC = MF->getRegInfo().getRegClass(TripCount->getReg()); CountReg = MF->getRegInfo().createVirtualRegister(RC); - unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ? + unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ? (unsigned) PPC::EXTSW_32_64 : (unsigned) TargetOpcode::COPY; BuildMI(*Preheader, InsertPos, dl, @@ -664,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { // Put the trip count in a register for transfer into the count register. int64_t CountImm = TripCount->getImm(); - assert(!TripCount->isNeg() && "Constant trip count must be positive"); + if (TripCount->isNeg()) + CountImm = -CountImm; CountReg = MF->getRegInfo().createVirtualRegister(RC); - if (CountImm > 0xFFFF) { + if (abs64(CountImm) > 0x7FFF) { BuildMI(*Preheader, InsertPos, dl, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), - CountReg).addImm(CountImm >> 16); + CountReg).addImm((CountImm >> 16) & 0xFFFF); unsigned CountReg1 = CountReg; CountReg = MF->getRegInfo().createVirtualRegister(RC); BuildMI(*Preheader, InsertPos, dl, diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index caeb1796f7..c8a29a3d2c 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -136,3 +136,9 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAV F27, F28, F29, F30, F31, CR2, CR3, CR4, V20, V21, V22, V23, V24, V25, V26, V27, V28, V29, V30, V31)>; + +def CSR_NoRegs : CalleeSavedRegs<(add VRSAVE)>; +def CSR_NoRegs_Darwin : CalleeSavedRegs<(add)>; + +def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>; + diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index d68bfd12e4..6478718513 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -142,7 +142,7 @@ unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI, assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 || MI.getOpcode() == PPC::MFOCRF) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); - return 0x80 >> getPPCRegisterNumbering(MO.getReg()); + return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg()); } MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO, @@ -260,7 +260,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 && MI.getOpcode() != PPC::MFOCRF) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - return getPPCRegisterNumbering(MO.getReg()); + return TM.getRegisterInfo()->getEncodingValue(MO.getReg()); } assert(MO.isImm() && diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 0a396e6693..3244b904ee 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -103,6 +103,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) { // transform this into the appropriate ORI instruction. static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { MachineFunction *MF = MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); DebugLoc dl = MI->getDebugLoc(); unsigned UsedRegMask = 0; @@ -115,7 +116,7 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { for (MachineRegisterInfo::livein_iterator I = MF->getRegInfo().livein_begin(), E = MF->getRegInfo().livein_end(); I != E; ++I) { - unsigned RegNo = getPPCRegisterNumbering(I->first); + unsigned RegNo = TRI->getEncodingValue(I->first); if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. } @@ -131,7 +132,7 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { const MachineOperand &MO = Ret.getOperand(I); if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) continue; - unsigned RegNo = getPPCRegisterNumbering(MO.getReg()); + unsigned RegNo = TRI->getEncodingValue(MO.getReg()); UsedRegMask &= ~(1 << (31-RegNo)); } } @@ -188,13 +189,31 @@ static bool spillsCR(const MachineFunction &MF) { return FuncInfo->isCRSpilled(); } +static bool spillsVRSAVE(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->isVRSAVESpilled(); +} + +static bool hasSpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->hasSpills(); +} + +static bool hasNonRISpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->hasNonRISpills(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. -void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { +unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, + bool UpdateMF, + bool UseEstimate) const { MachineFrameInfo *MFI = MF.getFrameInfo(); // Get the number of bytes to allocate from the FrameInfo - unsigned FrameSize = MFI->getStackSize(); + unsigned FrameSize = + UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize(); // Get the alignments provided by the target, and the maximum alignment // (if any) of the fixed frame objects. @@ -223,8 +242,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { && spillsCR(MF)) && (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame - MFI->setStackSize(0); - return; + if (UpdateMF) + MFI->setStackSize(0); + return 0; } // Get the maximum call frame size of all the calls. @@ -241,7 +261,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; // Update maximum call frame size. - MFI->setMaxCallFrameSize(maxCallFrameSize); + if (UpdateMF) + MFI->setMaxCallFrameSize(maxCallFrameSize); // Include call frame size in total. FrameSize += maxCallFrameSize; @@ -250,7 +271,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { FrameSize = (FrameSize + AlignMask) & ~AlignMask; // Update frame info. - MFI->setStackSize(FrameSize); + if (UpdateMF) + MFI->setStackSize(FrameSize); + + return FrameSize; } // hasFP - Return true if the specified function actually has a dedicated frame @@ -281,6 +305,31 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } +void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { + bool is31 = needsFP(MF); + unsigned FPReg = is31 ? PPC::R31 : PPC::R1; + unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); + BI != BE; ++BI) + for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { + --MBBI; + for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { + MachineOperand &MO = MBBI->getOperand(I); + if (!MO.isReg()) + continue; + + switch (MO.getReg()) { + case PPC::FP: + MO.setReg(FPReg); + break; + case PPC::FP8: + MO.setReg(FP8Reg); + break; + } + } + } +} void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB @@ -311,13 +360,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MBBI = MBB.begin(); // Work out frame sizes. - // FIXME: determineFrameLayout() may change the frame size. This should be - // moved upper, to some hook. - determineFrameLayout(MF); - unsigned FrameSize = MFI->getStackSize(); - + unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; + if (MFI->isFrameAddressTaken()) + replaceFPWithRealFP(MF); + // Get processor type. bool isPPC64 = Subtarget.isPPC64(); // Get operating system @@ -780,7 +828,7 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { void PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { + RegScavenger *) const { const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); // Save and clear the LR state. @@ -822,30 +870,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); FI->setCRSpillFrameIndex(FrameIdx); } - - // Reserve a slot closest to SP or frame pointer if we have a dynalloc or - // a large stack, which will require scavenging a register to materialize a - // large offset. - // FIXME: this doesn't actually check stack size, so is a bit pessimistic - // FIXME: doesn't detect whether or not we need to spill vXX, which requires - // r0 for now. - - if (RegInfo->requiresRegisterScavenging(MF)) - if (needsFP(MF) || spillsCR(MF)) { - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } } -void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { +void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const { // Early exit if not using the SVR4 ABI. - if (!Subtarget.isSVR4ABI()) + if (!Subtarget.isSVR4ABI()) { + addScavengingSpillSlot(MF, RS); return; + } // Get callee saved register information. MachineFrameInfo *FFI = MF.getFrameInfo(); @@ -853,6 +886,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) // Early exit if no callee saved registers are modified! if (CSI.empty() && !needsFP(MF)) { + addScavengingSpillSlot(MF, RS); return; } @@ -917,6 +951,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); int64_t LowerBound = 0; @@ -936,7 +971,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } - LowerBound -= (31 - getPPCRegisterNumbering(MinFPR) + 1) * 8; + LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; } // Check whether the frame pointer register is allocated. If so, make sure it @@ -970,8 +1005,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } unsigned MinReg = - std::min<unsigned>(getPPCRegisterNumbering(MinGPR), - getPPCRegisterNumbering(MinG8R)); + std::min<unsigned>(TRI->getEncodingValue(MinGPR), + TRI->getEncodingValue(MinG8R)); if (Subtarget.isPPC64()) { LowerBound -= (31 - MinReg + 1) * 8; @@ -1031,6 +1066,44 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } } + + addScavengingSpillSlot(MF, RS); +} + +void +PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, + RegScavenger *RS) const { + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or + // a large stack, which will require scavenging a register to materialize a + // large offset. + + // We need to have a scavenger spill slot for spills if the frame size is + // large. In case there is no free register for large-offset addressing, + // this slot is used for the necessary emergency spill. Also, we need the + // slot for dynamic stack allocations. + + // The scavenger might be invoked if the frame offset does not fit into + // the 16-bit immediate. We don't know the complete frame size here + // because we've not yet computed callee-saved register spills or the + // needed alignment padding. + unsigned StackSize = determineFrameLayout(MF, false, true); + MachineFrameInfo *MFI = MF.getFrameInfo(); + if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || + hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + + // These kinds of spills might need two registers. + if (spillsCR(MF) || spillsVRSAVE(MF)) + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + + } } bool @@ -1068,8 +1141,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, // save slot via GPR12 (available in the prolog for 32- and 64-bit). if (Subtarget.isPPC64()) { // 64-bit: SP+8 - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::X12)); - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW)) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR8), PPC::X12)); + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW8)) .addReg(PPC::X12, getKillRegState(true)) .addImm(8) @@ -1109,7 +1182,7 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, if (isPPC64) { // 64-bit: SP+8 - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::X12) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ8), PPC::X12) .addImm(8) .addReg(PPC::X1)); RestoreOp = PPC::MTCRF8; @@ -1125,15 +1198,15 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, if (CR2Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) - .addReg(MoveReg)); + .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); if (CR3Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) - .addReg(MoveReg)); + .addReg(MoveReg, getKillRegState(!CR4Spilled))); if (CR4Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) - .addReg(MoveReg)); + .addReg(MoveReg, getKillRegState(true))); } void PPCFrameLowering:: diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index d09e47fafd..6f5f9368c6 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -32,7 +32,9 @@ public: Subtarget(sti) { } - void determineFrameLayout(MachineFunction &MF) const; + unsigned determineFrameLayout(MachineFunction &MF, + bool UpdateMF = true, + bool UseEstimate = false) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. @@ -41,10 +43,13 @@ public: bool hasFP(const MachineFunction &MF) const; bool needsFP(const MachineFunction &MF) const; + void replaceFPWithRealFP(MachineFunction &MF) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; + void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -144,6 +149,9 @@ public: return 0; } + // Note that the offsets here overlap, but this is fixed up in + // processFunctionBeforeFrameFinalized. + static const SpillSlot Offsets[] = { // Floating-point register save area offsets. {PPC::F31, -8}, diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 6ed1fb9e6a..4bf1e33964 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -179,7 +179,7 @@ getHazardType(SUnit *SU, int Stalls) { } // Do not allow MTCTR and BCTRL to be in the same dispatch group. - if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4)) + if (HasCTRSet && Opcode == PPC::BCTRL) return NoopHazard; // If this is a load following a store, make sure it's not to the same or diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 17bea8a6a6..95efc11b53 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -120,10 +120,10 @@ namespace { } /// SelectAddrImmOffs - Return true if the operand is valid for a preinc - /// immediate field. Because preinc imms have already been validated, just - /// accept it. + /// immediate field. Note that the operand at this point is already the + /// result of a prior SelectAddressRegImm call. bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { - if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo || + if (N.getOpcode() == ISD::TargetConstant || N.getOpcode() == ISD::TargetGlobalAddress) { Out = N; return true; @@ -132,18 +132,6 @@ namespace { return false; } - /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc - /// index field. Because preinc imms have already been validated, just - /// accept it. - bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const { - if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo || - N.getOpcode() == ISD::TargetGlobalAddress) - return false; - - Out = N; - return true; - } - /// SelectAddrIdx - Given the specified addressed, check to see if it can be /// represented as an indexed [r+r] operation. Returns false if it can /// be represented by [r+imm], which are preferred. @@ -164,6 +152,12 @@ namespace { return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG); } + // Select an address into a single register. + bool SelectAddr(SDValue N, SDValue &Base) { + Base = N; + return true; + } + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. It is always correct to compute the value into /// a register. The case of adding a (possibly relocatable) constant to a @@ -1050,7 +1044,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { break; SDValue Offset = LD->getOffset(); - if (isa<ConstantSDNode>(Offset) || + if (Offset.getOpcode() == ISD::TargetConstant || Offset.getOpcode() == ISD::TargetGlobalAddress) { unsigned Opcode; @@ -1117,7 +1111,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); - SDValue Ops[] = { Offset, Base, Chain }; + SDValue Ops[] = { Base, Offset, Chain }; return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), PPCLowering.getPointerTy(), MVT::Other, Ops, 3); @@ -1483,8 +1477,7 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { default: continue; case PPC::ADDI8: - case PPC::ADDI8L: - case PPC::ADDIL: + case PPC::ADDI: // In some cases (such as TLS) the relocation information // is already in place on the operand, so copying the operand // is sufficient. diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 13cb358fc0..2cceb3d312 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); +static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", +cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); + static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) return new TargetLoweringObjectFileMachO(); @@ -67,6 +70,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); + PPCRegInfo = TM.getRegisterInfo(); setPow2DivIsCheap(); @@ -154,18 +158,45 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + if (Subtarget->hasFPRND()) { + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + + // frin does not implement "ties to even." Thus, this is safe only in + // fast-math mode. + if (TM.Options.UnsafeFPMath) { + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + + // These need to set FE_INEXACT, and use a custom inserter. + setOperationAction(ISD::FRINT, MVT::f64, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + } + } + // PowerPC does not have BSWAP, CTPOP or CTTZ setOperationAction(ISD::BSWAP, MVT::i32 , Expand); - setOperationAction(ISD::CTPOP, MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i64 , Expand); - setOperationAction(ISD::CTPOP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); + if (Subtarget->hasPOPCNTD()) { + setOperationAction(ISD::CTPOP, MVT::i32 , Legal); + setOperationAction(ISD::CTPOP, MVT::i64 , Legal); + } else { + setOperationAction(ISD::CTPOP, MVT::i32 , Expand); + setOperationAction(ISD::CTPOP, MVT::i64 , Expand); + } + // PowerPC does not have ROTR setOperationAction(ISD::ROTR, MVT::i32 , Expand); setOperationAction(ISD::ROTR, MVT::i64 , Expand); @@ -208,6 +239,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support + // SjLj exception handling but a light-weight setjmp/longjmp replacement to + // support continuation, user-level threading, and etc.. As a result, no + // other SjLj exception interfaces are implemented and please don't build + // your own exception handling based on them. + // LLVM/Clang supports zero-cost DWARF exception handling. + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. @@ -287,15 +326,28 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We cannot do this with Promote because i64 is not a legal type. setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - // FIXME: disable this lowered code. This generates 64-bit register values, - // and we don't model the fact that the top part is clobbered by calls. We - // need to flag these together so that the value isn't live across a call. - //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64()) + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); } + // With the instructions enabled under FPCVT, we can do everything. + if (PPCSubTarget.hasFPCVT()) { + if (Subtarget->has64BitSupport()) { + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + } + + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + } + if (Subtarget->use64BitRegs()) { // 64-bit PowerPC implementations can support i64 types directly addRegisterClass(MVT::i64, &PPC::G8RCRegClass); @@ -508,7 +560,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) MaxStoresPerMemmoveOptSize = 8; setPrefFunctionAlignment(4); - BenefitFromCodePlacementOpt = true; } } @@ -554,16 +605,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::SRL: return "PPCISD::SRL"; case PPCISD::SRA: return "PPCISD::SRA"; case PPCISD::SHL: return "PPCISD::SHL"; - case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; - case PPCISD::STD_32: return "PPCISD::STD_32"; - case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4"; - case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4"; - case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin"; - case PPCISD::NOP: return "PPCISD::NOP"; + case PPCISD::CALL: return "PPCISD::CALL"; + case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; - case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin"; - case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4"; + case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; + case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; + case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFCR: return "PPCISD::MFCR"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; @@ -573,10 +621,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::STCX: return "PPCISD::STCX"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; case PPCISD::MFFS: return "PPCISD::MFFS"; - case PPCISD::MTFSB0: return "PPCISD::MTFSB0"; - case PPCISD::MTFSB1: return "PPCISD::MTFSB1"; case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; - case PPCISD::MTFSF: return "PPCISD::MTFSF"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; case PPCISD::CR6SET: return "PPCISD::CR6SET"; case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; @@ -1028,7 +1073,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm)) { Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } @@ -1077,7 +1122,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, } // Otherwise, do it the hard way, using R0 as the base register. - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, N.getValueType()); Index = N; return true; @@ -1140,7 +1185,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm)) { Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy()); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } @@ -1178,15 +1223,19 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SelectionDAG &DAG) const { if (DisablePPCPreinc) return false; + bool isLoad = true; SDValue Ptr; EVT VT; + unsigned Alignment; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); - + Alignment = LD->getAlignment(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); + Alignment = ST->getAlignment(); + isLoad = false; } else return false; @@ -1194,7 +1243,25 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (VT.isVector()) return false; - if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) { + if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { + + // Common code will reject creating a pre-inc form if the base pointer + // is a frame index, or if N is a store and the base pointer is either + // the same as or a predecessor of the value being stored. Check for + // those situations here, and try with swapped Base/Offset instead. + bool Swap = false; + + if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base)) + Swap = true; + else if (!isLoad) { + SDValue Val = cast<StoreSDNode>(N)->getValue(); + if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) + Swap = true; + } + + if (Swap) + std::swap(Base, Offset); + AM = ISD::PRE_INC; return true; } @@ -1205,6 +1272,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (!SelectAddressRegImm(Ptr, Offset, Base, DAG)) return false; } else { + // LDU/STU need an address with at least 4-byte alignment. + if (Alignment < 4) + return false; + // reg + imm * 4. if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG)) return false; @@ -3096,7 +3167,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. - unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin; + unsigned CallOpc = PPCISD::CALL; bool needIndirectCall = true; if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { @@ -3229,8 +3300,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, NodeTys.push_back(MVT::Other); NodeTys.push_back(MVT::Glue); Ops.push_back(Chain); - CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin; + CallOpc = PPCISD::BCTRL; Callee.setNode(0); + // Add use of X11 (holding environment pointer) + if (isSVR4ABI && isPPC64) + Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); @@ -3369,7 +3443,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool needsTOCRestore = false; if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { - if (CallOpc == PPCISD::BCTRL_SVR4) { + if (CallOpc == PPCISD::BCTRL) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. // See PrepareCall() for more information about calls through function @@ -3380,9 +3454,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. needsTOCRestore = true; - } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) { + } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) { // Otherwise insert NOP for non-local calls. - CallOpc = PPCISD::CALL_NOP_SVR4; + CallOpc = PPCISD::CALL_NOP; } } @@ -4555,6 +4629,21 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3); } +SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, + DAG.getVTList(MVT::i32, MVT::Other), + Op.getOperand(0), Op.getOperand(1)); +} + +SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, + Op.getOperand(0), Op.getOperand(1)); +} + /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { @@ -4642,37 +4731,72 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : - PPCISD::FCTIDZ, + (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ : + PPCISD::FCTIDZ), dl, MVT::f64, Src); break; case MVT::i64: - Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src); + assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) && + "i64 FP_TO_UINT is supported only with FPCVT"); + Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : + PPCISD::FCTIDUZ, + dl, MVT::f64, Src); break; } // Convert the FP value to an int value through memory. - SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); + bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() && + (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()); + SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); + int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); + MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); // Emit a store to the stack slot. - SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, - MachinePointerInfo(), false, false, 0); + SDValue Chain; + if (i32Stack) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); + SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; + Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, + DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), + MVT::i32, MMO); + } else + Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, + MPI, false, false, 0); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias. - if (Op.getValueType() == MVT::i32) + if (Op.getValueType() == MVT::i32 && !i32Stack) { FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, FIPtr.getValueType())); - return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(), + MPI = MachinePointerInfo(); + } + + return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI, false, false, false, 0); } -SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, +SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // Don't handle ppc_fp128 here; let it be lowered to a libcall. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); + assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) && + "UINT_TO_FP is supported only with FPCVT"); + + // If we have FCFIDS, then use it when converting to single-precision. + // Otherwise, convert to double-prcision and then round. + unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? + (Op.getOpcode() == ISD::UINT_TO_FP ? + PPCISD::FCFIDUS : PPCISD::FCFIDS) : + (Op.getOpcode() == ISD::UINT_TO_FP ? + PPCISD::FCFIDU : PPCISD::FCFID); + MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? + MVT::f32 : MVT::f64; + if (Op.getOperand(0).getValueType() == MVT::i64) { SDValue SINT = Op.getOperand(0); // When converting to single-precision, we actually need to convert @@ -4686,6 +4810,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, // However, if -enable-unsafe-fp-math is in effect, accept double // rounding to avoid the extra overhead. if (Op.getValueType() == MVT::f32 && + !PPCSubTarget.hasFPCVT() && !DAG.getTarget().Options.UnsafeFPMath) { // Twiddle input to make sure the low 11 bits are zero. (If this @@ -4719,44 +4844,69 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); } + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); - SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits); - if (Op.getValueType() == MVT::f32) + SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); + + if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); return FP; } assert(Op.getOperand(0).getValueType() == MVT::i32 && - "Unhandled SINT_TO_FP type in custom expander!"); + "Unhandled INT_TO_FP type in custom expander!"); // Since we only generate this in 64-bit mode, we can take advantage of // 64-bit registers. In particular, sign extend the input value into the // 64-bit register with extsw, store the WHOLE 64-bit value into the stack // then lfd it and fcfid it. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32, + SDValue Ld; + if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) { + int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack(FrameIdx), + false, false, 0); + + assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && + "Expected an i32 store"); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), + MachineMemOperand::MOLoad, 4, 4); + SDValue Ops[] = { Store, FIdx }; + Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? + PPCISD::LFIWZX : PPCISD::LFIWAX, + dl, DAG.getVTList(MVT::f64, MVT::Other), + Ops, 2, MVT::i32, MMO); + } else { + assert(PPCSubTarget.isPPC64() && + "i32->FP without LFIWAX supported only on PPC64"); + + int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Op.getOperand(0)); - // STD the extended value into the stack slot. - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOStore, 8, 8); - SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx }; - SDValue Store = - DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other), - Ops, 4, MVT::i64, MMO); - // Load the value as a double. - SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(), - false, false, false, 0); + // STD the extended value into the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, + MachinePointerInfo::getFixedStack(FrameIdx), + false, false, 0); + + // Load the value as a double. + Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, + MachinePointerInfo::getFixedStack(FrameIdx), + false, false, false, 0); + } // FCFID it and return it. - SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); - if (Op.getValueType() == MVT::f32) + SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); + if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); return FP; } @@ -5551,11 +5701,15 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); + case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, Op.getDebugLoc()); - case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); // Lower 64-bit shifts. @@ -5609,50 +5763,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, MVT::f64, N->getOperand(0), DAG.getIntPtrConstant(1)); - // This sequence changes FPSCR to do round-to-zero, adds the two halves - // of the long double, and puts FPSCR back the way it was. We do not - // actually model FPSCR. - std::vector<EVT> NodeTys; - SDValue Ops[4], Result, MFFSreg, InFlag, FPreg; - - NodeTys.push_back(MVT::f64); // Return register - NodeTys.push_back(MVT::Glue); // Returns a flag for later insns - Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); - MFFSreg = Result.getValue(0); - InFlag = Result.getValue(1); - - NodeTys.clear(); - NodeTys.push_back(MVT::Glue); // Returns a flag - Ops[0] = DAG.getConstant(31, MVT::i32); - Ops[1] = InFlag; - Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2); - InFlag = Result.getValue(0); - - NodeTys.clear(); - NodeTys.push_back(MVT::Glue); // Returns a flag - Ops[0] = DAG.getConstant(30, MVT::i32); - Ops[1] = InFlag; - Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2); - InFlag = Result.getValue(0); - - NodeTys.clear(); - NodeTys.push_back(MVT::f64); // result of add - NodeTys.push_back(MVT::Glue); // Returns a flag - Ops[0] = Lo; - Ops[1] = Hi; - Ops[2] = InFlag; - Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3); - FPreg = Result.getValue(0); - InFlag = Result.getValue(1); - - NodeTys.clear(); - NodeTys.push_back(MVT::f64); - Ops[0] = DAG.getConstant(1, MVT::i32); - Ops[1] = MFFSreg; - Ops[2] = FPreg; - Ops[3] = InFlag; - Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4); - FPreg = Result.getValue(0); + // Add the two halves of the long double in round-to-zero mode. + SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); // We know the low half is about to be thrown away, so just use something // convenient. @@ -5744,7 +5856,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. bool is64bit = PPCSubTarget.isPPC64(); - unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; + unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); @@ -5863,9 +5975,238 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, return BB; } +llvm::MachineBasicBlock* +PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = MBB; + ++I; + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + unsigned DstReg = MI->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + assert(RC->hasType(MVT::i32) && "Invalid destination!"); + unsigned mainDstReg = MRI.createVirtualRegister(RC); + unsigned restoreDstReg = MRI.createVirtualRegister(RC); + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + // For v = setjmp(buf), we generate + // + // thisMBB: + // SjLjSetup mainMBB + // bl mainMBB + // v_restore = 1 + // b sinkMBB + // + // mainMBB: + // buf[LabelOffset] = LR + // v_main = 0 + // + // sinkMBB: + // v = phi(main, restore) + // + + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, mainMBB); + MF->insert(I, sinkMBB); + + MachineInstrBuilder MIB; + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // Note that the structure of the jmp_buf used here is not compatible + // with that used by libc, and is not designed to be. Specifically, it + // stores only those 'reserved' registers that LLVM does not otherwise + // understand how to spill. Also, by convention, by the time this + // intrinsic is called, Clang has already stored the frame address in the + // first slot of the buffer and stack address in the third. Following the + // X86 target code, we'll store the jump address in the second slot. We also + // need to save the TOC pointer (R2) to handle jumps between shared + // libraries, and that will be stored in the fourth slot. The thread + // identifier (R13) is not affected. + + // thisMBB: + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + const int64_t TOCOffset = 3 * PVT.getStoreSize(); + + // Prepare IP either in reg. + const TargetRegisterClass *PtrRC = getRegClassFor(PVT); + unsigned LabelReg = MRI.createVirtualRegister(PtrRC); + unsigned BufReg = MI->getOperand(1).getReg(); + + if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) { + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) + .addReg(PPC::X2) + .addImm(TOCOffset / 4) + .addReg(BufReg); + + MIB.setMemRefs(MMOBegin, MMOEnd); + } + + // Setup + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCL)).addMBB(mainMBB); + MIB.addRegMask(PPCRegInfo->getNoPreservedMask()); + + BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); + + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) + .addMBB(mainMBB); + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); + + thisMBB->addSuccessor(mainMBB, /* weight */ 0); + thisMBB->addSuccessor(sinkMBB, /* weight */ 1); + + // mainMBB: + // mainDstReg = 0 + MIB = BuildMI(mainMBB, DL, + TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); + + // Store IP + if (PPCSubTarget.isPPC64()) { + MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) + .addReg(LabelReg) + .addImm(LabelOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) + .addReg(LabelReg) + .addImm(LabelOffset) + .addReg(BufReg); + } + + MIB.setMemRefs(MMOBegin, MMOEnd); + + BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); + mainMBB->addSuccessor(sinkMBB); + + // sinkMBB: + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(PPC::PHI), DstReg) + .addReg(mainDstReg).addMBB(mainMBB) + .addReg(restoreDstReg).addMBB(thisMBB); + + MI->eraseFromParent(); + return sinkMBB; +} + +MachineBasicBlock * +PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + + const TargetRegisterClass *RC = + (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned Tmp = MRI.createVirtualRegister(RC); + // Since FP is only updated here but NOT referenced, it's treated as GPR. + unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; + unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; + + MachineInstrBuilder MIB; + + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + const int64_t SPOffset = 2 * PVT.getStoreSize(); + const int64_t TOCOffset = 3 * PVT.getStoreSize(); + + unsigned BufReg = MI->getOperand(0).getReg(); + + // Reload FP (the jumped-to function may not have had a + // frame pointer, and if so, then its r31 will be restored + // as necessary). + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) + .addImm(0) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) + .addImm(0) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // Reload IP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) + .addImm(LabelOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) + .addImm(LabelOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // Reload SP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) + .addImm(SPOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) + .addImm(SPOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // FIXME: When we also support base pointers, that register must also be + // restored here. + + // Reload TOC + if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) + .addImm(TOCOffset / 4) + .addReg(BufReg); + + MIB.setMemRefs(MMOBegin, MMOEnd); + } + + // Jump + BuildMI(*MBB, MI, DL, + TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); + BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); + + MI->eraseFromParent(); + return MBB; +} + MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { + if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || + MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { + return emitEHSjLjSetJmp(MI, BB); + } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 || + MI->getOpcode() == PPC::EH_SjLj_LongJmp64) { + return emitEHSjLjLongJmp(MI, BB); + } + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); // To "insert" these instructions we actually have to insert their @@ -5883,24 +6224,24 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, unsigned SelectPred = MI->getOperand(4).getImm(); DebugLoc dl = MI->getDebugLoc(); - // The SelectPred is ((BI << 5) | BO) for a BCC - unsigned BO = SelectPred & 0xF; - assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel"); - - unsigned TrueOpNo, FalseOpNo; - if (BO == 12) { - TrueOpNo = 2; - FalseOpNo = 3; - } else { - TrueOpNo = 3; - FalseOpNo = 2; - SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred); + unsigned SubIdx; + bool SwapOps; + switch (SelectPred) { + default: llvm_unreachable("invalid predicate for isel"); + case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break; + case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break; + case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break; + case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break; + case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break; + case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break; + case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break; + case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break; } BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(TrueOpNo).getReg()) - .addReg(MI->getOperand(FalseOpNo).getReg()) - .addImm(SelectPred).addReg(MI->getOperand(1).getReg()); + .addReg(MI->getOperand(SwapOps? 3 : 2).getReg()) + .addReg(MI->getOperand(SwapOps? 2 : 3).getReg()) + .addReg(MI->getOperand(1).getReg(), 0, SubIdx); } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || MI->getOpcode() == PPC::SELECT_CC_I8 || MI->getOpcode() == PPC::SELECT_CC_F4 || @@ -6133,7 +6474,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = RegInfo.createVirtualRegister(RC); - unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; + unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; // thisMBB: // ... // fallthrough --> loopMBB @@ -6236,6 +6577,75 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) .addReg(ShiftReg); + } else if (MI->getOpcode() == PPC::FADDrtz) { + // This pseudo performs an FADD with rounding mode temporarily forced + // to round-to-zero. We emit this via custom inserter since the FPSCR + // is not modeled at the SelectionDAG level. + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src1 = MI->getOperand(1).getReg(); + unsigned Src2 = MI->getOperand(2).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); + + // Save FPSCR value. + BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); + + // Set rounding mode to round-to-zero. + BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); + BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); + + // Perform addition. + BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); + + // Restore FPSCR value. + BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); + } else if (MI->getOpcode() == PPC::FRINDrint || + MI->getOpcode() == PPC::FRINSrint) { + bool isf32 = MI->getOpcode() == PPC::FRINSrint; + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); + + // Perform the rounding. + BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest) + .addReg(Src); + + // Compare the results. + BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg) + .addReg(Dest).addReg(Src); + + // If the results were not equal, then set the FPSCR XX bit. + MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, midMBB); + F->insert(It, exitMBB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + BuildMI(*BB, MI, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB); + + BB->addSuccessor(midMBB); + BB->addSuccessor(exitMBB); + + BB = midMBB; + + // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set + // the FI bit here because that will not automatically set XX also, + // and XX is what libm interprets as the FE_INEXACT flag. + BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6); + BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); + + BB->addSuccessor(exitMBB); + + BB = exitMBB; } else { llvm_unreachable("Unexpected instr type to insert"); } @@ -6321,8 +6731,15 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); - Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val, - N->getOperand(2), N->getOperand(3)); + SDValue Ops[] = { + N->getOperand(0), Val, N->getOperand(2), + DAG.getValueType(N->getOperand(1).getValueType()) + }; + + Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, + DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), + cast<StoreSDNode>(N)->getMemoryVT(), + cast<StoreSDNode>(N)->getMemOperand()); DCI.AddToWorklist(Val.getNode()); return Val; } @@ -6332,7 +6749,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, N->getOperand(1).getOpcode() == ISD::BSWAP && N->getOperand(1).getNode()->hasOneUse() && (N->getOperand(1).getValueType() == MVT::i32 || - N->getOperand(1).getValueType() == MVT::i16)) { + N->getOperand(1).getValueType() == MVT::i16 || + (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && + TM.getSubtarget<PPCSubtarget>().isPPC64() && + N->getOperand(1).getValueType() == MVT::i64))) { SDValue BSwapOp = N->getOperand(1).getOperand(0); // Do an any-extend to 32-bits if this is a half-word input. if (BSwapOp.getValueType() == MVT::i16) @@ -6353,7 +6773,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Turn BSWAP (LOAD) -> lhbrx/lwbrx. if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && - (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) { + (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || + (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && + TM.getSubtarget<PPCSubtarget>().isPPC64() && + N->getValueType(0) == MVT::i64))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast<LoadSDNode>(Load); // Create the byte-swapping load. @@ -6364,8 +6787,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, }; SDValue BSLoad = DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, - DAG.getVTList(MVT::i32, MVT::Other), Ops, 3, - LD->getMemoryVT(), LD->getMemOperand()); + DAG.getVTList(N->getValueType(0) == MVT::i64 ? + MVT::i64 : MVT::i32, MVT::Other), + Ops, 3, LD->getMemoryVT(), LD->getMemOperand()); // If this is an i16 load, insert the truncate. SDValue ResVal = BSLoad; @@ -6622,6 +7046,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'b': // R1-R31 + if (VT == MVT::i64 && PPCSubTarget.isPPC64()) + return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); + return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); case 'r': // R0-R31 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) return std::make_pair(0U, &PPC::G8RCRegClass); @@ -6806,13 +7233,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); - bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || - MFI->hasVarSizedObjects()) && - MFI->getStackSize() && - !MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked); - unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : - (is31 ? PPC::R31 : PPC::R1); + + // Naked functions never have a frame pointer, and so we use r1. For all + // other functions, this decision must be delayed until during PEI. + unsigned FrameReg; + if (MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::Naked)) + FrameReg = isPPC64 ? PPC::X1 : PPC::R1; + else + FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); while (Depth--) @@ -6851,6 +7281,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, } } +bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, + bool *Fast) const { + if (DisablePPCUnaligned) + return false; + + // PowerPC supports unaligned memory access for simple non-vector types. + // Although accessing unaligned addresses is not as efficient as accessing + // aligned addresses, it is generally more efficient than manual expansion, + // and generally only traps for software emulation when crossing page + // boundaries. + + if (!VT.isSimple()) + return false; + + if (VT.getSimpleVT().isVector()) + return false; + + if (VT == MVT::ppcf128) + return false; + + if (Fast) + *Fast = true; + + return true; +} + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 3931384d89..6690899e5d 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -16,6 +16,7 @@ #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H #include "PPC.h" +#include "PPCRegisterInfo.h" #include "PPCSubtarget.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" @@ -35,14 +36,18 @@ namespace llvm { /// was temporarily in the f64 operand. FCFID, + /// Newer FCFID[US] integer-to-floating-point conversion instructions for + /// unsigned integers and single-precision outputs. + FCFIDU, FCFIDS, FCFIDUS, + /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 /// operand, producing an f64 value containing the integer representation /// of that FP value. FCTIDZ, FCTIWZ, - /// STFIWX - The STFIWX instruction. The first operand is an input token - /// chain, then an f64 value to store, then an address to store it to. - STFIWX, + /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for + /// unsigned integers. + FCTIDUZ, FCTIWUZ, // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking // three v4f32 operands and producing a v4f32 result. @@ -90,17 +95,10 @@ namespace llvm { /// code. SRL, SRA, SHL, - /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit" - /// registers. - EXTSW_32, - /// CALL - A direct function call. - /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit + /// CALL_NOP is a call with the special NOP which follows 64-bit /// SVR4 calls. - CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4, - - /// NOP - Special NOP which follows 64-bit SVR4 calls. - NOP, + CALL, CALL_NOP, /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. @@ -108,7 +106,7 @@ namespace llvm { /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a /// BCTRL instruction. - BCTRL_Darwin, BCTRL_SVR4, + BCTRL, /// Return with a flag operand, matched by 'blr' RET_FLAG, @@ -119,6 +117,12 @@ namespace llvm { /// are undefined. MFCR, + // EH_SJLJ_SETJMP - SjLj exception handling setjmp. + EH_SJLJ_SETJMP, + + // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. + EH_SJLJ_LONGJMP, + /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* /// instructions. For lack of better number, we use the opcode number /// encoding for the OPC field to identify the compare. For example, 838 @@ -138,26 +142,13 @@ namespace llvm { /// an optional input flag argument. COND_BRANCH, - // The following 5 instructions are used only as part of the - // long double-to-int conversion sequence. - - /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the - /// register. - MFFS, - - /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR. - MTFSB0, - - /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR. - MTFSB1, - - /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with - /// rounding towards zero. It has flags added so it won't move past the - /// FPSCR-setting instructions. + /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding + /// towards zero. Used only as part of the long double-to-int + /// conversion sequence. FADDRTZ, - /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR. - MTFSF, + /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. + MFFS, /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and /// reserve indexed. This is used to implement atomic operations. @@ -243,14 +234,11 @@ namespace llvm { /// optimizations due to constant folding. VADD_SPLAT, - /// STD_32 - This is the STD instruction for use with "32-bit" registers. - STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, - /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or /// i32. - STBRX, + STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE, /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, @@ -258,6 +246,20 @@ namespace llvm { /// or i32. LBRX, + /// STFIWX - The STFIWX instruction. The first operand is an input token + /// chain, then an f64 value to store, then an address to store it to. + STFIWX, + + /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point + /// load which sign-extends from a 32-bit integer value into the + /// destination 64-bit register. + LFIWAX, + + /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point + /// load which zero-extends from a 32-bit integer value into the + /// destination 64-bit register. + LFIWZX, + /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model, /// produces an ADDIS8 instruction that adds the TOC base register to /// sym@toc@ha. @@ -321,6 +323,7 @@ namespace llvm { class PPCTargetLowering : public TargetLowering { const PPCSubtarget &PPCSubTarget; + const PPCRegisterInfo *PPCRegInfo; public: explicit PPCTargetLowering(PPCTargetMachine &TM); @@ -395,6 +398,12 @@ namespace llvm { MachineBasicBlock *MBB, bool is8bit, unsigned Opcode) const; + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + ConstraintType getConstraintType(const std::string &Constraint) const; /// Examine constraint string and operand type and determine a weight value. @@ -449,6 +458,10 @@ namespace llvm { bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; + /// Is unaligned memory access allowed for the given type, and is it fast + /// relative to software emulation. + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd @@ -494,7 +507,7 @@ namespace llvm { const PPCSubtarget &Subtarget) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const; - SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; @@ -604,6 +617,9 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + + SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 01201304f7..fa5b65f0ba 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -30,12 +30,7 @@ def symbolLo64 : Operand<i64> { let EncoderMethod = "getLO16Encoding"; } def tocentry : Operand<iPTR> { - let MIOperandInfo = (ops i32imm:$imm); -} -def memrs : Operand<iPTR> { // memri where the immediate is a symbolLo64 - let PrintMethod = "printMemRegImm"; - let EncoderMethod = "getMemRIXEncoding"; - let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg); + let MIOperandInfo = (ops i64imm:$imm); } def tlsreg : Operand<i64> { let EncoderMethod = "getTLSRegEncoding"; @@ -71,133 +66,112 @@ def HI48_64 : SDNodeXForm<imm, [{ // Calls. // +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { + let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in + def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, + Requires<[In64BitMode]>; +} + let Defs = [LR8] in def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>, PPC970_Unit_BRU; -// Darwin ABI Calls. -let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { - // Convenient aliases for call instructions - let Uses = [RM] in { - def BL8_Darwin : IForm<18, 0, 1, - (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA8_Darwin : IForm<18, 1, 1, - (outs), (ins aaddr:$func), - "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>; - } - let Uses = [CTR8, RM] in { - def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins), - "bctrl", BrB, - [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>; +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { + let Defs = [CTR8], Uses = [CTR8] in { + def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz $dst">; + def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz $dst">; } } -// ELF 64 ABI Calls = Darwin ABI Calls -// Used to define BL8_ELF and BLA8_ELF let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL8_ELF : IForm<18, 0, 1, - (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. + def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func), + "bl $func", BrB, []>; // See Pat patterns below. - let isCodeGenOnly = 1 in - def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24, + def BLA8 : IForm<18, 1, 1, (outs), (ins aaddr:$func), + "bla $func", BrB, [(PPCcall (i64 imm:$func))]>; + } + let Uses = [RM], isCodeGenOnly = 1 in { + def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), (ins calltarget:$func), "bl $func\n\tnop", BrB, []>; - let isCodeGenOnly = 1 in - def BL8_NOP_ELF_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24, + def BL8_NOP_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), (ins calltarget:$func, tlsgd:$sym), "bl $func($sym)\n\tnop", BrB, []>; - let isCodeGenOnly = 1 in - def BL8_NOP_ELF_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24, + def BL8_NOP_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), (ins calltarget:$func, tlsgd:$sym), "bl $func($sym)\n\tnop", BrB, []>; - def BLA8_ELF : IForm<18, 1, 1, - (outs), (ins aaddr:$func), - "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; - - let isCodeGenOnly = 1 in - def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24, + def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24, (outs), (ins aaddr:$func), "bla $func\n\tnop", BrB, - [(PPCcall_nop_SVR4 (i64 imm:$func))]>; + [(PPCcall_nop (i64 imm:$func))]>; } - let Uses = [X11, CTR8, RM] in { - def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins), - "bctrl", BrB, - [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>; + let Uses = [CTR8, RM] in { + def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), + "bctrl", BrB, [(PPCbctrl)]>, + Requires<[In64BitMode]>; } } // Calls -def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)), - (BL8_Darwin tglobaladdr:$dst)>; -def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)), - (BL8_Darwin texternalsym:$dst)>; +def : Pat<(PPCcall (i64 tglobaladdr:$dst)), + (BL8 tglobaladdr:$dst)>; +def : Pat<(PPCcall_nop (i64 tglobaladdr:$dst)), + (BL8_NOP tglobaladdr:$dst)>; -def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), - (BL8_ELF tglobaladdr:$dst)>; -def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)), - (BL8_NOP_ELF tglobaladdr:$dst)>; - -def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), - (BL8_ELF texternalsym:$dst)>; -def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)), - (BL8_NOP_ELF texternalsym:$dst)>; - -def : Pat<(PPCnop), - (NOP)>; +def : Pat<(PPCcall (i64 texternalsym:$dst)), + (BL8 texternalsym:$dst)>; +def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), + (BL8_NOP texternalsym:$dst)>; // Atomic operations let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64", - [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_SUB_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64", - [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_OR_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64", - [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_XOR_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64", - [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_AND_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64", - [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_NAND_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64", - [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_CMP_SWAP_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64", - [(set G8RC:$dst, - (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>; + [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>; def ATOMIC_SWAP_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64", - [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>; + [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>; } } // Instructions to support atomic operations def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr), "ldarx $rD, $ptr", LdStLDARX, - [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>; + [(set i64:$rD, (PPClarx xoaddr:$ptr))]>; let Defs = [CR0] in def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst), "stdcx. $rS, $dst", LdStSTDCX, - [(PPCstcx G8RC:$rS, xoaddr:$dst)]>, + [(PPCstcx i64:$rS, xoaddr:$dst)]>, isDOT; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in @@ -216,17 +190,12 @@ def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset), "#TC_RETURNr8 $dst $offset", []>; +let isCodeGenOnly = 1 in { let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, - isIndirectBranch = 1, isCall = 1, Uses = [CTR8, RM] in { - let isReturn = 1 in { - def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, - Requires<[In64BitMode]>; - } - - def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, - Requires<[In64BitMode]>; -} + isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in +def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, + Requires<[In64BitMode]>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, @@ -242,6 +211,8 @@ def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst), "ba $dst", BrB, []>; +} + def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm), (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>; @@ -251,20 +222,13 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm), def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), (TCRETURNri8 CTRRC8:$dst, imm:$imm)>; -let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { - let Defs = [CTR8], Uses = [CTR8] in { - def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), - "bdz $dst">; - def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), - "bdnz $dst">; - } -} -// 64-but CR instructions +// 64-bit CR instructions def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; +let isCodeGenOnly = 1 in def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM), "#MFCR8pseud", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -273,6 +237,18 @@ def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins), "mfcr $rT", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; +let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { + def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + "#EH_SJLJ_SETJMP64", + [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, + Requires<[In64BitMode]>; + let isTerminator = 1 in + def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf), + "#EH_SJLJ_LONGJMP64", + [(PPCeh_sjlj_longjmp addr:$buf)]>, + Requires<[In64BitMode]>; +} + //===----------------------------------------------------------------------===// // 64-bit SPR manipulation instrs. @@ -281,13 +257,13 @@ def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins), "mfctr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in { +let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in { def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let Pattern = [(set G8RC:$rT, readcyclecounter)] in +let Pattern = [(set i64:$rT, readcyclecounter)] in def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), "mfspr $rT, 268", SprMFTB>, PPC970_DGroup_First, PPC970_Unit_FXU; @@ -298,8 +274,8 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), let Defs = [X1], Uses = [X1] in def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8", - [(set G8RC:$result, - (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>; + [(set i64:$result, + (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>; let Defs = [LR8] in { def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS), @@ -321,131 +297,129 @@ let PPC970_Unit = 1 in { // FXU Operations. let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm), "li $rD, $imm", IntSimple, - [(set G8RC:$rD, immSExt16:$imm)]>; + [(set i64:$rD, immSExt16:$imm)]>; def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm), "lis $rD, $imm", IntSimple, - [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>; + [(set i64:$rD, imm16ShiftedSExt:$imm)]>; } // Logical ops. def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "nand $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>; + [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>; def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "and $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>; + [(set i64:$rA, (and i64:$rS, i64:$rB))]>; def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "andc $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>; + [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>; def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "or $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>; + [(set i64:$rA, (or i64:$rS, i64:$rB))]>; def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "nor $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>; + [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>; def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "orc $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>; + [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>; def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "eqv $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>; + [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>; def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "xor $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>; + [(set i64:$rA, (xor i64:$rS, i64:$rB))]>; // Logical ops with immediate. def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "andi. $dst, $src1, $src2", IntGeneral, - [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>, + [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>, isDOT; def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "andis. $dst, $src1, $src2", IntGeneral, - [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>, + [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>, isDOT; def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "ori $dst, $src1, $src2", IntSimple, - [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>; + [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>; def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "oris $dst, $src1, $src2", IntSimple, - [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>; + [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>; def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "xori $dst, $src1, $src2", IntSimple, - [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>; + [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>; def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "xoris $dst, $src1, $src2", IntSimple, - [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>; + [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>; def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "add $rT, $rA, $rB", IntSimple, - [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>; + [(set i64:$rT, (add i64:$rA, i64:$rB))]>; // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the // initial-exec thread-local storage model. +let isCodeGenOnly = 1 in def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB), "add $rT, $rA, $rB@tls", IntSimple, - [(set G8RC:$rT, (add G8RC:$rA, tglobaltlsaddr:$rB))]>; + [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>; let Defs = [CARRY] in { def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "addc $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>, + [(set i64:$rT, (addc i64:$rA, i64:$rB))]>, PPC970_DGroup_Cracked; def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), "addic $rD, $rA, $imm", IntGeneral, - [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>; + [(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>; } -def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), - "addi $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; -def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm), +def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; -def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm), + [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>; +def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm), "addis $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>; + [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>; let Defs = [CARRY] in { def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), "subfic $rD, $rA, $imm", IntGeneral, - [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>; + [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>; def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "subfc $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>, + [(set i64:$rT, (subc i64:$rB, i64:$rA))]>, PPC970_DGroup_Cracked; } def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "subf $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>; + [(set i64:$rT, (sub i64:$rB, i64:$rA))]>; def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA), "neg $rT, $rA", IntSimple, - [(set G8RC:$rT, (ineg G8RC:$rA))]>; + [(set i64:$rT, (ineg i64:$rA))]>; let Uses = [CARRY], Defs = [CARRY] in { def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "adde $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>; + [(set i64:$rT, (adde i64:$rA, i64:$rB))]>; def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA), "addme $rT, $rA", IntGeneral, - [(set G8RC:$rT, (adde G8RC:$rA, -1))]>; + [(set i64:$rT, (adde i64:$rA, -1))]>; def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA), "addze $rT, $rA", IntGeneral, - [(set G8RC:$rT, (adde G8RC:$rA, 0))]>; + [(set i64:$rT, (adde i64:$rA, 0))]>; def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "subfe $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>; + [(set i64:$rT, (sube i64:$rB, i64:$rA))]>; def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA), "subfme $rT, $rA", IntGeneral, - [(set G8RC:$rT, (sube -1, G8RC:$rA))]>; + [(set i64:$rT, (sube -1, i64:$rA))]>; def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA), "subfze $rT, $rA", IntGeneral, - [(set G8RC:$rT, (sube 0, G8RC:$rA))]>; + [(set i64:$rT, (sube 0, i64:$rA))]>; } def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "mulhd $rT, $rA, $rB", IntMulHW, - [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>; + [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>; def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "mulhdu $rT, $rA, $rB", IntMulHWU, - [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>; + [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>; def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB), "cmpd $crD, $rA, $rB", IntCompare>, isPPC64; @@ -458,54 +432,60 @@ def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2), def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), "sld $rA, $rS, $rB", IntRotateD, - [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64; + [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64; def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), "srd $rA, $rS, $rB", IntRotateD, - [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64; + [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64; let Defs = [CARRY] in { def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), "srad $rA, $rS, $rB", IntRotateD, - [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64; + [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64; } def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS), "extsb $rA, $rS", IntSimple, - [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>; + [(set i64:$rA, (sext_inreg i64:$rS, i8))]>; def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS), "extsh $rA, $rS", IntSimple, - [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>; + [(set i64:$rA, (sext_inreg i64:$rS, i16))]>; def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS), "extsw $rA, $rS", IntSimple, - [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64; -/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers. -def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS), - "extsw $rA, $rS", IntSimple, - [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64; + [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64; def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS), "extsw $rA, $rS", IntSimple, - [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64; + [(set i64:$rA, (sext i32:$rS))]>, isPPC64; let Defs = [CARRY] in { def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH), "sradi $rA, $rS, $SH", IntRotateDI, - [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64; + [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; } def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS), "cntlzd $rA, $rS", IntGeneral, - [(set G8RC:$rA, (ctlz G8RC:$rS))]>; + [(set i64:$rA, (ctlz i64:$rS))]>; +def POPCNTD : XForm_11<31, 506, (outs G8RC:$rA), (ins G8RC:$rS), + "popcntd $rA, $rS", IntGeneral, + [(set i64:$rA, (ctpop i64:$rS))]>; + +// popcntw also does a population count on the high 32 bits (storing the +// results in the high 32-bits of the output). We'll ignore that here (which is +// safe because we never separately use the high part of the 64-bit registers). +def POPCNTW : XForm_11<31, 378, (outs GPRC:$rA), (ins GPRC:$rS), + "popcntw $rA, $rS", IntGeneral, + [(set i32:$rA, (ctpop i32:$rS))]>; def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "divd $rT, $rA, $rB", IntDivD, - [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64, + [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64, PPC970_DGroup_First, PPC970_DGroup_Cracked; def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "divdu $rT, $rA, $rB", IntDivD, - [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64, + [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64, PPC970_DGroup_First, PPC970_DGroup_Cracked; def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "mulld $rT, $rA, $rB", IntMulHD, - [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64; + [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64; let isCommutable = 1 in { @@ -536,7 +516,7 @@ def RLWINM8 : MForm_2<21, []>; def ISEL8 : AForm_4<31, 15, - (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond), + (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, CRBITRC:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; } // End FXU Operations. @@ -551,94 +531,96 @@ def ISEL8 : AForm_4<31, 15, let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src), "lha $rD, $src", LdStLHA, - [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>, + [(set i64:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src), "lwa $rD, $src", LdStLWA, - [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64, + [(set i64:$rD, + (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, - [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>, + [(set i64:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), "lwax $rD, $src", LdStLHA, - [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, + [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; // Update forms. -let mayLoad = 1 in -def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, - ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStLHAU, - []>, RegConstraint<"$rA = $ea_result">, +let mayLoad = 1 in { +def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), + (ins memri:$addr), + "lhau $rD, $addr", LdStLHAU, + []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! -def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwaux $rD, $addr", LdStLHAU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } +} // Zero extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src), "lbz $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>; + [(set i64:$rD, (zextloadi8 iaddr:$src))]>; def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src), "lhz $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>; + [(set i64:$rD, (zextloadi16 iaddr:$src))]>; def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src), "lwz $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; + [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src), "lbzx $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>; + [(set i64:$rD, (zextloadi8 xaddr:$src))]>; def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src), "lhzx $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>; + [(set i64:$rD, (zextloadi16 xaddr:$src))]>; def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), "lwzx $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>; + [(set i64:$rD, (zextloadi32 xaddr:$src))]>; // Update forms. let mayLoad = 1 in { -def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), +def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } } @@ -648,31 +630,28 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, - [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64; -def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src), - "ld $rD, $src", LdStLD, - []>, isPPC64; + [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64; // The following three definitions are selected for small code model only. // Otherwise, we need to create two instructions to form a 32-bit offset, // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select(). def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "#LDtoc", - [(set G8RC:$rD, - (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; + [(set i64:$rD, + (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64; def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "#LDtocJTI", - [(set G8RC:$rD, - (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64; + [(set i64:$rD, + (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64; def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "#LDtocCPT", - [(set G8RC:$rD, - (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64; + [(set i64:$rD, + (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64; -let hasSideEffects = 1 in { +let hasSideEffects = 1, isCodeGenOnly = 1 in { let RST = 2, DS = 2 in def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg), "ld 2, 8($reg)", LdStLD, - [(PPCload_toc G8RC:$reg)]>, isPPC64; + [(PPCload_toc i64:$reg)]>, isPPC64; let RST = 2, DS = 10, RA = 1 in def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), @@ -681,18 +660,21 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), } def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), "ldx $rD, $src", LdStLD, - [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; - + [(set i64:$rD, (load xaddr:$src))]>, isPPC64; +def LDBRX : XForm_1<31, 532, (outs G8RC:$rD), (ins memrr:$src), + "ldbrx $rD, $src", LdStLoad, + [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64; + let mayLoad = 1 in -def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), +def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), "ldu $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; -def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result), +def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "ldux $rD, $addr", LdStLDU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -702,188 +684,167 @@ def : Pat<(PPCload xaddr:$src), (LDX xaddr:$src)>; // Support for medium and large code model. -def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp), +def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp), "#ADDIStocHA", - [(set G8RC:$rD, - (PPCaddisTocHA G8RC:$reg, tglobaladdr:$disp))]>, + [(set i64:$rD, + (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>, isPPC64; -def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), +def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC_NOX0:$reg), "#LDtocL", - [(set G8RC:$rD, - (PPCldTocL tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; -def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp), + [(set i64:$rD, + (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64; +def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp), "#ADDItocL", - [(set G8RC:$rD, - (PPCaddiTocL G8RC:$reg, tglobaladdr:$disp))]>, isPPC64; + [(set i64:$rD, + (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64; // Support for thread-local storage. -def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), +def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), "#ADDISgotTprelHA", - [(set G8RC:$rD, - (PPCaddisGotTprelHA G8RC:$reg, + [(set i64:$rD, + (PPCaddisGotTprelHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC:$reg), +def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC_NOX0:$reg), "#LDgotTprelL", - [(set G8RC:$rD, - (PPCldGotTprelL tglobaltlsaddr:$disp, G8RC:$reg))]>, + [(set i64:$rD, + (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>, isPPC64; -def : Pat<(PPCaddTls G8RC:$in, tglobaltlsaddr:$g), - (ADD8TLS G8RC:$in, tglobaltlsaddr:$g)>; -def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), +def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g), + (ADD8TLS $in, tglobaltlsaddr:$g)>; +def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), "#ADDIStlsgdHA", - [(set G8RC:$rD, - (PPCaddisTlsgdHA G8RC:$reg, tglobaltlsaddr:$disp))]>, + [(set i64:$rD, + (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), +def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), "#ADDItlsgdL", - [(set G8RC:$rD, - (PPCaddiTlsgdL G8RC:$reg, tglobaltlsaddr:$disp))]>, + [(set i64:$rD, + (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), "#GETtlsADDR", - [(set G8RC:$rD, - (PPCgetTlsAddr G8RC:$reg, tglobaltlsaddr:$sym))]>, + [(set i64:$rD, + (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, isPPC64; -def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), +def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), "#ADDIStlsldHA", - [(set G8RC:$rD, - (PPCaddisTlsldHA G8RC:$reg, tglobaltlsaddr:$disp))]>, + [(set i64:$rD, + (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), +def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), "#ADDItlsldL", - [(set G8RC:$rD, - (PPCaddiTlsldL G8RC:$reg, tglobaltlsaddr:$disp))]>, + [(set i64:$rD, + (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), "#GETtlsldADDR", - [(set G8RC:$rD, - (PPCgetTlsldAddr G8RC:$reg, tglobaltlsaddr:$sym))]>, + [(set i64:$rD, + (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, isPPC64; -def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), +def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), "#ADDISdtprelHA", - [(set G8RC:$rD, - (PPCaddisDtprelHA G8RC:$reg, + [(set i64:$rD, + (PPCaddisDtprelHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), +def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), "#ADDIdtprelL", - [(set G8RC:$rD, - (PPCaddiDtprelL G8RC:$reg, tglobaltlsaddr:$disp))]>, + [(set i64:$rD, + (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; let PPC970_Unit = 2 in { // Truncating stores. def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src), "stb $rS, $src", LdStStore, - [(truncstorei8 G8RC:$rS, iaddr:$src)]>; + [(truncstorei8 i64:$rS, iaddr:$src)]>; def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src), "sth $rS, $src", LdStStore, - [(truncstorei16 G8RC:$rS, iaddr:$src)]>; + [(truncstorei16 i64:$rS, iaddr:$src)]>; def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src), "stw $rS, $src", LdStStore, - [(truncstorei32 G8RC:$rS, iaddr:$src)]>; + [(truncstorei32 i64:$rS, iaddr:$src)]>; def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst), "stbx $rS, $dst", LdStStore, - [(truncstorei8 G8RC:$rS, xaddr:$dst)]>, + [(truncstorei8 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst), "sthx $rS, $dst", LdStStore, - [(truncstorei16 G8RC:$rS, xaddr:$dst)]>, + [(truncstorei16 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), "stwx $rS, $dst", LdStStore, - [(truncstorei32 G8RC:$rS, xaddr:$dst)]>, + [(truncstorei32 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; // Normal 8-byte stores. def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst), "std $rS, $dst", LdStSTD, - [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64; + [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64; def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), "stdx $rS, $dst", LdStSTD, - [(store G8RC:$rS, xaddr:$dst)]>, isPPC64, + [(store i64:$rS, xaddr:$dst)]>, isPPC64, + PPC970_DGroup_Cracked; +def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst), + "stdbrx $rS, $dst", LdStStore, + [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64, PPC970_DGroup_Cracked; } -let PPC970_Unit = 2 in { +// Stores with Update (pre-inc). +let PPC970_Unit = 2, mayStore = 1 in { +def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "stbu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "sthu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst), + "stdu $rS, $dst", LdStSTDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, + isPPC64; -def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - -def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - -def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - s16immX4:$ptroff, ptr_rc:$ptrreg), - "stdu $rS, $ptroff($ptrreg)", LdStSTDU, - [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, - isPPC64; - - -def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, +def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stbux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; - -def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, +def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "sthux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; - -def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti32 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, +def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stwux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; - -def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stdux $rS, $ptroff, $ptrreg", LdStSTDU, - [(set ptr_rc:$ea_res, - (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, +def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stdux $rS, $dst", LdStSTDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked, isPPC64; - -// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register. -def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst), - "std $rT, $dst", LdStSTD, - [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64; -def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst), - "stdx $rT, $dst", LdStSTD, - [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64, - PPC970_DGroup_Cracked; } +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STBU8 $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STDU $rS, iaddroff:$ptroff, $ptrreg)>; + +def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STBUX8 $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STHUX8 $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STWUX8 $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STDUX $rS, $ptrreg, $ptroff)>; //===----------------------------------------------------------------------===// @@ -894,10 +855,26 @@ def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst), let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations. def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB), "fcfid $frD, $frB", FPGeneral, - [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64; + [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB), "fctidz $frD, $frB", FPGeneral, - [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64; + [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; + +def FCFIDU : XForm_26<63, 974, (outs F8RC:$frD), (ins F8RC:$frB), + "fcfidu $frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64; +def FCFIDS : XForm_26<59, 846, (outs F4RC:$frD), (ins F8RC:$frB), + "fcfids $frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64; +def FCFIDUS : XForm_26<59, 974, (outs F4RC:$frD), (ins F8RC:$frB), + "fcfidus $frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64; +def FCTIDUZ : XForm_26<63, 943, (outs F8RC:$frD), (ins F8RC:$frB), + "fctiduz $frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64; +def FCTIWUZ : XForm_26<63, 143, (outs F8RC:$frD), (ins F8RC:$frB), + "fctiwuz $frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64; } @@ -906,13 +883,13 @@ def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB), // // Extensions and truncates to/from 32-bit regs. -def : Pat<(i64 (zext GPRC:$in)), - (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32), +def : Pat<(i64 (zext i32:$in)), + (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32), 0, 32)>; -def : Pat<(i64 (anyext GPRC:$in)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32)>; -def : Pat<(i32 (trunc G8RC:$in)), - (EXTRACT_SUBREG G8RC:$in, sub_32)>; +def : Pat<(i64 (anyext i32:$in)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32)>; +def : Pat<(i32 (trunc i64:$in)), + (EXTRACT_SUBREG $in, sub_32)>; // Extending loads with i64 targets. def : Pat<(zextloadi1 iaddr:$src), @@ -939,24 +916,24 @@ def : Pat<(extloadi32 xaddr:$src), // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 6-bit and 7-bit shift // amounts. -def : Pat<(sra G8RC:$rS, GPRC:$rB), - (SRAD G8RC:$rS, GPRC:$rB)>; -def : Pat<(srl G8RC:$rS, GPRC:$rB), - (SRD G8RC:$rS, GPRC:$rB)>; -def : Pat<(shl G8RC:$rS, GPRC:$rB), - (SLD G8RC:$rS, GPRC:$rB)>; +def : Pat<(sra i64:$rS, i32:$rB), + (SRAD $rS, $rB)>; +def : Pat<(srl i64:$rS, i32:$rB), + (SRD $rS, $rB)>; +def : Pat<(shl i64:$rS, i32:$rB), + (SLD $rS, $rB)>; // SHL/SRL -def : Pat<(shl G8RC:$in, (i32 imm:$imm)), - (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>; -def : Pat<(srl G8RC:$in, (i32 imm:$imm)), - (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>; +def : Pat<(shl i64:$in, (i32 imm:$imm)), + (RLDICR $in, imm:$imm, (SHL64 imm:$imm))>; +def : Pat<(srl i64:$in, (i32 imm:$imm)), + (RLDICL $in, (SRL64 imm:$imm), imm:$imm)>; // ROTL -def : Pat<(rotl G8RC:$in, GPRC:$sh), - (RLDCL G8RC:$in, GPRC:$sh, 0)>; -def : Pat<(rotl G8RC:$in, (i32 imm:$imm)), - (RLDICL G8RC:$in, imm:$imm, 0)>; +def : Pat<(rotl i64:$in, i32:$sh), + (RLDCL $in, $sh, 0)>; +def : Pat<(rotl i64:$in, (i32 imm:$imm)), + (RLDICL $in, imm:$imm, 0)>; // Hi and Lo for Darwin Global Addresses. def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>; @@ -967,15 +944,25 @@ def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>; def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>; def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>; def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>; -def : Pat<(PPChi tglobaltlsaddr:$g, G8RC:$in), - (ADDIS8 G8RC:$in, tglobaltlsaddr:$g)>; -def : Pat<(PPClo tglobaltlsaddr:$g, G8RC:$in), - (ADDI8L G8RC:$in, tglobaltlsaddr:$g)>; -def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)), - (ADDIS8 G8RC:$in, tglobaladdr:$g)>; -def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)), - (ADDIS8 G8RC:$in, tconstpool:$g)>; -def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)), - (ADDIS8 G8RC:$in, tjumptable:$g)>; -def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)), - (ADDIS8 G8RC:$in, tblockaddress:$g)>; +def : Pat<(PPChi tglobaltlsaddr:$g, i64:$in), + (ADDIS8 $in, tglobaltlsaddr:$g)>; +def : Pat<(PPClo tglobaltlsaddr:$g, i64:$in), + (ADDI8 $in, tglobaltlsaddr:$g)>; +def : Pat<(add i64:$in, (PPChi tglobaladdr:$g, 0)), + (ADDIS8 $in, tglobaladdr:$g)>; +def : Pat<(add i64:$in, (PPChi tconstpool:$g, 0)), + (ADDIS8 $in, tconstpool:$g)>; +def : Pat<(add i64:$in, (PPChi tjumptable:$g, 0)), + (ADDIS8 $in, tjumptable:$g)>; +def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)), + (ADDIS8 $in, tblockaddress:$g)>; + +// Patterns to match r+r indexed loads and stores for +// addresses without at least 4-byte alignment. +def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)), + (LWAX xoaddr:$src)>; +def : Pat<(i64 (unaligned4load xoaddr:$src)), + (LDX xoaddr:$src)>; +def : Pat<(unaligned4store i64:$rS, xoaddr:$dst), + (STDX $rS, xoaddr:$dst)>; + diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 0cf28ae4b5..fff91df418 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -161,27 +161,90 @@ def vecspltisw : PatLeaf<(build_vector), [{ //===----------------------------------------------------------------------===// // Helpers for defining instructions that directly correspond to intrinsics. -// VA1a_Int - A VAForm_1a intrinsic definition. +// VA1a_Int - A VAForm_1a intrinsic definition of generic type. class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID> : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>; -// VX1_Int - A VXForm_1 intrinsic definition. +// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type. +class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty> + : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, + [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>; + +// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the +// inputs doesn't match the type of the output. +class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType InTy> + : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, + [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>; + +// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two +// input types and an output type. +class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType In1Ty, ValueType In2Ty> + : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, + [(set OutTy:$vD, + (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>; + +// VX1_Int - A VXForm_1 intrinsic definition of generic type. class VX1_Int<bits<11> xo, string opc, Intrinsic IntID> : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), !strconcat(opc, " $vD, $vA, $vB"), VecFP, [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>; -// VX2_Int - A VXForm_2 intrinsic definition. +// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type. +class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> + : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + !strconcat(opc, " $vD, $vA, $vB"), VecFP, + [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>; + +// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the +// inputs doesn't match the type of the output. +class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType InTy> + : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + !strconcat(opc, " $vD, $vA, $vB"), VecFP, + [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>; + +// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two +// input types and an output type. +class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType In1Ty, ValueType In2Ty> + : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + !strconcat(opc, " $vD, $vA, $vB"), VecFP, + [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>; + +// VX2_Int - A VXForm_1 intrinsic definition of generic type. class VX2_Int<bits<11> xo, string opc, Intrinsic IntID> : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB), !strconcat(opc, " $vD, $vB"), VecFP, [(set VRRC:$vD, (IntID VRRC:$vB))]>; +// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type. +class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID> + : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB), + !strconcat(opc, " $vD, $vB"), VecFP, + [(set v4f32:$vD, (IntID v4f32:$vB))]>; + +// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the +// inputs doesn't match the type of the output. +class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType InTy> + : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB), + !strconcat(opc, " $vD, $vB"), VecFP, + [(set OutTy:$vD, (IntID InTy:$vB))]>; + //===----------------------------------------------------------------------===// // Instruction Definitions. +def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">; +let Predicates = [HasAltivec] in { + +let isCodeGenOnly = 1 in { def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), "dss $STRM", LdStLoad /*FIXME*/, []>; @@ -213,74 +276,79 @@ def DSTST64 : DSS_Form<374, (outs), def DSTSTT64 : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; +} def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins), "mfvscr $vD", LdStStore, - [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>; + [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB), "mtvscr $vB", LdStLoad, - [(int_ppc_altivec_mtvscr VRRC:$vB)]>; + [(int_ppc_altivec_mtvscr v4i32:$vB)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src), "lvebx $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; + [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src), "lvehx $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; + [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src), "lvewx $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src), "lvx $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src), "lvxl $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; } def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src), "lvsl $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, + [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, PPC970_Unit_LSU; def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src), "lvsr $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, + [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, PPC970_Unit_LSU; let PPC970_Unit = 2 in { // Stores. def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst), "stvebx $rS, $dst", LdStStore, - [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>; def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst), "stvehx $rS, $dst", LdStStore, - [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>; def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst), "stvewx $rS, $dst", LdStStore, - [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>; def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst), "stvx $rS, $dst", LdStStore, - [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>; def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst), "stvxl $rS, $dst", LdStStore, - [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>; } let PPC970_Unit = 5 in { // VALU Operations. // VA-Form instructions. 3-input AltiVec ops. def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), "vmaddfp $vD, $vA, $vC, $vB", VecFP, - [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>; + [(set v4f32:$vD, + (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>; def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), "vnmsubfp $vD, $vA, $vC, $vB", VecFP, - [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC, - (fneg VRRC:$vB))))]>; + [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC, + (fneg v4f32:$vB))))]>; + +def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>; +def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs, + v8i16>; +def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>; -def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>; -def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>; -def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>; -def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>; -def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>; +def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm, + v4i32, v4i32, v16i8>; +def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>; // Shuffles. def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH), @@ -291,25 +359,25 @@ def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH), // VX-Form instructions. AltiVec arithmetic ops. def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vaddfp $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>; + [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>; def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vaddubm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>; + [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>; def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vadduhm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>; + [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>; def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vadduwm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>; + [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>; -def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>; -def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>; -def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>; -def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>; -def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>; -def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>; -def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>; +def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>; +def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>; +def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>; +def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>; +def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>; +def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>; +def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>; def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), @@ -322,20 +390,20 @@ def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vcfsx $vD, $vB, $UIMM", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>; + [(set v4f32:$vD, + (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>; def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vcfux $vD, $vB, $UIMM", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>; + [(set v4f32:$vD, + (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>; def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vctsxs $vD, $vB, $UIMM", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>; + [(set v4i32:$vD, + (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>; def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vctuxs $vD, $vB, $UIMM", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>; + [(set v4i32:$vD, + (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>; // Defines with the UIM field set to 0 for floating-point // to integer (fp_to_sint/fp_to_uint) conversions and integer @@ -343,49 +411,49 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), let VA = 0 in { def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB), "vcfsx $vD, $vB, 0", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vcfsx VRRC:$vB, 0))]>; + [(set v4f32:$vD, + (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>; def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB), "vctuxs $vD, $vB, 0", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vctuxs VRRC:$vB, 0))]>; + [(set v4i32:$vD, + (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>; def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB), "vcfux $vD, $vB, 0", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vcfux VRRC:$vB, 0))]>; + [(set v4f32:$vD, + (int_ppc_altivec_vcfux v4i32:$vB, 0))]>; def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB), "vctsxs $vD, $vB, 0", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vctsxs VRRC:$vB, 0))]>; + [(set v4i32:$vD, + (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>; } -def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>; -def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>; - -def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>; -def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>; -def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>; -def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>; -def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>; -def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>; - -def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>; -def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>; -def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>; -def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>; -def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>; -def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>; -def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>; -def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>; -def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>; -def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>; -def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>; -def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>; -def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>; -def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>; +def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>; +def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>; + +def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>; +def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>; +def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>; +def VAVGUB : VX1_Int_Ty<1026, "vavgub", int_ppc_altivec_vavgub, v16i8>; +def VAVGUH : VX1_Int_Ty<1090, "vavguh", int_ppc_altivec_vavguh, v8i16>; +def VAVGUW : VX1_Int_Ty<1154, "vavguw", int_ppc_altivec_vavguw, v4i32>; + +def VMAXFP : VX1_Int_Ty<1034, "vmaxfp", int_ppc_altivec_vmaxfp, v4f32>; +def VMAXSB : VX1_Int_Ty< 258, "vmaxsb", int_ppc_altivec_vmaxsb, v16i8>; +def VMAXSH : VX1_Int_Ty< 322, "vmaxsh", int_ppc_altivec_vmaxsh, v8i16>; +def VMAXSW : VX1_Int_Ty< 386, "vmaxsw", int_ppc_altivec_vmaxsw, v4i32>; +def VMAXUB : VX1_Int_Ty< 2, "vmaxub", int_ppc_altivec_vmaxub, v16i8>; +def VMAXUH : VX1_Int_Ty< 66, "vmaxuh", int_ppc_altivec_vmaxuh, v8i16>; +def VMAXUW : VX1_Int_Ty< 130, "vmaxuw", int_ppc_altivec_vmaxuw, v4i32>; +def VMINFP : VX1_Int_Ty<1098, "vminfp", int_ppc_altivec_vminfp, v4f32>; +def VMINSB : VX1_Int_Ty< 770, "vminsb", int_ppc_altivec_vminsb, v16i8>; +def VMINSH : VX1_Int_Ty< 834, "vminsh", int_ppc_altivec_vminsh, v8i16>; +def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>; +def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>; +def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>; +def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>; def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghb $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghh $vD, $vA, $vB", VecFP, [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>; @@ -394,7 +462,7 @@ def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>; def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglb $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglh $vD, $vA, $vB", VecFP, [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>; @@ -402,55 +470,74 @@ def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglw $vD, $vA, $vB", VecFP, [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>; -def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>; -def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>; -def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>; -def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>; -def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>; -def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>; - -def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>; -def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>; -def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>; -def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>; -def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>; -def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>; -def VMULOUB : VX1_Int< 8, "vmuloub", int_ppc_altivec_vmuloub>; -def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>; +def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm, + v4i32, v16i8, v4i32>; +def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm, + v4i32, v8i16, v4i32>; +def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs, + v4i32, v8i16, v4i32>; +def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm, + v4i32, v16i8, v4i32>; +def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm, + v4i32, v8i16, v4i32>; +def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs, + v4i32, v8i16, v4i32>; + +def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb, + v8i16, v16i8>; +def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh, + v4i32, v8i16>; +def VMULEUB : VX1_Int_Ty2<520, "vmuleub", int_ppc_altivec_vmuleub, + v8i16, v16i8>; +def VMULEUH : VX1_Int_Ty2<584, "vmuleuh", int_ppc_altivec_vmuleuh, + v4i32, v8i16>; +def VMULOSB : VX1_Int_Ty2<264, "vmulosb", int_ppc_altivec_vmulosb, + v8i16, v16i8>; +def VMULOSH : VX1_Int_Ty2<328, "vmulosh", int_ppc_altivec_vmulosh, + v4i32, v8i16>; +def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub, + v8i16, v16i8>; +def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh, + v4i32, v8i16>; -def VREFP : VX2_Int<266, "vrefp", int_ppc_altivec_vrefp>; -def VRFIM : VX2_Int<714, "vrfim", int_ppc_altivec_vrfim>; -def VRFIN : VX2_Int<522, "vrfin", int_ppc_altivec_vrfin>; -def VRFIP : VX2_Int<650, "vrfip", int_ppc_altivec_vrfip>; -def VRFIZ : VX2_Int<586, "vrfiz", int_ppc_altivec_vrfiz>; -def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>; +def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>; +def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>; +def VRFIN : VX2_Int_SP<522, "vrfin", int_ppc_altivec_vrfin>; +def VRFIP : VX2_Int_SP<650, "vrfip", int_ppc_altivec_vrfip>; +def VRFIZ : VX2_Int_SP<586, "vrfiz", int_ppc_altivec_vrfiz>; +def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>; -def VSUBCUW : VX1_Int<74, "vsubcuw", int_ppc_altivec_vsubcuw>; +def VSUBCUW : VX1_Int_Ty<74, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>; def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vsubfp $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>; + [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>; def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vsububm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>; + [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>; def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vsubuhm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>; + [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>; def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vsubuwm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>; + [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>; -def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>; -def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>; -def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>; -def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>; -def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>; -def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>; -def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>; -def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>; -def VSUM4SBS: VX1_Int<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs>; -def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>; -def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>; +def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>; +def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>; +def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>; +def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>; +def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>; +def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>; + +def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>; +def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>; + +def VSUM4SBS: VX1_Int_Ty3<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs, + v4i32, v16i8, v4i32>; +def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs, + v4i32, v8i16, v4i32>; +def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs, + v4i32, v16i8, v4i32>; def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vnor $vD, $vA, $vB", VecFP, @@ -463,15 +550,16 @@ def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vxor $vD, $vA, $vB", VecFP, [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>; -def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>; -def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>; -def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>; +def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>; +def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>; +def VRLW : VX1_Int_Ty< 132, "vrlw", int_ppc_altivec_vrlw, v4i32>; -def VSL : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >; -def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>; -def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>; -def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>; -def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>; +def VSL : VX1_Int_Ty< 452, "vsl" , int_ppc_altivec_vsl, v4i32 >; +def VSLO : VX1_Int_Ty<1036, "vslo", int_ppc_altivec_vslo, v4i32>; + +def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>; +def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>; +def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>; def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vspltb $vD, $vB, $UIMM", VecPerm, @@ -486,60 +574,74 @@ def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), [(set VRRC:$vD, (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; -def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>; -def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>; -def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>; -def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>; -def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>; -def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>; -def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>; -def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>; +def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>; +def VSRO : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>; + +def VSRAB : VX1_Int_Ty< 772, "vsrab", int_ppc_altivec_vsrab, v16i8>; +def VSRAH : VX1_Int_Ty< 836, "vsrah", int_ppc_altivec_vsrah, v8i16>; +def VSRAW : VX1_Int_Ty< 900, "vsraw", int_ppc_altivec_vsraw, v4i32>; +def VSRB : VX1_Int_Ty< 516, "vsrb" , int_ppc_altivec_vsrb , v16i8>; +def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>; +def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>; def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM), "vspltisb $vD, $SIMM", VecPerm, - [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>; + [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>; def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM), "vspltish $vD, $SIMM", VecPerm, - [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>; + [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>; def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM), "vspltisw $vD, $SIMM", VecPerm, - [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>; + [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>; // Vector Pack. -def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>; -def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>; -def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>; -def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>; -def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>; +def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx, + v8i16, v4i32>; +def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss, + v16i8, v8i16>; +def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus, + v16i8, v8i16>; +def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss, + v16i8, v4i32>; +def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus, + v8i16, v4i32>; def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vpkuhum $vD, $vA, $vB", VecFP, [(set VRRC:$vD, (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; -def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>; +def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus, + v16i8, v8i16>; def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vpkuwum $vD, $vA, $vB", VecFP, [(set VRRC:$vD, (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; -def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>; +def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus, + v8i16, v4i32>; // Vector Unpack. -def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>; -def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>; -def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>; -def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>; -def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>; -def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>; +def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx, + v4i32, v8i16>; +def VUPKHSB : VX2_Int_Ty2<526, "vupkhsb", int_ppc_altivec_vupkhsb, + v8i16, v16i8>; +def VUPKHSH : VX2_Int_Ty2<590, "vupkhsh", int_ppc_altivec_vupkhsh, + v4i32, v8i16>; +def VUPKLPX : VX2_Int_Ty2<974, "vupklpx", int_ppc_altivec_vupklpx, + v4i32, v8i16>; +def VUPKLSB : VX2_Int_Ty2<654, "vupklsb", int_ppc_altivec_vupklsb, + v8i16, v16i8>; +def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh, + v4i32, v8i16>; // Altivec Comparisons. class VCMP<bits<10> xo, string asmstr, ValueType Ty> : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare, - [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>; + [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>; class VCMPo<bits<10> xo, string asmstr, ValueType Ty> : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare, - [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> { + [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> { let Defs = [CR6]; let RC = 1; } @@ -578,13 +680,14 @@ def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>; def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>; def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>; +let isCodeGenOnly = 1 in def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins), "vxor $vD, $vD, $vD", VecFP, [(set VRRC:$vD, (v4i32 immAllZerosV))]>; let IMM=-1 in { def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins), "vspltisw $vD, -1", VecFP, - [(set VRRC:$vD, (v4i32 immAllOnesV))]>; + [(set v4i32:$vD, (v4i32 immAllOnesV))]>; } } // VALU Operations. @@ -597,31 +700,31 @@ def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>; def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>; // * 32-bit -def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM), - (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM), - (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM), - (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM), - (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM), + (DST 0, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM), + (DSTT 1, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM), + (DSTST 0, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM), + (DSTSTT 1, imm:$STRM, $rA, $rB)>; // * 64-bit -def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM), - (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM), - (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM), - (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM), - (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM), + (DST64 0, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM), + (DSTT64 1, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM), + (DSTST64 0, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM), + (DSTSTT64 1, imm:$STRM, $rA, $rB)>; // Loads. def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>; // Stores. -def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst), - (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>; +def : Pat<(store v4i32:$rS, xoaddr:$dst), + (STVX $rS, xoaddr:$dst)>; // Bit conversions. def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>; @@ -643,93 +746,96 @@ def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>; // Shuffles. // Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x) -def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef), - (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>; -def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef), - (VPKUWUM VRRC:$vA, VRRC:$vA)>; -def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef), - (VPKUHUM VRRC:$vA, VRRC:$vA)>; +def:Pat<(vsldoi_unary_shuffle:$in v16i8:$vA, undef), + (VSLDOI $vA, $vA, (VSLDOI_unary_get_imm VRRC:$in))>; +def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef), + (VPKUWUM $vA, $vA)>; +def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef), + (VPKUHUM $vA, $vA)>; // Match vmrg*(x,x) -def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGLB VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGLH VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGLW VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGHB VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGHH VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGHW VRRC:$vA, VRRC:$vA)>; +def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef), + (VMRGLB $vA, $vA)>; +def:Pat<(vmrglh_unary_shuffle v16i8:$vA, undef), + (VMRGLH $vA, $vA)>; +def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef), + (VMRGLW $vA, $vA)>; +def:Pat<(vmrghb_unary_shuffle v16i8:$vA, undef), + (VMRGHB $vA, $vA)>; +def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef), + (VMRGHH $vA, $vA)>; +def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef), + (VMRGHW $vA, $vA)>; // Logical Operations -def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>; +def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR $vA, $vA)>; def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))), - (VNOR VRRC:$A, VRRC:$B)>; + (VNOR $A, $B)>; def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))), - (VANDC VRRC:$A, VRRC:$B)>; + (VANDC $A, $B)>; -def : Pat<(fmul VRRC:$vA, VRRC:$vB), - (VMADDFP VRRC:$vA, VRRC:$vB, +def : Pat<(fmul v4f32:$vA, v4f32:$vB), + (VMADDFP $vA, $vB, (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>; // Fused multiply add and multiply sub for packed float. These are represented // separately from the real instructions above, for operations that must have // the additional precision, such as Newton-Rhapson (used by divide, sqrt) -def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C), - (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>; -def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C), - (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>; +def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C), + (VMADDFP $A, $B, $C)>; +def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C), + (VNMSUBFP $A, $B, $C)>; -def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C), - (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>; -def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C), - (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>; +def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C), + (VMADDFP $A, $B, $C)>; +def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C), + (VNMSUBFP $A, $B, $C)>; def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC), - (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>; + (VPERM $vA, $vB, $vC)>; // Vector shifts -def : Pat<(v16i8 (shl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))), - (v16i8 (VSLB VRRC:$vA, VRRC:$vB))>; -def : Pat<(v8i16 (shl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))), - (v8i16 (VSLH VRRC:$vA, VRRC:$vB))>; -def : Pat<(v4i32 (shl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))), - (v4i32 (VSLW VRRC:$vA, VRRC:$vB))>; - -def : Pat<(v16i8 (srl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))), - (v16i8 (VSRB VRRC:$vA, VRRC:$vB))>; -def : Pat<(v8i16 (srl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))), - (v8i16 (VSRH VRRC:$vA, VRRC:$vB))>; -def : Pat<(v4i32 (srl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))), - (v4i32 (VSRW VRRC:$vA, VRRC:$vB))>; - -def : Pat<(v16i8 (sra (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))), - (v16i8 (VSRAB VRRC:$vA, VRRC:$vB))>; -def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))), - (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>; -def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))), - (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>; +def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)), + (v16i8 (VSLB $vA, $vB))>; +def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)), + (v8i16 (VSLH $vA, $vB))>; +def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)), + (v4i32 (VSLW $vA, $vB))>; + +def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)), + (v16i8 (VSRB $vA, $vB))>; +def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)), + (v8i16 (VSRH $vA, $vB))>; +def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)), + (v4i32 (VSRW $vA, $vB))>; + +def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)), + (v16i8 (VSRAB $vA, $vB))>; +def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)), + (v8i16 (VSRAH $vA, $vB))>; +def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)), + (v4i32 (VSRAW $vA, $vB))>; // Float to integer and integer to float conversions -def : Pat<(v4i32 (fp_to_sint (v4f32 VRRC:$vA))), - (VCTSXS_0 VRRC:$vA)>; -def : Pat<(v4i32 (fp_to_uint (v4f32 VRRC:$vA))), - (VCTUXS_0 VRRC:$vA)>; -def : Pat<(v4f32 (sint_to_fp (v4i32 VRRC:$vA))), - (VCFSX_0 VRRC:$vA)>; -def : Pat<(v4f32 (uint_to_fp (v4i32 VRRC:$vA))), - (VCFUX_0 VRRC:$vA)>; +def : Pat<(v4i32 (fp_to_sint v4f32:$vA)), + (VCTSXS_0 $vA)>; +def : Pat<(v4i32 (fp_to_uint v4f32:$vA)), + (VCTUXS_0 $vA)>; +def : Pat<(v4f32 (sint_to_fp v4i32:$vA)), + (VCFSX_0 $vA)>; +def : Pat<(v4f32 (uint_to_fp v4i32:$vA)), + (VCFUX_0 $vA)>; // Floating-point rounding -def : Pat<(v4f32 (ffloor (v4f32 VRRC:$vA))), - (VRFIM VRRC:$vA)>; -def : Pat<(v4f32 (fceil (v4f32 VRRC:$vA))), - (VRFIP VRRC:$vA)>; -def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))), - (VRFIZ VRRC:$vA)>; -def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))), - (VRFIN VRRC:$vA)>; +def : Pat<(v4f32 (ffloor v4f32:$vA)), + (VRFIM $vA)>; +def : Pat<(v4f32 (fceil v4f32:$vA)), + (VRFIP $vA)>; +def : Pat<(v4f32 (ftrunc v4f32:$vA)), + (VRFIZ $vA)>; +def : Pat<(v4f32 (fnearbyint v4f32:$vA)), + (VRFIN $vA)>; + +} // end HasAltivec + diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index c3c171cd21..400b7e367b 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -120,6 +120,18 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL, let CR = 0; } +class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, + dag OOL, dag IOL, string asmstr> + : I<opcode, OOL, IOL, asmstr, BrB> { + bits<14> BD; + + let Inst{6-10} = bo; + let Inst{11-15} = bi; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + // 1.7.4 D-Form class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> @@ -664,14 +676,13 @@ class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr, // This is probably 1.7.9, but I don't have the reference that uses this // numbering scheme... class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, - string cstr, InstrItinClass itin, list<dag>pattern> + InstrItinClass itin, list<dag>pattern> : I<opcode, OOL, IOL, asmstr, itin> { bits<8> FM; bits<5> rT; bit RC = 0; // set by isDOT let Pattern = pattern; - let Constraints = cstr; let Inst{6} = 0; let Inst{7-14} = FM; @@ -765,16 +776,14 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, bits<5> RT; bits<5> RA; bits<5> RB; - bits<7> BIBO; // 2 bits of BI and 5 bits of BO (must be 12). - bits<3> CR; + bits<5> COND; let Pattern = pattern; let Inst{6-10} = RT; let Inst{11-15} = RA; let Inst{16-20} = RB; - let Inst{21-23} = CR; - let Inst{24-25} = BIBO{6-5}; + let Inst{21-25} = COND; let Inst{26-30} = xo; let Inst{31} = 0; } @@ -987,6 +996,7 @@ class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr, //===----------------------------------------------------------------------===// class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern> : I<0, OOL, IOL, asmstr, NoItinerary> { + let isCodeGenOnly = 1; let PPC64 = 0; let Pattern = pattern; let Inst{31-0} = 0; diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index a0517a80a9..69c54ed084 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -33,11 +33,6 @@ #define GET_INSTRINFO_CTOR #include "PPCGenInstrInfo.inc" -namespace llvm { -extern cl::opt<bool> DisablePPC32RS; -extern cl::opt<bool> DisablePPC64RS; -} - using namespace llvm; static cl:: @@ -99,12 +94,18 @@ bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { + // Note: This list must be kept consistent with LoadRegFromStackSlot. switch (MI->getOpcode()) { default: break; case PPC::LD: case PPC::LWZ: case PPC::LFS: case PPC::LFD: + case PPC::RESTORE_CR: + case PPC::LVX: + case PPC::RESTORE_VRSAVE: + // Check for the operands added by addFrameReference (the immediate is the + // offset which defaults to 0). if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() && MI->getOperand(2).isFI()) { FrameIndex = MI->getOperand(2).getIndex(); @@ -117,12 +118,18 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { + // Note: This list must be kept consistent with StoreRegToStackSlot. switch (MI->getOpcode()) { default: break; case PPC::STD: case PPC::STW: case PPC::STFS: case PPC::STFD: + case PPC::SPILL_CR: + case PPC::STVX: + case PPC::SPILL_VRSAVE: + // Check for the operands added by addFrameReference (the immediate is the + // offset which defaults to 0). if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() && MI->getOperand(2).isFI()) { FrameIndex = MI->getOperand(2).getIndex(); @@ -444,40 +451,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const{ + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI, bool &SpillsVRS) const{ + // Note: If additional store instructions are added here, + // update isStoreToStackSlot. + DebugLoc DL; if (PPC::GPRCRegClass.hasSubClassEq(RC)) { - if (SrcReg != PPC::LR) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) - .addReg(SrcReg, - getKillRegState(isKill)), - FrameIdx)); - } else { - // FIXME: this spills LR immediately to memory in one step. To do this, - // we use R11, which we know cannot be used in the prolog/epilog. This is - // a hack. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11)); - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) - .addReg(PPC::R11, - getKillRegState(isKill)), - FrameIdx)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) { - if (SrcReg != PPC::LR8) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) - .addReg(SrcReg, - getKillRegState(isKill)), - FrameIdx)); - } else { - // FIXME: this spills LR immediately to memory in one step. To do this, - // we use X11, which we know cannot be used in the prolog/epilog. This is - // a hack. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11)); - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) - .addReg(PPC::X11, - getKillRegState(isKill)), - FrameIdx)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD)) .addReg(SrcReg, @@ -489,47 +478,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) - .addReg(SrcReg, - getKillRegState(isKill)), - FrameIdx)); - return true; - } else { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - - bool is64Bit = TM.getSubtargetImpl()->isPPC64(); - // We need to store the CR in the low 4-bits of the saved value. First, - // issue a MFCR to save all of the CRBits. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - (is64Bit ? PPC::X2 : PPC::R2) : - (is64Bit ? PPC::X0 : PPC::R0); - NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud : - PPC::MFCRpseud), ScratchReg) - .addReg(SrcReg, getKillRegState(isKill))); - - // If the saved register wasn't CR0, shift the bits left so that they are - // in CR0's slot. - if (SrcReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4; - // rlwinm scratch, scratch, ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 : - PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(ShiftBits) - .addImm(0).addImm(31)); - } - - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ? - PPC::STW8 : PPC::STW)) - .addReg(ScratchReg, - getKillRegState(isKill)), - FrameIdx)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { // FIXME: We use CRi here because there is no mtcrf on a bit. Since the // backend currently only uses CR1EQ as an individual bit, this should @@ -562,23 +515,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, Reg = PPC::CR7; return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, - &PPC::CRRCRegClass, NewMIs); + &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS); } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - // We don't have indexed addressing for vector loads. Emit: - // R0 = ADDI FI# - // STVX VAL, 0, R0 - // - // FIXME: We use R0 here, because it isn't available for RA. - bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); - unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), - FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX)) - .addReg(SrcReg, getKillRegState(isKill)) - .addReg(GPR0) - .addReg(GPR0)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; + } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { + assert(TM.getSubtargetImpl()->isDarwin() && + "VRSAVE only needs spill/restore on Darwin"); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + SpillsVRS = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -595,10 +547,19 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr*, 4> NewMIs; - if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasSpills(); + + bool NonRI = false, SpillsVRS = false; + if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs, + NonRI, SpillsVRS)) FuncInfo->setSpillsCR(); - } + + if (SpillsVRS) + FuncInfo->setSpillsVRSAVE(); + + if (NonRI) + FuncInfo->setHasNonRISpills(); for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); @@ -616,25 +577,17 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs)const{ + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI, bool &SpillsVRS) const{ + // Note: If additional load instructions are added here, + // update isLoadFromStackSlot. + if (PPC::GPRCRegClass.hasSubClassEq(RC)) { - if (DestReg != PPC::LR) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - DestReg), FrameIdx)); - } else { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - PPC::R11), FrameIdx)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + DestReg), FrameIdx)); } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) { - if (DestReg != PPC::LR8) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg), - FrameIdx)); - } else { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), - PPC::X11), FrameIdx)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg), + FrameIdx)); } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg), FrameIdx)); @@ -642,37 +595,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, - get(PPC::RESTORE_CR), DestReg) - , FrameIdx)); - return true; - } else { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - PPC::R2 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - ScratchReg), FrameIdx)); - - // If the reloaded register isn't CR0, shift the bits right so that they are - // in the right CR's slot. - if (DestReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; - // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) - .addImm(31)); - } - - NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ? - PPC::MTCRF8 : PPC::MTCRF), DestReg) - .addReg(ScratchReg)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_CR), DestReg), + FrameIdx)); + return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { unsigned Reg = 0; @@ -702,21 +628,20 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, Reg = PPC::CR7; return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, - &PPC::CRRCRegClass, NewMIs); + &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS); } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - // We don't have indexed addressing for vector loads. Emit: - // R0 = ADDI FI# - // Dest = LVX 0, R0 - // - // FIXME: We use R0 here, because it isn't available for RA. - bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); - unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), - FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0) - .addReg(GPR0)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg), + FrameIdx)); + NonRI = true; + } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { + assert(TM.getSubtargetImpl()->isDarwin() && + "VRSAVE only needs spill/restore on Darwin"); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_VRSAVE), + DestReg), + FrameIdx)); + SpillsVRS = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -734,10 +659,21 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, SmallVector<MachineInstr*, 4> NewMIs; DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); - if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasSpills(); + + bool NonRI = false, SpillsVRS = false; + if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs, + NonRI, SpillsVRS)) FuncInfo->setSpillsCR(); - } + + if (SpillsVRS) + FuncInfo->setSpillsVRSAVE(); + + if (NonRI) + FuncInfo->setHasNonRISpills(); + for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); @@ -786,8 +722,8 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case PPC::GC_LABEL: case PPC::DBG_VALUE: return 0; - case PPC::BL8_NOP_ELF: - case PPC::BLA8_NOP_ELF: + case PPC::BL8_NOP: + case PPC::BLA8_NOP: return 8; default: return 4; // PowerPC instructions are all 4 bytes diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 374213ea43..635e3480b0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -71,11 +71,13 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI, bool &SpillsVRS) const; bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI, bool &SpillsVRS) const; public: explicit PPCInstrInfo(PPCTargetMachine &TM); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 460e94342d..067f5aacfa 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -20,6 +20,10 @@ include "PPCInstrFormats.td" def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx SDTCisVT<0, f64>, SDTCisPtrTy<1> ]>; +def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x + SDTCisVT<0, f64>, SDTCisPtrTy<1> +]>; + def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; @@ -36,10 +40,10 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [ ]>; def SDT_PPClbrx : SDTypeProfile<1, 2, [ - SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> + SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; def SDT_PPCstbrx : SDTypeProfile<0, 3, [ - SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> + SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; def SDT_PPClarx : SDTypeProfile<1, 1, [ @@ -53,32 +57,33 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ SDTCisPtrTy<0>, SDTCisVT<1, i32> ]>; -def SDT_PPCnop : SDTypeProfile<0, 0, []>; //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. // -def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>; +def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>; +def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>; +def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>; +def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>; def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>; def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; +def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; +def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain, SDNPMayStore]>; +def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx, + [SDNPHasChain, SDNPMayLoad]>; +def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx, + [SDNPHasChain, SDNPMayLoad]>; + +// Extract FPSCR (not modeled at the DAG level). +def PPCmffs : SDNode<"PPCISD::MFFS", + SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>; + +// Perform FADD in round-to-zero mode. +def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; -// This sequence is used for long double->int conversions. It changes the -// bits in the FPSCR which is not modelled. -def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, - [SDNPOutGlue]>; -def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPInGlue, SDNPOutGlue]>; -def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPInGlue, SDNPOutGlue]>; -def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, - [SDNPInGlue, SDNPOutGlue]>; -def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3, - [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>, - SDTCisVT<3, f64>]>, - [SDNPInGlue]>; def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. @@ -113,10 +118,6 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>; def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>; def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>; -def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>; -def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore, - [SDNPHasChain, SDNPMayStore]>; - // These are target-independent nodes, but have target-specific formats. def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; @@ -124,16 +125,12 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; -def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; +def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, @@ -144,13 +141,9 @@ def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, SDNPInGlue, SDNPOutGlue]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; - -def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; +def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; @@ -158,6 +151,14 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP", + SDTypeProfile<1, 1, [SDTCisInt<0>, + SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPSideEffect]>; +def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPSideEffect]>; + def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; @@ -278,6 +279,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue(); }], HI16>; +// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require +// restricted memrix (offset/4) constants are alignment sensitive. If these +// offsets are hidden behind TOC entries than the values of the lower-order +// bits cannot be checked directly. As a result, we need to also incorporate +// an alignment check into the relevant patterns. + +def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4pre_store : PatFrag< + (ops node:$val, node:$base, node:$offset), + (pre_store node:$val, node:$base, node:$offset), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; + +def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; //===----------------------------------------------------------------------===// // PowerPC Flag Definitions. @@ -314,9 +347,6 @@ def s16imm : Operand<i32> { def u16imm : Operand<i32> { let PrintMethod = "printU16ImmOperand"; } -def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing. - let PrintMethod = "printS16X4ImmOperand"; -} def directbrtarget : Operand<OtherVT> { let PrintMethod = "printBranchOperand"; let EncoderMethod = "getDirectBrEncoding"; @@ -344,26 +374,37 @@ def crbitm: Operand<i8> { let EncoderMethod = "get_crbitm_encoding"; } // Address operands +// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode). +def ptr_rc_nor0 : PointerLikeRegClass<1>; + +def dispRI : Operand<iPTR>; +def dispRIX : Operand<iPTR>; + def memri : Operand<iPTR> { let PrintMethod = "printMemRegImm"; - let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIEncoding"; } def memrr : Operand<iPTR> { let PrintMethod = "printMemRegReg"; - let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg); + let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg); } def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; - let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIXEncoding"; } -// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg -// that doesn't matter. -def pred : PredicateOperand<OtherVT, (ops imm, CRRC), - (ops (i32 20), (i32 zero_reg))> { +// A single-register address. This is used with the SjLj +// pseudo-instructions. +def memr : Operand<iPTR> { + let MIOperandInfo = (ops ptr_rc:$ptrreg); +} + +// PowerPC Predicate operand. +def pred : Operand<OtherVT> { let PrintMethod = "printPredicateOperand"; + let MIOperandInfo = (ops i32imm:$bibo, CRRC:$reg); } // Define PowerPC specific addressing mode. @@ -372,9 +413,12 @@ def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>; def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>; def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std" +// The address in a single register. This is used with the SjLj +// pseudo-instructions. +def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>; + /// This is just the offset part of iaddr, used for preinc. def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>; -def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. @@ -401,17 +445,22 @@ def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS), let Defs = [R1], Uses = [R1] in def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC", - [(set GPRC:$result, - (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>; + [(set i32:$result, + (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>; // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. let usesCustomInserter = 1, // Expanded after instruction selection. PPC970_Single = 1 in { - def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F, + // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes + // because either operand might become the first operand in an isel, and + // that operand cannot be r0. + def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, + GPRC_NOR0:$T, GPRC_NOR0:$F, i32imm:$BROPC), "#SELECT_CC_I4", []>; - def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F, + def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, + G8RC_NOX0:$T, G8RC_NOX0:$F, i32imm:$BROPC), "#SELECT_CC_I8", []>; def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F, @@ -438,10 +487,9 @@ def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F), "#RESTORE_CR", []>; let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { - let isCodeGenOnly = 1, isReturn = 1, Uses = [LR, RM] in - def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p), - "b${p:cc}lr ${p:reg}", BrB, - [(retflag)]>; + let isReturn = 1, Uses = [LR, RM] in + def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB, + [(retflag)]>; let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>; } @@ -473,46 +521,29 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { } } -// Darwin ABI Calls. -let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { - // Convenient aliases for call instructions - let Uses = [RM] in { - def BL_Darwin : IForm<18, 0, 1, - (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA_Darwin : IForm<18, 1, 1, - (outs), (ins aaddr:$func), - "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>; - } - let Uses = [CTR, RM] in { - def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins), - "bctrl", BrB, - [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>; +// The direct BCL used by the SjLj setjmp code. +let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in { + let Defs = [LR], Uses = [RM] in { + def BCL : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst), + "bcl 20, 31, $dst">; } } -// SVR4 ABI Calls. let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL_SVR4 : IForm<18, 0, 1, - (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA_SVR4 : IForm<18, 1, 1, - (outs), (ins aaddr:$func), - "bla $func", BrB, - [(PPCcall_SVR4 (i32 imm:$func))]>; + def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func), + "bl $func", BrB, []>; // See Pat patterns below. + def BLA : IForm<18, 1, 1, (outs), (ins aaddr:$func), + "bla $func", BrB, [(PPCcall (i32 imm:$func))]>; } let Uses = [CTR, RM] in { - def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins), - "bctrl", BrB, - [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>; + def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), + "bctrl", BrB, [(PPCbctrl)]>, + Requires<[In32BitMode]>; } } - let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in def TCRETURNdi :Pseudo< (outs), (ins calltarget:$dst, i32imm:$offset), @@ -531,6 +562,8 @@ def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset), []>; +let isCodeGenOnly = 1 in { + let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, @@ -544,6 +577,7 @@ def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst), "b $dst", BrB, []>; +} let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in @@ -551,6 +585,22 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst), "ba $dst", BrB, []>; +let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { + def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + "#EH_SJLJ_SETJMP32", + [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, + Requires<[In32BitMode]>; + let isTerminator = 1 in + def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf), + "#EH_SJLJ_LONGJMP32", + [(PPCeh_sjlj_longjmp addr:$buf)]>, + Requires<[In32BitMode]>; +} + +let isBranch = 1, isTerminator = 1 in { + def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst), + "#EH_SjLj_Setup\t$dst", []>; +} // DCB* instructions. def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), @@ -586,93 +636,90 @@ let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8", - [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8", - [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8", - [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8", - [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8", - [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8", - [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16", - [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16", - [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16", - [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16", - [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16", - [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16", - [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32", - [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32", - [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32", - [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32", - [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32", - [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32", - [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_CMP_SWAP_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8", - [(set GPRC:$dst, - (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; + [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_CMP_SWAP_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new", - [(set GPRC:$dst, - (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; + [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_CMP_SWAP_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new", - [(set GPRC:$dst, - (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; + [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_SWAP_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8", - [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>; + [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>; def ATOMIC_SWAP_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16", - [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>; + [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>; def ATOMIC_SWAP_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32", - [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>; + [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>; } } // Instructions to support atomic operations def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src), "lwarx $rD, $src", LdStLWARX, - [(set GPRC:$rD, (PPClarx xoaddr:$src))]>; + [(set i32:$rD, (PPClarx xoaddr:$src))]>; let Defs = [CR0] in def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst), "stwcx. $rS, $dst", LdStSTWCX, - [(PPCstcx GPRC:$rS, xoaddr:$dst)]>, + [(PPCstcx i32:$rS, xoaddr:$dst)]>, isDOT; let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in @@ -686,94 +733,94 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src), "lbz $rD, $src", LdStLoad, - [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>; + [(set i32:$rD, (zextloadi8 iaddr:$src))]>; def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src), "lha $rD, $src", LdStLHA, - [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>, + [(set i32:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src), "lhz $rD, $src", LdStLoad, - [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>; + [(set i32:$rD, (zextloadi16 iaddr:$src))]>; def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), "lwz $rD, $src", LdStLoad, - [(set GPRC:$rD, (load iaddr:$src))]>; + [(set i32:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), "lfs $rD, $src", LdStLFD, - [(set F4RC:$rD, (load iaddr:$src))]>; + [(set f32:$rD, (load iaddr:$src))]>; def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), "lfd $rD, $src", LdStLFD, - [(set F8RC:$rD, (load iaddr:$src))]>; + [(set f64:$rD, (load iaddr:$src))]>; // Unindexed (r+i) Loads with Update (preinc). let mayLoad = 1 in { -def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfsu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfdu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // Indexed (r+r) Loads with Update (preinc). -def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result), +def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result), +def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result), +def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfsux $rD, $addr", LdStLFDU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), +def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfdux $rD, $addr", LdStLFDU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } } @@ -783,32 +830,39 @@ def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src), "lbzx $rD, $src", LdStLoad, - [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>; + [(set i32:$rD, (zextloadi8 xaddr:$src))]>; def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, - [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>, + [(set i32:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src), "lhzx $rD, $src", LdStLoad, - [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>; + [(set i32:$rD, (zextloadi16 xaddr:$src))]>; def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src), "lwzx $rD, $src", LdStLoad, - [(set GPRC:$rD, (load xaddr:$src))]>; + [(set i32:$rD, (load xaddr:$src))]>; def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src), "lhbrx $rD, $src", LdStLoad, - [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>; + [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>; def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), "lwbrx $rD, $src", LdStLoad, - [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>; + [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), "lfsx $frD, $src", LdStLFD, - [(set F4RC:$frD, (load xaddr:$src))]>; + [(set f32:$frD, (load xaddr:$src))]>; def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), "lfdx $frD, $src", LdStLFD, - [(set F8RC:$frD, (load xaddr:$src))]>; + [(set f64:$frD, (load xaddr:$src))]>; + +def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src), + "lfiwax $frD, $src", LdStLFD, + [(set f64:$frD, (PPClfiwax xoaddr:$src))]>; +def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src), + "lfiwzx $frD, $src", LdStLFD, + [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>; } //===----------------------------------------------------------------------===// @@ -819,137 +873,128 @@ def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), let PPC970_Unit = 2 in { def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src), "stb $rS, $src", LdStStore, - [(truncstorei8 GPRC:$rS, iaddr:$src)]>; + [(truncstorei8 i32:$rS, iaddr:$src)]>; def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src), "sth $rS, $src", LdStStore, - [(truncstorei16 GPRC:$rS, iaddr:$src)]>; + [(truncstorei16 i32:$rS, iaddr:$src)]>; def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), "stw $rS, $src", LdStStore, - [(store GPRC:$rS, iaddr:$src)]>; + [(store i32:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst), "stfs $rS, $dst", LdStSTFD, - [(store F4RC:$rS, iaddr:$dst)]>; + [(store f32:$rS, iaddr:$dst)]>; def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), "stfd $rS, $dst", LdStSTFD, - [(store F8RC:$rS, iaddr:$dst)]>; + [(store f64:$rS, iaddr:$dst)]>; } // Unindexed (r+i) Stores with Update (preinc). -let PPC970_Unit = 2 in { -def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU, - [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU, - [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +let PPC970_Unit = 2, mayStore = 1 in { +def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "stbu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "sthu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst), + "stfsu $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst), + "stfdu $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; } +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STBU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STHU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STWU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STFSU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STFDU $rS, iaddroff:$ptroff, $ptrreg)>; // Indexed (r+r) Stores. -// let PPC970_Unit = 2 in { def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst), "stbx $rS, $dst", LdStStore, - [(truncstorei8 GPRC:$rS, xaddr:$dst)]>, + [(truncstorei8 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst), "sthx $rS, $dst", LdStStore, - [(truncstorei16 GPRC:$rS, xaddr:$dst)]>, + [(truncstorei16 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), "stwx $rS, $dst", LdStStore, - [(store GPRC:$rS, xaddr:$dst)]>, - PPC970_DGroup_Cracked; - -def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 GPRC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + [(store i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 GPRC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), - (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU, - [(set ptr_rc:$ea_res, - (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res), - (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU, - [(set ptr_rc:$ea_res, - (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), "sthbrx $rS, $dst", LdStStore, - [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>, + [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>, PPC970_DGroup_Cracked; def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), "stwbrx $rS, $dst", LdStStore, - [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>, + [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>, PPC970_DGroup_Cracked; def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst), "stfiwx $frS, $dst", LdStSTFD, - [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>; + [(PPCstfiwx f64:$frS, xoaddr:$dst)]>; def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst), "stfsx $frS, $dst", LdStSTFD, - [(store F4RC:$frS, xaddr:$dst)]>; + [(store f32:$frS, xaddr:$dst)]>; def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), "stfdx $frS, $dst", LdStSTFD, - [(store F8RC:$frS, xaddr:$dst)]>; + [(store f64:$frS, xaddr:$dst)]>; } +// Indexed (r+r) Stores with Update (preinc). +let PPC970_Unit = 2, mayStore = 1 in { +def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "stbux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "sthux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "stwux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst), + "stfsux $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst), + "stfdux $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +} + +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STBUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STHUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STWUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STFSUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STFDUX $rS, $ptrreg, $ptroff)>; + def SYNC : XForm_24_sync<31, 598, (outs), (ins), "sync", LdStSync, [(int_ppc_sync)]>; @@ -959,68 +1004,66 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins), // let PPC970_Unit = 1 in { // FXU Operations. -def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), +def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; -def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm), - "addi $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; + [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>; let Defs = [CARRY] in { def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addic $rD, $rA, $imm", IntGeneral, - [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>, + [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>, PPC970_DGroup_Cracked; def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addic. $rD, $rA, $imm", IntGeneral, []>; } -def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm), +def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm), "addis $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>; -def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym), + [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>; +let isCodeGenOnly = 1 in +def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym), "la $rD, $sym($rA)", IntGeneral, - [(set GPRC:$rD, (add GPRC:$rA, + [(set i32:$rD, (add i32:$rA, (PPClo tglobaladdr:$sym, 0)))]>; def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "mulli $rD, $rA, $imm", IntMulLI, - [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>; + [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>; let Defs = [CARRY] in { def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "subfic $rD, $rA, $imm", IntGeneral, - [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>; + [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>; } let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm), "li $rD, $imm", IntSimple, - [(set GPRC:$rD, immSExt16:$imm)]>; + [(set i32:$rD, immSExt16:$imm)]>; def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm), "lis $rD, $imm", IntSimple, - [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>; + [(set i32:$rD, imm16ShiftedSExt:$imm)]>; } } let PPC970_Unit = 1 in { // FXU Operations. def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "andi. $dst, $src1, $src2", IntGeneral, - [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>, + [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>, isDOT; def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "andis. $dst, $src1, $src2", IntGeneral, - [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>, + [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>, isDOT; def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "ori $dst, $src1, $src2", IntSimple, - [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>; + [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>; def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "oris $dst, $src1, $src2", IntSimple, - [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>; + [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>; def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "xori $dst, $src1, $src2", IntSimple, - [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>; + [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>; def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "xoris $dst, $src1, $src2", IntSimple, - [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>; + [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>; def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple, []>; def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm), @@ -1033,38 +1076,38 @@ def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2), let PPC970_Unit = 1 in { // FXU Operations. def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "nand $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>; + [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>; def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "and $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (and i32:$rS, i32:$rB))]>; def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "andc $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>; + [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>; def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "or $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (or i32:$rS, i32:$rB))]>; def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "nor $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>; + [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>; def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "orc $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>; + [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>; def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "eqv $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>; + [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>; def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "xor $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (xor i32:$rS, i32:$rB))]>; def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "slw $rA, $rS, $rB", IntGeneral, - [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>; def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "srw $rA, $rS, $rB", IntGeneral, - [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>; let Defs = [CARRY] in { def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "sraw $rA, $rS, $rB", IntShift, - [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>; } } @@ -1072,17 +1115,17 @@ let PPC970_Unit = 1 in { // FXU Operations. let Defs = [CARRY] in { def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH), "srawi $rA, $rS, $SH", IntShift, - [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>; + [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>; } def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS), "cntlzw $rA, $rS", IntGeneral, - [(set GPRC:$rA, (ctlz GPRC:$rS))]>; + [(set i32:$rA, (ctlz i32:$rS))]>; def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS), "extsb $rA, $rS", IntSimple, - [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>; + [(set i32:$rA, (sext_inreg i32:$rS, i8))]>; def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS), "extsh $rA, $rS", IntSimple, - [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>; + [(set i32:$rA, (sext_inreg i32:$rS, i16))]>; def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB), "cmpw $crD, $rA, $rB", IntCompare>; @@ -1100,16 +1143,54 @@ def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB), let Uses = [RM] in { def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB), "fctiwz $frD, $frB", FPGeneral, - [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>; + [(set f64:$frD, (PPCfctiwz f64:$frB))]>; + def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB), "frsp $frD, $frB", FPGeneral, - [(set F4RC:$frD, (fround F8RC:$frB))]>; + [(set f32:$frD, (fround f64:$frB))]>; + + // The frin -> nearbyint mapping is valid only in fast-math mode. + def FRIND : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB), + "frin $frD, $frB", FPGeneral, + [(set f64:$frD, (fnearbyint f64:$frB))]>; + def FRINS : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB), + "frin $frD, $frB", FPGeneral, + [(set f32:$frD, (fnearbyint f32:$frB))]>; + + // These pseudos expand to rint but also set FE_INEXACT when the result does + // not equal the argument. + let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR! + def FRINDrint : Pseudo<(outs F8RC:$frD), (ins F8RC:$frB), + "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>; + def FRINSrint : Pseudo<(outs F4RC:$frD), (ins F4RC:$frB), + "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>; + } + + def FRIPD : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB), + "frip $frD, $frB", FPGeneral, + [(set f64:$frD, (fceil f64:$frB))]>; + def FRIPS : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB), + "frip $frD, $frB", FPGeneral, + [(set f32:$frD, (fceil f32:$frB))]>; + def FRIZD : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB), + "friz $frD, $frB", FPGeneral, + [(set f64:$frD, (ftrunc f64:$frB))]>; + def FRIZS : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB), + "friz $frD, $frB", FPGeneral, + [(set f32:$frD, (ftrunc f32:$frB))]>; + def FRIMD : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB), + "frim $frD, $frB", FPGeneral, + [(set f64:$frD, (ffloor f64:$frB))]>; + def FRIMS : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB), + "frim $frD, $frB", FPGeneral, + [(set f32:$frD, (ffloor f32:$frB))]>; + def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB), "fsqrt $frD, $frB", FPSqrt, - [(set F8RC:$frD, (fsqrt F8RC:$frB))]>; + [(set f64:$frD, (fsqrt f64:$frB))]>; def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB), "fsqrts $frD, $frB", FPSqrt, - [(set F4RC:$frD, (fsqrt F4RC:$frB))]>; + [(set f32:$frD, (fsqrt f32:$frB))]>; } } @@ -1119,29 +1200,29 @@ let Uses = [RM] in { /// sneak into a d-group with a store). def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB), "fmr $frD, $frB", FPGeneral, - []>, // (set F4RC:$frD, F4RC:$frB) + []>, // (set f32:$frD, f32:$frB) PPC970_Unit_Pseudo; let PPC970_Unit = 3 in { // FPU Operations. // These are artificially split into two different forms, for 4/8 byte FP. def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB), "fabs $frD, $frB", FPGeneral, - [(set F4RC:$frD, (fabs F4RC:$frB))]>; + [(set f32:$frD, (fabs f32:$frB))]>; def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB), "fabs $frD, $frB", FPGeneral, - [(set F8RC:$frD, (fabs F8RC:$frB))]>; + [(set f64:$frD, (fabs f64:$frB))]>; def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB), "fnabs $frD, $frB", FPGeneral, - [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>; + [(set f32:$frD, (fneg (fabs f32:$frB)))]>; def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB), "fnabs $frD, $frB", FPGeneral, - [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>; + [(set f64:$frD, (fneg (fabs f64:$frB)))]>; def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB), "fneg $frD, $frB", FPGeneral, - [(set F4RC:$frD, (fneg F4RC:$frB))]>; + [(set f32:$frD, (fneg f32:$frB))]>; def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB), "fneg $frD, $frB", FPGeneral, - [(set F8RC:$frD, (fneg F8RC:$frB))]>; + [(set f64:$frD, (fneg f64:$frB))]>; } @@ -1161,6 +1242,7 @@ def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD), "cror $CRD, $CRA, $CRB", BrCR, []>; +let isCodeGenOnly = 1 in { def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins), "creqv $dst, $dst, $dst", BrCR, []>; @@ -1178,6 +1260,7 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), "crxor 6, 6, 6", BrCR, [(PPCcr6unset)]>; } +} // XFX-Form instructions. Instructions that deal with SPRs. // @@ -1186,7 +1269,7 @@ def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins), "mfctr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in { +let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in { def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; @@ -1213,6 +1296,29 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins), "mfspr $rT, 256", IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; +let isCodeGenOnly = 1 in { + def MTVRSAVEv : XFXForm_7_ext<31, 467, 256, + (outs VRSAVERC:$reg), (ins GPRC:$rS), + "mtspr 256, $rS", IntGeneral>, + PPC970_DGroup_Single, PPC970_Unit_FXU; + def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), + (ins VRSAVERC:$reg), + "mfspr $rT, 256", IntGeneral>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register, +// so we'll need to scavenge a register for it. +let mayStore = 1 in +def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F), + "#SPILL_VRSAVE", []>; + +// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously +// spilled), so we'll need to scavenge a register for it. +let mayLoad = 1 in +def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), + "#RESTORE_VRSAVE", []>; + def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -1227,6 +1333,7 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), // instruction to keep the register allocator from becoming confused. // // FIXME: Make this a real Pseudo instruction when the JIT switches to MC. +let isCodeGenOnly = 1 in def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), "#MFCRpseud", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -1239,38 +1346,29 @@ def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; -// Instructions to manipulate FPSCR. Only long double handling uses these. -// FPSCR is not modelled; we use the SDNode Flag to keep things in order. +// Pseudo instruction to perform FADD in round-to-zero mode. +let usesCustomInserter = 1, Uses = [RM] in { + def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "", + [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>; +} +// The above pseudo gets expanded to make use of the following instructions +// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level. let Uses = [RM], Defs = [RM] in { def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM), - "mtfsb0 $FM", IntMTFSB0, - [(PPCmtfsb0 (i32 imm:$FM))]>, + "mtfsb0 $FM", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), - "mtfsb1 $FM", IntMTFSB0, - [(PPCmtfsb1 (i32 imm:$FM))]>, + "mtfsb1 $FM", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; - // MTFSF does not actually produce an FP result. We pretend it copies - // input reg B to the output. If we didn't do this it would look like the - // instruction had no outputs (because we aren't modelling the FPSCR) and - // it would be deleted. - def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA), - (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB), - "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0, - [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM), - F8RC:$rT, F8RC:$FRB))]>, + def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT), + "mtfsf $FM, $rT", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; } let Uses = [RM] in { def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins), "mffs $rT", IntMFFS, - [(set F8RC:$rT, (PPCmffs))]>, - PPC970_DGroup_Single, PPC970_Unit_FPU; - def FADDrtz: AForm_2<63, 21, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPAddSub, - [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>, + [(set f64:$rT, (PPCmffs))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; } @@ -1281,61 +1379,61 @@ let PPC970_Unit = 1 in { // FXU Operations. // def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "add $rT, $rA, $rB", IntSimple, - [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (add i32:$rA, i32:$rB))]>; let Defs = [CARRY] in { def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "addc $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>, + [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, PPC970_DGroup_Cracked; } def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "divw $rT, $rA, $rB", IntDivW, - [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>, + [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>, PPC970_DGroup_First, PPC970_DGroup_Cracked; def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "divwu $rT, $rA, $rB", IntDivW, - [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>, + [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>, PPC970_DGroup_First, PPC970_DGroup_Cracked; def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "mulhw $rT, $rA, $rB", IntMulHW, - [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>; def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "mulhwu $rT, $rA, $rB", IntMulHWU, - [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>; def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "mullw $rT, $rA, $rB", IntMulHW, - [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "subf $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>; + [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; let Defs = [CARRY] in { def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "subfc $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>, + [(set i32:$rT, (subc i32:$rB, i32:$rA))]>, PPC970_DGroup_Cracked; } def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA), "neg $rT, $rA", IntSimple, - [(set GPRC:$rT, (ineg GPRC:$rA))]>; + [(set i32:$rT, (ineg i32:$rA))]>; let Uses = [CARRY], Defs = [CARRY] in { def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "adde $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (adde i32:$rA, i32:$rB))]>; def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA), "addme $rT, $rA", IntGeneral, - [(set GPRC:$rT, (adde GPRC:$rA, -1))]>; + [(set i32:$rT, (adde i32:$rA, -1))]>; def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA), "addze $rT, $rA", IntGeneral, - [(set GPRC:$rT, (adde GPRC:$rA, 0))]>; + [(set i32:$rT, (adde i32:$rA, 0))]>; def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "subfe $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>; + [(set i32:$rT, (sube i32:$rB, i32:$rA))]>; def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA), "subfme $rT, $rA", IntGeneral, - [(set GPRC:$rT, (sube -1, GPRC:$rA))]>; + [(set i32:$rT, (sube -1, i32:$rA))]>; def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA), "subfze $rT, $rA", IntGeneral, - [(set GPRC:$rT, (sube 0, GPRC:$rA))]>; + [(set i32:$rT, (sube 0, i32:$rA))]>; } } @@ -1347,43 +1445,41 @@ let Uses = [RM] in { def FMADD : AForm_1<63, 29, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmadd $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, - (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>; + [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>; def FMADDS : AForm_1<59, 29, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, - (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>; + [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>; def FMSUB : AForm_1<63, 28, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmsub $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, - (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>; + [(set f64:$FRT, + (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>; def FMSUBS : AForm_1<59, 28, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, - (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>; + [(set f32:$FRT, + (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>; def FNMADD : AForm_1<63, 31, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, - (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>; + [(set f64:$FRT, + (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>; def FNMADDS : AForm_1<59, 31, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, - (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>; + [(set f32:$FRT, + (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>; def FNMSUB : AForm_1<63, 30, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC, - (fneg F8RC:$FRB))))]>; + [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC, + (fneg f64:$FRB))))]>; def FNMSUBS : AForm_1<59, 30, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC, - (fneg F4RC:$FRB))))]>; + [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, + (fneg f32:$FRB))))]>; } // FSEL is artificially split into 4 and 8-byte forms for the result. To avoid // having 4 of these, force the comparison to always be an 8-byte double (code @@ -1392,50 +1488,50 @@ let Uses = [RM] in { def FSELD : AForm_1<63, 23, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>; + [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>; def FSELS : AForm_1<63, 23, (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>; + [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>; let Uses = [RM] in { def FADD : AForm_2<63, 21, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "fadd $FRT, $FRA, $FRB", FPAddSub, - [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>; + [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>; def FADDS : AForm_2<59, 21, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), "fadds $FRT, $FRA, $FRB", FPGeneral, - [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>; + [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>; def FDIV : AForm_2<63, 18, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "fdiv $FRT, $FRA, $FRB", FPDivD, - [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>; + [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>; def FDIVS : AForm_2<59, 18, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), "fdivs $FRT, $FRA, $FRB", FPDivS, - [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>; + [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>; def FMUL : AForm_3<63, 25, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC), "fmul $FRT, $FRA, $FRC", FPFused, - [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRC))]>; + [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>; def FMULS : AForm_3<59, 25, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC), "fmuls $FRT, $FRA, $FRC", FPGeneral, - [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRC))]>; + [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>; def FSUB : AForm_2<63, 20, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "fsub $FRT, $FRA, $FRB", FPAddSub, - [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>; + [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>; def FSUBS : AForm_2<59, 20, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), "fsubs $FRT, $FRA, $FRB", FPGeneral, - [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>; + [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>; } } let PPC970_Unit = 1 in { // FXU Operations. def ISEL : AForm_4<31, 15, - (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond), + (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, CRBITRC:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; } @@ -1475,47 +1571,43 @@ def : Pat<(i32 imm:$imm), (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>; // Implement the 'not' operation with the NOR instruction. -def NOT : Pat<(not GPRC:$in), - (NOR GPRC:$in, GPRC:$in)>; +def NOT : Pat<(not i32:$in), + (NOR $in, $in)>; // ADD an arbitrary immediate. -def : Pat<(add GPRC:$in, imm:$imm), - (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>; +def : Pat<(add i32:$in, imm:$imm), + (ADDIS (ADDI $in, (LO16 imm:$imm)), (HA16 imm:$imm))>; // OR an arbitrary immediate. -def : Pat<(or GPRC:$in, imm:$imm), - (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>; +def : Pat<(or i32:$in, imm:$imm), + (ORIS (ORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>; // XOR an arbitrary immediate. -def : Pat<(xor GPRC:$in, imm:$imm), - (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>; +def : Pat<(xor i32:$in, imm:$imm), + (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>; // SUBFIC -def : Pat<(sub immSExt16:$imm, GPRC:$in), - (SUBFIC GPRC:$in, imm:$imm)>; +def : Pat<(sub immSExt16:$imm, i32:$in), + (SUBFIC $in, imm:$imm)>; // SHL/SRL -def : Pat<(shl GPRC:$in, (i32 imm:$imm)), - (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>; -def : Pat<(srl GPRC:$in, (i32 imm:$imm)), - (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>; +def : Pat<(shl i32:$in, (i32 imm:$imm)), + (RLWINM $in, imm:$imm, 0, (SHL32 imm:$imm))>; +def : Pat<(srl i32:$in, (i32 imm:$imm)), + (RLWINM $in, (SRL32 imm:$imm), imm:$imm, 31)>; // ROTL -def : Pat<(rotl GPRC:$in, GPRC:$sh), - (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>; -def : Pat<(rotl GPRC:$in, (i32 imm:$imm)), - (RLWINM GPRC:$in, imm:$imm, 0, 31)>; +def : Pat<(rotl i32:$in, i32:$sh), + (RLWNM $in, $sh, 0, 31)>; +def : Pat<(rotl i32:$in, (i32 imm:$imm)), + (RLWINM $in, imm:$imm, 0, 31)>; // RLWNM -def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm), - (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>; +def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm), + (RLWNM $in, $sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>; // Calls -def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)), - (BL_Darwin tglobaladdr:$dst)>; -def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)), - (BL_Darwin texternalsym:$dst)>; -def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)), - (BL_SVR4 tglobaladdr:$dst)>; -def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)), - (BL_SVR4 texternalsym:$dst)>; +def : Pat<(PPCcall (i32 tglobaladdr:$dst)), + (BL tglobaladdr:$dst)>; +def : Pat<(PPCcall (i32 texternalsym:$dst)), + (BL texternalsym:$dst)>; def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), @@ -1538,28 +1630,28 @@ def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>; def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>; def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>; def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>; -def : Pat<(PPChi tglobaltlsaddr:$g, GPRC:$in), - (ADDIS GPRC:$in, tglobaltlsaddr:$g)>; -def : Pat<(PPClo tglobaltlsaddr:$g, GPRC:$in), - (ADDIL GPRC:$in, tglobaltlsaddr:$g)>; -def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)), - (ADDIS GPRC:$in, tglobaladdr:$g)>; -def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)), - (ADDIS GPRC:$in, tconstpool:$g)>; -def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)), - (ADDIS GPRC:$in, tjumptable:$g)>; -def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)), - (ADDIS GPRC:$in, tblockaddress:$g)>; +def : Pat<(PPChi tglobaltlsaddr:$g, i32:$in), + (ADDIS $in, tglobaltlsaddr:$g)>; +def : Pat<(PPClo tglobaltlsaddr:$g, i32:$in), + (ADDI $in, tglobaltlsaddr:$g)>; +def : Pat<(add i32:$in, (PPChi tglobaladdr:$g, 0)), + (ADDIS $in, tglobaladdr:$g)>; +def : Pat<(add i32:$in, (PPChi tconstpool:$g, 0)), + (ADDIS $in, tconstpool:$g)>; +def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)), + (ADDIS $in, tjumptable:$g)>; +def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)), + (ADDIS $in, tblockaddress:$g)>; // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 5-bit and 6-bit shift // amounts. -def : Pat<(sra GPRC:$rS, GPRC:$rB), - (SRAW GPRC:$rS, GPRC:$rB)>; -def : Pat<(srl GPRC:$rS, GPRC:$rB), - (SRW GPRC:$rS, GPRC:$rB)>; -def : Pat<(shl GPRC:$rS, GPRC:$rB), - (SLW GPRC:$rS, GPRC:$rB)>; +def : Pat<(sra i32:$rS, i32:$rB), + (SRAW $rS, $rB)>; +def : Pat<(srl i32:$rS, i32:$rB), + (SRW $rS, $rB)>; +def : Pat<(shl i32:$rS, i32:$rB), + (SLW $rS, $rB)>; def : Pat<(zextloadi1 iaddr:$src), (LBZ iaddr:$src)>; @@ -1582,8 +1674,8 @@ def : Pat<(f64 (extloadf32 iaddr:$src)), def : Pat<(f64 (extloadf32 xaddr:$src)), (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>; -def : Pat<(f64 (fextend F4RC:$src)), - (COPY_TO_REGCLASS F4RC:$src, F8RC)>; +def : Pat<(f64 (fextend f32:$src)), + (COPY_TO_REGCLASS $src, F8RC)>; // Memory barriers def : Pat<(membarrier (i32 imm /*ll*/), diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 045b375dd8..ee18eadf6e 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -37,9 +37,19 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// PEI. bool MustSaveLR; + /// Does this function have any stack spills. + bool HasSpills; + + /// Does this function spill using instructions with only r+r (not r+i) + /// forms. + bool HasNonRISpills; + /// SpillsCR - Indicates whether CR is spilled in the current function. bool SpillsCR; + /// Indicates whether VRSAVE is spilled in the current function. + bool SpillsVRSAVE; + /// LRStoreRequired - The bool indicates whether there is some explicit use of /// the LR/LR8 stack slot that is not obvious from scanning the code. This /// requires that the code generator produce a store of LR to the stack on @@ -78,7 +88,10 @@ public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), ReturnAddrSaveIndex(0), + HasSpills(false), + HasNonRISpills(false), SpillsCR(false), + SpillsVRSAVE(false), LRStoreRequired(false), MinReservedArea(0), TailCallSPDelta(0), @@ -109,9 +122,18 @@ public: void setMustSaveLR(bool U) { MustSaveLR = U; } bool mustSaveLR() const { return MustSaveLR; } + void setHasSpills() { HasSpills = true; } + bool hasSpills() const { return HasSpills; } + + void setHasNonRISpills() { HasNonRISpills = true; } + bool hasNonRISpills() const { return HasNonRISpills; } + void setSpillsCR() { SpillsCR = true; } bool isCRSpilled() const { return SpillsCR; } + void setSpillsVRSAVE() { SpillsVRSAVE = true; } + bool isVRSAVESpilled() const { return SpillsVRSAVE; } + void setLRStoreRequired() { LRStoreRequired = true; } bool isLRStoreRequired() const { return LRStoreRequired; } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index df245cc655..1d61a3a8ea 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -46,26 +46,8 @@ #define GET_REGINFO_TARGET_DESC #include "PPCGenRegisterInfo.inc" -namespace llvm { -cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger", - cl::init(false), - cl::desc("Disable PPC32 register scavenger"), - cl::Hidden); -cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger", - cl::init(false), - cl::desc("Disable PPC64 register scavenger"), - cl::Hidden); -} - using namespace llvm; -// FIXME (64-bit): Should be inlined. -bool -PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const { - return ((!DisablePPC32RS && !Subtarget.isPPC64()) || - (!DisablePPC64RS && Subtarget.isPPC64())); -} - PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, const TargetInstrInfo &tii) : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, @@ -86,20 +68,20 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8; ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8; ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX; - ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32; -} - -bool -PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { - return requiresRegisterScavenging(MF); + ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; } - /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { + if (Kind == 1) { + if (Subtarget.isPPC64()) + return &PPC::G8RC_NOX0RegClass; + return &PPC::GPRC_NOR0RegClass; + } + if (Subtarget.isPPC64()) return &PPC::G8RCRegClass; return &PPC::GPRCRegClass; @@ -123,12 +105,35 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask; } +const uint32_t* +PPCRegisterInfo::getNoPreservedMask() const { + // The naming here is inverted: The CSR_NoRegs_Altivec has the + // Altivec registers masked so that they're not saved and restored around + // instructions with this preserved mask. + + if (!Subtarget.hasAltivec()) + return CSR_NoRegs_Altivec_RegMask; + + if (Subtarget.isDarwin()) + return CSR_NoRegs_Darwin_RegMask; + return CSR_NoRegs_RegMask; +} + BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering()); - Reserved.set(PPC::R0); + // The ZERO register is not really a register, but the representation of r0 + // when used in instructions that treat r0 as the constant 0. + Reserved.set(PPC::ZERO); + Reserved.set(PPC::ZERO8); + + // The FP register is also not really a register, but is the representation + // of the frame pointer register used by ISD::FRAMEADDR. + Reserved.set(PPC::FP); + Reserved.set(PPC::FP8); + Reserved.set(PPC::R1); Reserved.set(PPC::LR); Reserved.set(PPC::LR8); @@ -139,35 +144,21 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R2); // System-reserved register Reserved.set(PPC::R13); // Small Data Area pointer register } - // Reserve R2 on Darwin to hack around the problem of save/restore of CR - // when the stack frame is too big to address directly; we need two regs. - // This is a hack. - if (Subtarget.isDarwinABI()) { - Reserved.set(PPC::R2); - } // On PPC64, r13 is the thread pointer. Never allocate this register. - // Note that this is over conservative, as it also prevents allocation of R31 - // when the FP is not needed. if (Subtarget.isPPC64()) { Reserved.set(PPC::R13); - Reserved.set(PPC::R31); - Reserved.set(PPC::X0); Reserved.set(PPC::X1); Reserved.set(PPC::X13); - Reserved.set(PPC::X31); + + if (PPCFI->needsFP(MF)) + Reserved.set(PPC::X31); // The 64-bit SVR4 ABI reserves r2 for the TOC pointer. if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } - // Reserve X2 on Darwin to hack around the problem of save/restore of CR - // when the stack frame is too big to address directly; we need two regs. - // This is a hack. - if (Subtarget.isDarwinABI()) { - Reserved.set(PPC::X2); - } } if (PPCFI->needsFP(MF)) @@ -185,6 +176,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, switch (RC->getID()) { default: return 0; + case PPC::G8RC_NOX0RegClassID: + case PPC::GPRC_NOR0RegClassID: case PPC::G8RCRegClassID: case PPC::GPRCRegClassID: { unsigned FP = TFI->hasFP(MF) ? 1 : 0; @@ -199,38 +192,10 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -bool -PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { - switch (RC->getID()) { - case PPC::G8RCRegClassID: - case PPC::GPRCRegClassID: - case PPC::F8RCRegClassID: - case PPC::F4RCRegClassID: - case PPC::VRRCRegClassID: - return true; - default: - return false; - } -} - //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// -/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered -/// register first and then a spilled callee-saved register if that fails. -static -unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS, - const TargetRegisterClass *RC, int SPAdj) { - assert(RS && "Register scavenging must be on"); - unsigned Reg = RS->FindUnusedReg(RC); - // FIXME: move ARM callee-saved reg scan to target independent code, then - // search for already spilled CS register here. - if (Reg == 0) - Reg = RS->scavengeRegister(RC, II, SPAdj); - return Reg; -} - /// lowerDynamicAlloc - Generate the code for allocating an object in the /// current frame. The sequence of code with be in the general form /// @@ -238,8 +203,7 @@ unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS, /// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size /// addi Rnew, SP, \#maxCalFrameSize ; get the top of the allocation /// -void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { +void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { // Get the instruction. MachineInstr &MI = *II; // Get the instruction's basic block. @@ -271,28 +235,16 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Fortunately, a frame greater than 32K is rare. const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *RC = LP64 ? G8RC : GPRC; - - // FIXME (64-bit): Use "findScratchRegister" - unsigned Reg; - if (requiresRegisterScavenging(MF)) - Reg = findScratchRegister(II, RS, RC, SPAdj); - else - Reg = PPC::R0; + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) .addReg(PPC::R31) .addImm(FrameSize); } else if (LP64) { - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) - .addImm(0) - .addReg(PPC::X1); - else - BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0) - .addImm(0) - .addReg(PPC::X1); + BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) + .addImm(0) + .addReg(PPC::X1); } else { BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg) .addImm(0) @@ -302,17 +254,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Grow the stack and update the stack pointer link, then determine the // address of new allocated space. if (LP64) { - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(Reg, RegState::Kill) - .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - else - BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(PPC::X0, RegState::Kill) - .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) + .addReg(Reg, RegState::Kill) + .addReg(PPC::X1) + .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) .addReg(PPC::X1) @@ -354,23 +299,19 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, /// stw rA, FI ; Store rA to the frame. /// void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, - unsigned FrameIndex, int SPAdj, - RegScavenger *RS) const { + unsigned FrameIndex) const { // Get the instruction. MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset> // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); DebugLoc dl = MI.getDebugLoc(); - // FIXME: Once LLVM supports creating virtual registers here, or the register - // scavenger can return multiple registers, stop using reserved registers - // here. - (void) SPAdj; - (void) RS; - bool LP64 = Subtarget.isPPC64(); - unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : - (LP64 ? PPC::X0 : PPC::R0); + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); unsigned SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue @@ -380,16 +321,20 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, // If the saved register wasn't CR0, shift the bits left so that they are in // CR0's slot. - if (SrcReg != PPC::CR0) + if (SrcReg != PPC::CR0) { + unsigned Reg1 = Reg; + Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + // rlwinm rA, rA, ShiftBits, 0, 31. BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) - .addReg(Reg, RegState::Kill) - .addImm(getPPCRegisterNumbering(SrcReg) * 4) + .addReg(Reg1, RegState::Kill) + .addImm(getEncodingValue(SrcReg) * 4) .addImm(0) .addImm(31); + } addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW)) - .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())), + .addReg(Reg, RegState::Kill), FrameIndex); // Discard the pseudo instruction. @@ -397,23 +342,19 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, } void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, - unsigned FrameIndex, int SPAdj, - RegScavenger *RS) const { + unsigned FrameIndex) const { // Get the instruction. MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset> // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); DebugLoc dl = MI.getDebugLoc(); - // FIXME: Once LLVM supports creating virtual registers here, or the register - // scavenger can return multiple registers, stop using reserved registers - // here. - (void) SPAdj; - (void) RS; - bool LP64 = Subtarget.isPPC64(); - unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : - (LP64 ? PPC::X0 : PPC::R0); + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); unsigned DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_CR does not define its destination"); @@ -424,15 +365,67 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, // If the reloaded register isn't CR0, shift the bits right so that they are // in the right CR's slot. if (DestReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; + unsigned Reg1 = Reg; + Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + + unsigned ShiftBits = getEncodingValue(DestReg)*4; // rlwinm r11, r11, 32-ShiftBits, 0, 31. BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) - .addReg(Reg).addImm(32-ShiftBits).addImm(0) + .addReg(Reg1, RegState::Kill).addImm(32-ShiftBits).addImm(0) .addImm(31); } BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg) - .addReg(Reg); + .addReg(Reg, RegState::Kill); + + // Discard the pseudo instruction. + MBB.erase(II); +} + +void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + // Get the instruction. + MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC); + unsigned SrcReg = MI.getOperand(0).getReg(); + + BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg) + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW)) + .addReg(Reg, RegState::Kill), + FrameIndex); + + // Discard the pseudo instruction. + MBB.erase(II); +} + +void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + // Get the instruction. + MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC); + unsigned DestReg = MI.getOperand(0).getReg(); + assert(MI.definesRegister(DestReg) && + "RESTORE_VRSAVE does not define its destination"); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ), + Reg), FrameIndex); + + BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg) + .addReg(Reg, RegState::Kill); // Discard the pseudo instruction. MBB.erase(II); @@ -494,19 +487,23 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Special case for dynamic alloca. if (FPSI && FrameIndex == FPSI && (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) { - lowerDynamicAlloc(II, SPAdj, RS); + lowerDynamicAlloc(II); return; } - // Special case for pseudo-ops SPILL_CR and RESTORE_CR. - if (requiresRegisterScavenging(MF)) { - if (OpC == PPC::SPILL_CR) { - lowerCRSpilling(II, FrameIndex, SPAdj, RS); - return; - } else if (OpC == PPC::RESTORE_CR) { - lowerCRRestore(II, FrameIndex, SPAdj, RS); - return; - } + // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc. + if (OpC == PPC::SPILL_CR) { + lowerCRSpilling(II, FrameIndex); + return; + } else if (OpC == PPC::RESTORE_CR) { + lowerCRRestore(II, FrameIndex); + return; + } else if (OpC == PPC::SPILL_VRSAVE) { + lowerVRSAVESpilling(II, FrameIndex); + return; + } else if (OpC == PPC::RESTORE_VRSAVE) { + lowerVRSAVERestore(II, FrameIndex); + return; } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). @@ -525,11 +522,14 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, case PPC::LWA: case PPC::LD: case PPC::STD: - case PPC::STD_32: isIXAddr = true; break; } - + + // If the instruction is not present in ImmToIdxMap, then it has no immediate + // form (and must be r+r). + bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC); + // Now add the frame object offset to the offset from r1. int Offset = MFI->getObjectOffset(FrameIndex); if (!isIXAddr) @@ -553,7 +553,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm - (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { + (!noImmForm && + isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); @@ -563,19 +564,17 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // The offset doesn't fit into a single register, scavenge one to build the // offset in. - unsigned SReg; - if (requiresRegisterScavenging(MF)) { - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj); - } else - SReg = is64Bit ? PPC::X0 : PPC::R0; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC; + unsigned SRegHi = MF.getRegInfo().createVirtualRegister(RC), + SReg = MF.getRegInfo().createVirtualRegister(RC); // Insert a set of rA with the full offset value before the ld, st, or add - BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg) + BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi) .addImm(Offset >> 16); BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg) - .addReg(SReg, RegState::Kill) + .addReg(SRegHi, RegState::Kill) .addImm(Offset); // Convert into indexed form of the instruction: @@ -584,7 +583,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0 unsigned OperandBase; - if (OpC != TargetOpcode::INLINEASM) { + if (noImmForm) + OperandBase = 1; + else if (OpC != TargetOpcode::INLINEASM) { assert(ImmToIdxMap.count(OpC) && "No indexed form of load or store available!"); unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 9840666242..7e6683eeb2 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -15,8 +15,8 @@ #ifndef POWERPC32_REGISTERINFO_H #define POWERPC32_REGISTERINFO_H +#include "llvm/ADT/DenseMap.h" #include "PPC.h" -#include <map> #define GET_REGINFO_HEADER #include "PPCGenRegisterInfo.inc" @@ -27,7 +27,7 @@ class TargetInstrInfo; class Type; class PPCRegisterInfo : public PPCGenRegisterInfo { - std::map<unsigned, unsigned> ImmToIdxMap; + DenseMap<unsigned, unsigned> ImmToIdxMap; const PPCSubtarget &Subtarget; const TargetInstrInfo &TII; public: @@ -44,23 +44,33 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID CC) const; + const uint32_t *getNoPreservedMask() const; BitVector getReservedRegs(const MachineFunction &MF) const; - virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; + /// We require the register scavenger. + bool requiresRegisterScavenging(const MachineFunction &MF) const { + return true; + } + + bool requiresFrameIndexScavenging(const MachineFunction &MF) const { + return true; + } + + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + return true; + } + + void lowerDynamicAlloc(MachineBasicBlock::iterator II) const; + void lowerCRSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + void lowerCRRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + void lowerVRSAVESpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + void lowerVRSAVERestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; - /// requiresRegisterScavenging - We require a register scavenger. - /// FIXME (64-bit): Should be inlined. - bool requiresRegisterScavenging(const MachineFunction &MF) const; - - bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; - - void lowerDynamicAlloc(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const; - void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, - int SPAdj, RegScavenger *RS) const; - void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex, - int SPAdj, RegScavenger *RS) const; bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 8ee9b1ec9f..57a25f5143 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -27,40 +27,40 @@ class PPCReg<string n> : Register<n> { // GPR - One of the 32 32-bit general-purpose registers class GPR<bits<5> num, string n> : PPCReg<n> { - field bits<5> Num = num; + let HWEncoding{4-0} = num; } // GP8 - One of the 32 64-bit general-purpose registers class GP8<GPR SubReg, string n> : PPCReg<n> { - field bits<5> Num = SubReg.Num; + let HWEncoding = SubReg.HWEncoding; let SubRegs = [SubReg]; let SubRegIndices = [sub_32]; } // SPR - One of the 32-bit special-purpose registers class SPR<bits<10> num, string n> : PPCReg<n> { - field bits<10> Num = num; + let HWEncoding{9-0} = num; } // FPR - One of the 32 64-bit floating-point registers class FPR<bits<5> num, string n> : PPCReg<n> { - field bits<5> Num = num; + let HWEncoding{4-0} = num; } // VR - One of the 32 128-bit vector registers class VR<bits<5> num, string n> : PPCReg<n> { - field bits<5> Num = num; + let HWEncoding{4-0} = num; } // CR - One of the 8 4-bit condition registers class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { - field bits<3> Num = num; + let HWEncoding{2-0} = num; let SubRegs = subregs; } // CRBIT - One of the 32 1-bit condition register fields class CRBIT<bits<5> num, string n> : PPCReg<n> { - field bits<5> Num = num; + let HWEncoding{4-0} = num; } // General-purpose registers @@ -86,6 +86,14 @@ foreach Index = 0-31 in { DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; } +// The reprsentation of r0 when treated as the constant 0. +def ZERO : GPR<0, "0">; +def ZERO8 : GP8<ZERO, "0">; + +// Representations of the frame pointer used by ISD::FRAMEADDR. +def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">; +def FP8 : GP8<FP, "**FRAME POINTER**">; + // Condition register bits def CR0LT : CRBIT< 0, "0">; def CR0GT : CRBIT< 1, "1">; @@ -164,11 +172,17 @@ def RM: SPR<512, "**ROUNDING MODE**">; // then nonvolatiles in reverse order since stmw/lmw save from rN to r31 def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12), (sequence "R%u", 30, 13), - R31, R0, R1, LR)>; + R31, R0, R1, FP)>; def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12), (sequence "X%u", 30, 14), - X31, X13, X0, X1, LR8)>; + X31, X13, X0, X1, FP8)>; + +// For some instructions r0 is special (representing the value 0 instead of +// the value in the r0 register), and we use these register subclasses to +// prevent r0 from being allocated for use by those instructions. +def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>; +def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>; // Allocate volatiles first, then non-volatiles in reverse order. With the SVR4 // ABI the size of the Floating-point register save area is determined by the diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 18e4c07942..1b7150fd37 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -39,7 +39,12 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, , HasQPX(false) , HasFSQRT(false) , HasSTFIWX(false) + , HasLFIWAX(false) + , HasFPRND(false) + , HasFPCVT(false) , HasISEL(false) + , HasPOPCNTD(false) + , HasLDBRX(false) , IsBookE(false) , HasLazyResolverStubs(false) , IsJITCodeModel(false) diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 15885bd2df..d5dfa1e8e1 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -78,7 +78,12 @@ protected: bool HasQPX; bool HasFSQRT; bool HasSTFIWX; + bool HasLFIWAX; + bool HasFPRND; + bool HasFPCVT; bool HasISEL; + bool HasPOPCNTD; + bool HasLDBRX; bool IsBookE; bool HasLazyResolverStubs; bool IsJITCodeModel; @@ -155,10 +160,15 @@ public: // Specific obvious features. bool hasFSQRT() const { return HasFSQRT; } bool hasSTFIWX() const { return HasSTFIWX; } + bool hasLFIWAX() const { return HasLFIWAX; } + bool hasFPRND() const { return HasFPRND; } + bool hasFPCVT() const { return HasFPCVT; } bool hasAltivec() const { return HasAltivec; } bool hasQPX() const { return HasQPX; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } + bool hasPOPCNTD() const { return HasPOPCNTD; } + bool hasLDBRX() const { return HasLDBRX; } bool isBookE() const { return IsBookE; } const Triple &getTargetTriple() const { return TargetTriple; } diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 5e9ad347d3..00037edafc 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -122,9 +122,8 @@ llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) { PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - // FIXME: PPC currently does not have custom popcnt lowering even though - // there is hardware support. Once this is fixed, update this function - // to reflect the real capabilities of the hardware. + if (ST->hasPOPCNTD() && TyWidth <= 64) + return PSK_FastHardware; return PSK_Software; } diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index b6763aa738..9a7902a82d 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -1,7 +1,6 @@ //===- README.txt - Notes for improving PowerPC-specific code gen ---------===// TODO: -* gpr0 allocation * lmw/stmw pass a la arm load store optimizer for prolog/epilog ===-------------------------------------------------------------------------=== diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index e099a9fc31..0b01433cc9 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -23,6 +23,8 @@ class AMDGPUTargetMachine; // R600 Passes FunctionPass* createR600KernelParametersPass(const DataLayout *TD); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); +FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm); +FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm); // SI Passes FunctionPass *createSIAnnotateControlFlowPass(); diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 5995b6f5e8..a266df535d 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -60,6 +60,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::v4f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); setOperationAction(ISD::UREM, MVT::i32, Expand); diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index a59c775272..e740348717 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -202,8 +202,8 @@ class Vector2_Build <ValueType vecType, RegisterClass vectorClass, (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1) >; -class Vector_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < +class Vector4_Build <ValueType vecType, RegisterClass vectorClass, + ValueType elemType, RegisterClass elemClass> : Pat < (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y), (elemType elemClass:$z), (elemType elemClass:$w))), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG diff --git a/lib/Target/R600/AMDGPUMachineFunction.cpp b/lib/Target/R600/AMDGPUMachineFunction.cpp new file mode 100644 index 0000000000..0223ec8e4f --- /dev/null +++ b/lib/Target/R600/AMDGPUMachineFunction.cpp @@ -0,0 +1,22 @@ +#include "AMDGPUMachineFunction.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" + +namespace llvm { + +const char *AMDGPUMachineFunction::ShaderTypeAttribute = "ShaderType"; + +AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : + MachineFunctionInfo() { + AttributeSet Set = MF.getFunction()->getAttributes(); + Attribute A = Set.getAttribute(AttributeSet::FunctionIndex, + ShaderTypeAttribute); + + if (A.isStringAttribute()) { + StringRef Str = A.getValueAsString(); + if (Str.getAsInteger(0, ShaderType)) + llvm_unreachable("Can't parse shader type!"); + } +} + +} diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h new file mode 100644 index 0000000000..21c8c51dae --- /dev/null +++ b/lib/Target/R600/AMDGPUMachineFunction.h @@ -0,0 +1,29 @@ +//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUMACHINEFUNCTION_H +#define AMDGPUMACHINEFUNCTION_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +class AMDGPUMachineFunction : public MachineFunctionInfo { +private: + static const char *ShaderTypeAttribute; +public: + AMDGPUMachineFunction(const MachineFunction &MF); + unsigned ShaderType; +}; + +} +#endif // AMDGPUMACHINEFUNCTION_H diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index b723433c16..dea43b874c 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -17,6 +17,7 @@ #include "AMDGPU.h" #include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/MapVector.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/RegionPass.h" @@ -40,13 +41,14 @@ typedef SmallVector<BBValuePair, 2> BBValueVector; typedef SmallPtrSet<BasicBlock *, 8> BBSet; -typedef DenseMap<PHINode *, BBValueVector> PhiMap; +typedef MapVector<PHINode *, BBValueVector> PhiMap; +typedef MapVector<BasicBlock *, BBVector> BB2BBVecMap; + typedef DenseMap<DomTreeNode *, unsigned> DTN2UnsignedMap; typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap; typedef DenseMap<BasicBlock *, Value *> BBPredicates; typedef DenseMap<BasicBlock *, BBPredicates> PredMap; typedef DenseMap<BasicBlock *, BasicBlock*> BB2BBMap; -typedef DenseMap<BasicBlock *, BBVector> BB2BBVecMap; // The name for newly created blocks. diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 0185747544..e7ea876e2a 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -151,7 +151,9 @@ bool AMDGPUPassConfig::addPreEmitPass() { if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { addPass(createAMDGPUCFGPreparationPass(*TM)); addPass(createAMDGPUCFGStructurizerPass(*TM)); + addPass(createR600EmitClauseMarkers(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); + addPass(createR600ControlFlowFinalizer(*TM)); addPass(&FinalizeMachineBundlesID); } else { addPass(createSILowerControlFlowPass(*TM)); diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 0c7880d232..fa8f62de9c 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -365,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, SDValue Operand = Ops[OperandIdx[i] - 1]; switch (Operand.getOpcode()) { case AMDGPUISD::CONST_ADDRESS: { - if (i == 2) - break; SDValue CstOffset; - if (!Operand.getValueType().isVector() && - SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) { - Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); - Ops[SelIdx[i] - 1] = CstOffset; - return true; + if (Operand.getValueType().isVector() || + !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) + break; + + // Gather others constants values + std::vector<unsigned> Consts; + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = OperandIdx[j]; + if (SrcIdx < 0) + break; + if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) { + if (Reg->getReg() == AMDGPU::ALU_CONST) { + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]); + Consts.push_back(Cst->getZExtValue()); + } + } } + + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset); + Consts.push_back(Cst->getZExtValue()); + if (!TII->fitsConstReadLimitations(Consts)) + break; + + Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); + Ops[SelIdx[i] - 1] = CstOffset; + return true; } - break; case ISD::FNEG: if (NegIdx[i] < 0) break; diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 63c59e1cb5..8efba5846b 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_target(R600CodeGen AMDGPUFrameLowering.cpp AMDGPUIndirectAddressing.cpp AMDGPUMCInstLower.cpp + AMDGPUMachineFunction.cpp AMDGPUSubtarget.cpp AMDGPUStructurizeCFG.cpp AMDGPUTargetMachine.cpp @@ -34,6 +35,8 @@ add_llvm_target(R600CodeGen AMDGPUConvertToISA.cpp AMDGPUInstrInfo.cpp AMDGPURegisterInfo.cpp + R600ControlFlowFinalizer.cpp + R600EmitClauseMarkers.cpp R600ExpandSpecialInstrs.cpp R600InstrInfo.cpp R600ISelLowering.cpp diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 4d3d3e7945..b7cdd7c8cd 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -68,8 +68,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() { //===--- Dwarf Emission Directives -----------------------------------===// HasLEB128 = true; SupportsDebugInformation = true; - ExceptionsType = ExceptionHandling::None; - DwarfUsesInlineInfoSection = false; DwarfSectionOffsetDirective = ".offset"; } diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index d20716000d..6ef4d40934 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -66,8 +66,6 @@ private: void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx, raw_ostream &OS) const; void EmitDst(const MCInst &MI, raw_ostream &OS) const; - void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const; void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const; void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const; @@ -103,7 +101,8 @@ enum InstrTypes { INSTR_FC, INSTR_NATIVE, INSTR_VTX, - INSTR_EXPORT + INSTR_EXPORT, + INSTR_CFALU }; enum FCInstr { @@ -140,9 +139,7 @@ MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { - if (isTexOp(MI.getOpcode())) { - EmitTexInstr(MI, Fixups, OS); - } else if (isFCOp(MI.getOpcode())){ + if (isFCOp(MI.getOpcode())){ EmitFCInstr(MI, OS); } else if (MI.getOpcode() == AMDGPU::RETURN || MI.getOpcode() == AMDGPU::BUNDLE || @@ -150,6 +147,10 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, return; } else { switch(MI.getOpcode()) { + case AMDGPU::STACK_SIZE: { + EmitByte(MI.getOperand(0).getImm(), OS); + break; + } case AMDGPU::RAT_WRITE_CACHELESS_32_eg: case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { uint64_t inst = getBinaryCodeForInstr(MI, Fixups); @@ -175,6 +176,77 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, Emit(InstWord2, OS); break; } + case AMDGPU::TEX_LD: + case AMDGPU::TEX_GET_TEXTURE_RESINFO: + case AMDGPU::TEX_SAMPLE: + case AMDGPU::TEX_SAMPLE_C: + case AMDGPU::TEX_SAMPLE_L: + case AMDGPU::TEX_SAMPLE_C_L: + case AMDGPU::TEX_SAMPLE_LB: + case AMDGPU::TEX_SAMPLE_C_LB: + case AMDGPU::TEX_SAMPLE_G: + case AMDGPU::TEX_SAMPLE_C_G: + case AMDGPU::TEX_GET_GRADIENTS_H: + case AMDGPU::TEX_GET_GRADIENTS_V: + case AMDGPU::TEX_SET_GRADIENTS_H: + case AMDGPU::TEX_SET_GRADIENTS_V: { + unsigned Opcode = MI.getOpcode(); + bool HasOffsets = (Opcode == AMDGPU::TEX_LD); + unsigned OpOffset = HasOffsets ? 3 : 0; + int64_t Sampler = MI.getOperand(OpOffset + 3).getImm(); + int64_t TextureType = MI.getOperand(OpOffset + 4).getImm(); + + uint32_t SrcSelect[4] = {0, 1, 2, 3}; + uint32_t Offsets[3] = {0, 0, 0}; + uint64_t CoordType[4] = {1, 1, 1, 1}; + + if (HasOffsets) + for (unsigned i = 0; i < 3; i++) + Offsets[i] = MI.getOperand(i + 2).getImm(); + + if (TextureType == TEXTURE_RECT || + TextureType == TEXTURE_SHADOWRECT) { + CoordType[ELEMENT_X] = 0; + CoordType[ELEMENT_Y] = 0; + } + + if (TextureType == TEXTURE_1D_ARRAY || + TextureType == TEXTURE_SHADOW1D_ARRAY) { + if (Opcode == AMDGPU::TEX_SAMPLE_C_L || + Opcode == AMDGPU::TEX_SAMPLE_C_LB) { + CoordType[ELEMENT_Y] = 0; + } else { + CoordType[ELEMENT_Z] = 0; + SrcSelect[ELEMENT_Z] = ELEMENT_Y; + } + } else if (TextureType == TEXTURE_2D_ARRAY || + TextureType == TEXTURE_SHADOW2D_ARRAY) { + CoordType[ELEMENT_Z] = 0; + } + + + if ((TextureType == TEXTURE_SHADOW1D || + TextureType == TEXTURE_SHADOW2D || + TextureType == TEXTURE_SHADOWRECT || + TextureType == TEXTURE_SHADOW1D_ARRAY) && + Opcode != AMDGPU::TEX_SAMPLE_C_L && + Opcode != AMDGPU::TEX_SAMPLE_C_LB) { + SrcSelect[ELEMENT_W] = ELEMENT_Z; + } + + uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) | + CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 | + CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63; + uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 | + SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 | + SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 | + Offsets[2] << 10; + + EmitByte(INSTR_TEX, OS); + Emit(Word01, OS); + Emit(Word2, OS); + break; + } case AMDGPU::EG_ExportSwz: case AMDGPU::R600_ExportSwz: case AMDGPU::EG_ExportBuf: @@ -184,7 +256,29 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, Emit(Inst, OS); break; } - + case AMDGPU::CF_ALU: + case AMDGPU::CF_ALU_PUSH_BEFORE: { + uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); + EmitByte(INSTR_CFALU, OS); + Emit(Inst, OS); + break; + } + case AMDGPU::CF_TC: + case AMDGPU::CF_VC: + case AMDGPU::CF_CALL_FS: + return; + case AMDGPU::WHILE_LOOP: + case AMDGPU::END_LOOP: + case AMDGPU::LOOP_BREAK: + case AMDGPU::CF_CONTINUE: + case AMDGPU::CF_JUMP: + case AMDGPU::CF_ELSE: + case AMDGPU::POP: { + uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); + EmitByte(INSTR_NATIVE, OS); + Emit(Inst, OS); + break; + } default: EmitALUInstr(MI, Fixups, OS); break; @@ -334,99 +428,6 @@ void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, Emit(InlineConstant.i, OS); } -void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI, - SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const { - - unsigned Opcode = MI.getOpcode(); - bool hasOffsets = (Opcode == AMDGPU::TEX_LD); - unsigned OpOffset = hasOffsets ? 3 : 0; - int64_t Resource = MI.getOperand(OpOffset + 2).getImm(); - int64_t Sampler = MI.getOperand(OpOffset + 3).getImm(); - int64_t TextureType = MI.getOperand(OpOffset + 4).getImm(); - unsigned srcSelect[4] = {0, 1, 2, 3}; - - // Emit instruction type - EmitByte(1, OS); - - // Emit instruction - EmitByte(getBinaryCodeForInstr(MI, Fixups), OS); - - // Emit resource id - EmitByte(Resource, OS); - - // Emit source register - EmitByte(getHWReg(MI.getOperand(1).getReg()), OS); - - // XXX: Emit src isRelativeAddress - EmitByte(0, OS); - - // Emit destination register - EmitByte(getHWReg(MI.getOperand(0).getReg()), OS); - - // XXX: Emit dst isRealtiveAddress - EmitByte(0, OS); - - // XXX: Emit dst select - EmitByte(0, OS); // X - EmitByte(1, OS); // Y - EmitByte(2, OS); // Z - EmitByte(3, OS); // W - - // XXX: Emit lod bias - EmitByte(0, OS); - - // XXX: Emit coord types - unsigned coordType[4] = {1, 1, 1, 1}; - - if (TextureType == TEXTURE_RECT - || TextureType == TEXTURE_SHADOWRECT) { - coordType[ELEMENT_X] = 0; - coordType[ELEMENT_Y] = 0; - } - - if (TextureType == TEXTURE_1D_ARRAY - || TextureType == TEXTURE_SHADOW1D_ARRAY) { - if (Opcode == AMDGPU::TEX_SAMPLE_C_L || Opcode == AMDGPU::TEX_SAMPLE_C_LB) { - coordType[ELEMENT_Y] = 0; - } else { - coordType[ELEMENT_Z] = 0; - srcSelect[ELEMENT_Z] = ELEMENT_Y; - } - } else if (TextureType == TEXTURE_2D_ARRAY - || TextureType == TEXTURE_SHADOW2D_ARRAY) { - coordType[ELEMENT_Z] = 0; - } - - for (unsigned i = 0; i < 4; i++) { - EmitByte(coordType[i], OS); - } - - // XXX: Emit offsets - if (hasOffsets) - for (unsigned i = 2; i < 5; i++) - EmitByte(MI.getOperand(i).getImm()<<1, OS); - else - EmitNullBytes(3, OS); - - // Emit sampler id - EmitByte(Sampler, OS); - - // XXX:Emit source select - if ((TextureType == TEXTURE_SHADOW1D - || TextureType == TEXTURE_SHADOW2D - || TextureType == TEXTURE_SHADOWRECT - || TextureType == TEXTURE_SHADOW1D_ARRAY) - && Opcode != AMDGPU::TEX_SAMPLE_C_L - && Opcode != AMDGPU::TEX_SAMPLE_C_LB) { - srcSelect[ELEMENT_W] = ELEMENT_Z; - } - - for (unsigned i = 0; i < 4; i++) { - EmitByte(srcSelect[i], OS); - } -} - void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const { // Emit instruction type diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index e27abccbe1..5af83209a0 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -39,8 +39,6 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCRegisterInfo &MRI; - const MCSubtargetInfo &STI; - MCContext &Ctx; /// \brief Can this operand also contain immediate values? bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const; @@ -51,7 +49,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { public: SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, const MCSubtargetInfo &sti, MCContext &ctx) - : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { } + : MCII(mcii), MRI(mri) { } ~SIMCCodeEmitter() { } diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp new file mode 100644 index 0000000000..bd87d741ec --- /dev/null +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -0,0 +1,264 @@ +//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass compute turns all control flow pseudo instructions into native one +/// computing their address on the fly ; it also sets STACK_SIZE info. +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "R600Defines.h" +#include "R600InstrInfo.h" +#include "R600MachineFunctionInfo.h" +#include "R600RegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +namespace llvm { + +class R600ControlFlowFinalizer : public MachineFunctionPass { + +private: + static char ID; + const R600InstrInfo *TII; + unsigned MaxFetchInst; + + bool isFetch(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case AMDGPU::TEX_VTX_CONSTBUF: + case AMDGPU::TEX_VTX_TEXBUF: + case AMDGPU::TEX_LD: + case AMDGPU::TEX_GET_TEXTURE_RESINFO: + case AMDGPU::TEX_GET_GRADIENTS_H: + case AMDGPU::TEX_GET_GRADIENTS_V: + case AMDGPU::TEX_SET_GRADIENTS_H: + case AMDGPU::TEX_SET_GRADIENTS_V: + case AMDGPU::TEX_SAMPLE: + case AMDGPU::TEX_SAMPLE_C: + case AMDGPU::TEX_SAMPLE_L: + case AMDGPU::TEX_SAMPLE_C_L: + case AMDGPU::TEX_SAMPLE_LB: + case AMDGPU::TEX_SAMPLE_C_LB: + case AMDGPU::TEX_SAMPLE_G: + case AMDGPU::TEX_SAMPLE_C_G: + case AMDGPU::TXD: + case AMDGPU::TXD_SHADOW: + return true; + default: + return false; + } + } + + bool IsTrivialInst(MachineInstr *MI) const { + switch (MI->getOpcode()) { + case AMDGPU::KILL: + case AMDGPU::RETURN: + return true; + default: + return false; + } + } + + MachineBasicBlock::iterator + MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned CfAddress) const { + MachineBasicBlock::iterator ClauseHead = I; + unsigned AluInstCount = 0; + for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { + if (IsTrivialInst(I)) + continue; + if (!isFetch(I)) + break; + AluInstCount ++; + if (AluInstCount > MaxFetchInst) + break; + } + BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), + TII->get(AMDGPU::CF_TC)) + .addImm(CfAddress) // ADDR + .addImm(AluInstCount); // COUNT + return I; + } + void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const { + switch (MI->getOpcode()) { + case AMDGPU::WHILE_LOOP: + MI->getOperand(0).setImm(Addr + 1); + break; + default: + MI->getOperand(0).setImm(Addr); + break; + } + } + void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr) + const { + for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end(); + It != E; ++It) { + MachineInstr *MI = *It; + CounterPropagateAddr(MI, Addr); + } + } + +public: + R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID), + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { + const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) + MaxFetchInst = 8; + else + MaxFetchInst = 16; + } + + virtual bool runOnMachineFunction(MachineFunction &MF) { + unsigned MaxStack = 0; + unsigned CurrentStack = 0; + for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; + ++MB) { + MachineBasicBlock &MBB = *MB; + unsigned CfCount = 0; + std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack; + std::vector<std::pair<unsigned, MachineInstr *> > IfThenElseStack; + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); + if (MFI->ShaderType == 1) { + BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), + TII->get(AMDGPU::CF_CALL_FS)); + CfCount++; + } + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E;) { + if (isFetch(I)) { + I = MakeFetchClause(MBB, I, 0); + CfCount++; + continue; + } + + MachineBasicBlock::iterator MI = I; + I++; + switch (MI->getOpcode()) { + case AMDGPU::CF_ALU_PUSH_BEFORE: + CurrentStack++; + MaxStack = std::max(MaxStack, CurrentStack); + case AMDGPU::KILLGT: + case AMDGPU::CF_ALU: + CfCount++; + break; + case AMDGPU::WHILELOOP: { + CurrentStack++; + MaxStack = std::max(MaxStack, CurrentStack); + MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), + TII->get(AMDGPU::WHILE_LOOP)) + .addImm(0); + std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount, + std::set<MachineInstr *>()); + Pair.second.insert(MIb); + LoopStack.push_back(Pair); + MI->eraseFromParent(); + CfCount++; + break; + } + case AMDGPU::ENDLOOP: { + CurrentStack--; + std::pair<unsigned, std::set<MachineInstr *> > Pair = + LoopStack.back(); + LoopStack.pop_back(); + CounterPropagateAddr(Pair.second, CfCount); + BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP)) + .addImm(Pair.first + 1); + MI->eraseFromParent(); + CfCount++; + break; + } + case AMDGPU::IF_PREDICATE_SET: { + MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), + TII->get(AMDGPU::CF_JUMP)) + .addImm(0) + .addImm(0); + std::pair<unsigned, MachineInstr *> Pair(CfCount, MIb); + IfThenElseStack.push_back(Pair); + MI->eraseFromParent(); + CfCount++; + break; + } + case AMDGPU::ELSE: { + std::pair<unsigned, MachineInstr *> Pair = IfThenElseStack.back(); + IfThenElseStack.pop_back(); + CounterPropagateAddr(Pair.second, CfCount); + MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), + TII->get(AMDGPU::CF_ELSE)) + .addImm(0) + .addImm(1); + std::pair<unsigned, MachineInstr *> NewPair(CfCount, MIb); + IfThenElseStack.push_back(NewPair); + MI->eraseFromParent(); + CfCount++; + break; + } + case AMDGPU::ENDIF: { + CurrentStack--; + std::pair<unsigned, MachineInstr *> Pair = IfThenElseStack.back(); + IfThenElseStack.pop_back(); + CounterPropagateAddr(Pair.second, CfCount + 1); + BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP)) + .addImm(CfCount + 1) + .addImm(1); + MI->eraseFromParent(); + CfCount++; + break; + } + case AMDGPU::PREDICATED_BREAK: { + CurrentStack--; + CfCount += 3; + BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP)) + .addImm(CfCount) + .addImm(1); + MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), + TII->get(AMDGPU::LOOP_BREAK)) + .addImm(0); + BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP)) + .addImm(CfCount) + .addImm(1); + LoopStack.back().second.insert(MIb); + MI->eraseFromParent(); + break; + } + case AMDGPU::CONTINUE: { + MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), + TII->get(AMDGPU::CF_CONTINUE)) + .addImm(CfCount); + LoopStack.back().second.insert(MIb); + MI->eraseFromParent(); + CfCount++; + break; + } + default: + break; + } + } + BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), + TII->get(AMDGPU::STACK_SIZE)) + .addImm(MaxStack); + } + + return false; + } + + const char *getPassName() const { + return "R600 Control Flow Finalizer Pass"; + } +}; + +char R600ControlFlowFinalizer::ID = 0; + +} + + +llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) { + return new R600ControlFlowFinalizer(TM); +} + diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp new file mode 100644 index 0000000000..7c7469a04b --- /dev/null +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp @@ -0,0 +1,253 @@ +//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold +/// 128 Alu instructions ; these instructions can access up to 4 prefetched +/// 4 lines of 16 registers from constant buffers. Such ALU clauses are +/// initiated by CF_ALU instructions. +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "R600Defines.h" +#include "R600InstrInfo.h" +#include "R600MachineFunctionInfo.h" +#include "R600RegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +namespace llvm { + +class R600EmitClauseMarkersPass : public MachineFunctionPass { + +private: + static char ID; + const R600InstrInfo *TII; + + unsigned OccupiedDwords(MachineInstr *MI) const { + switch (MI->getOpcode()) { + case AMDGPU::INTERP_PAIR_XY: + case AMDGPU::INTERP_PAIR_ZW: + case AMDGPU::INTERP_VEC_LOAD: + case AMDGPU::DOT4_eg_pseudo: + case AMDGPU::DOT4_r600_pseudo: + return 4; + case AMDGPU::KILL: + return 0; + default: + break; + } + + if(TII->isVector(*MI) || + TII->isCubeOp(MI->getOpcode()) || + TII->isReductionOp(MI->getOpcode())) + return 4; + + unsigned NumLiteral = 0; + for (MachineInstr::mop_iterator It = MI->operands_begin(), + E = MI->operands_end(); It != E; ++It) { + MachineOperand &MO = *It; + if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) + ++NumLiteral; + } + return 1 + NumLiteral; + } + + bool isALU(const MachineInstr *MI) const { + if (MI->getOpcode() == AMDGPU::KILLGT) + return false; + if (TII->isALUInstr(MI->getOpcode())) + return true; + if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode())) + return true; + switch (MI->getOpcode()) { + case AMDGPU::PRED_X: + case AMDGPU::INTERP_PAIR_XY: + case AMDGPU::INTERP_PAIR_ZW: + case AMDGPU::INTERP_VEC_LOAD: + case AMDGPU::COPY: + case AMDGPU::DOT4_eg_pseudo: + case AMDGPU::DOT4_r600_pseudo: + return true; + default: + return false; + } + } + + bool IsTrivialInst(MachineInstr *MI) const { + switch (MI->getOpcode()) { + case AMDGPU::KILL: + case AMDGPU::RETURN: + return true; + default: + return false; + } + } + + // Register Idx, then Const value + std::vector<std::pair<unsigned, unsigned> > ExtractConstRead(MachineInstr *MI) + const { + const R600Operands::Ops OpTable[3][2] = { + {R600Operands::SRC0, R600Operands::SRC0_SEL}, + {R600Operands::SRC1, R600Operands::SRC1_SEL}, + {R600Operands::SRC2, R600Operands::SRC2_SEL}, + }; + std::vector<std::pair<unsigned, unsigned> > Result; + + if (!TII->isALUInstr(MI->getOpcode())) + return Result; + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[j][0]); + if (SrcIdx < 0) + break; + if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) { + unsigned Const = MI->getOperand( + TII->getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); + Result.push_back(std::pair<unsigned, unsigned>(SrcIdx, Const)); + } + } + return Result; + } + + std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const { + // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2 + // (See also R600ISelLowering.cpp) + // ConstIndex value is in [0, 4095]; + return std::pair<unsigned, unsigned>( + ((Sel >> 2) - 512) >> 12, // KC_BANK + // Line Number of ConstIndex + // A line contains 16 constant registers however KCX bank can lock + // two line at the same time ; thus we want to get an even line number. + // Line number can be retrieved with (>>4), using (>>5) <<1 generates + // an even number. + ((((Sel >> 2) - 512) & 4095) >> 5) << 1); + } + + bool SubstituteKCacheBank(MachineInstr *MI, + std::vector<std::pair<unsigned, unsigned> > &CachedConsts) const { + std::vector<std::pair<unsigned, unsigned> > UsedKCache; + std::vector<std::pair<unsigned, unsigned> > Consts = ExtractConstRead(MI); + assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const"); + for (unsigned i = 0, n = Consts.size(); i < n; ++i) { + unsigned Sel = Consts[i].second; + unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; + unsigned KCacheIndex = Index * 4 + Chan; + const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel); + if (CachedConsts.empty()) { + CachedConsts.push_back(BankLine); + UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); + continue; + } + if (CachedConsts[0] == BankLine) { + UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); + continue; + } + if (CachedConsts.size() == 1) { + CachedConsts.push_back(BankLine); + UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); + continue; + } + if (CachedConsts[1] == BankLine) { + UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); + continue; + } + return false; + } + + for (unsigned i = 0, n = Consts.size(); i < n; ++i) { + switch(UsedKCache[i].first) { + case 0: + MI->getOperand(Consts[i].first).setReg( + AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[i].second)); + break; + case 1: + MI->getOperand(Consts[i].first).setReg( + AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[i].second)); + break; + default: + llvm_unreachable("Wrong Cache Line"); + } + } + return true; + } + + MachineBasicBlock::iterator + MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { + MachineBasicBlock::iterator ClauseHead = I; + std::vector<std::pair<unsigned, unsigned> > KCacheBanks; + bool PushBeforeModifier = false; + unsigned AluInstCount = 0; + for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { + if (IsTrivialInst(I)) + continue; + if (!isALU(I)) + break; + if (I->getOpcode() == AMDGPU::PRED_X) { + if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH) + PushBeforeModifier = true; + AluInstCount ++; + continue; + } + if (TII->isALUInstr(I->getOpcode()) && + !SubstituteKCacheBank(I, KCacheBanks)) + break; + AluInstCount += OccupiedDwords(I); + if (AluInstCount > 124) + break; + } + unsigned Opcode = PushBeforeModifier ? + AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU; + BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode)) + .addImm(0) // ADDR + .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0 + .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1 + .addImm(KCacheBanks.empty()?0:2) // KM0 + .addImm((KCacheBanks.size() < 2)?0:2) // KM1 + .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0 + .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1 + .addImm(AluInstCount); // COUNT + return I; + } + +public: + R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID), + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } + + virtual bool runOnMachineFunction(MachineFunction &MF) { + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + MachineBasicBlock::iterator I = MBB.begin(); + if (I->getOpcode() == AMDGPU::CF_ALU) + continue; // BB was already parsed + for (MachineBasicBlock::iterator E = MBB.end(); I != E;) { + if (isALU(I)) + I = MakeALUClause(MBB, I); + else + ++I; + } + } + return false; + } + + const char *getPassName() const { + return "R600 Emit Clause Markers Pass"; + } +}; + +char R600EmitClauseMarkersPass::ID = 0; + +} + + +llvm::FunctionPass *llvm::createR600EmitClauseMarkers(TargetMachine &TM) { + return new R600EmitClauseMarkersPass(TM); +} + diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index a73691dd3c..53e6e51dd2 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -28,7 +28,6 @@ using namespace llvm; R600TargetLowering::R600TargetLowering(TargetMachine &TM) : AMDGPUTargetLowering(TM), TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) { - setOperationAction(ISD::MUL, MVT::i64, Expand); addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); @@ -58,7 +57,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom); - setOperationAction(ISD::FPOW, MVT::f32, Custom); setOperationAction(ISD::ROTL, MVT::i32, Custom); @@ -316,7 +314,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); - case ISD::FPOW: return LowerFPOW(Op, DAG); case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); @@ -918,15 +915,6 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const return DAG.getMergeValues(Ops, 2, DL); } -SDValue R600TargetLowering::LowerFPOW(SDValue Op, - SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0)); - SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase); - return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase); -} - /// XXX Only kernel functions are supported, so we can assume for now that /// every function is a kernel function, but in the future we should use /// separate calling conventions for kernel and non-kernel functions. diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index 5cb4b912a1..2c09acb9af 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -59,7 +59,6 @@ private: SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index be3318a0b4..08650980fd 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const { (TargetFlags & R600_InstFlag::OP3)); } +bool +R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts) + const { + assert (Consts.size() <= 12 && "Too many operands in instructions group"); + unsigned Pair1 = 0, Pair2 = 0; + for (unsigned i = 0, n = Consts.size(); i < n; ++i) { + unsigned ReadConstHalf = Consts[i] & 2; + unsigned ReadConstIndex = Consts[i] & (~3); + unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf; + if (!Pair1) { + Pair1 = ReadHalfConst; + continue; + } + if (Pair1 == ReadHalfConst) + continue; + if (!Pair2) { + Pair2 = ReadHalfConst; + continue; + } + if (Pair2 != ReadHalfConst) + return false; + } + return true; +} + +bool +R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const { + std::vector<unsigned> Consts; + for (unsigned i = 0, n = MIs.size(); i < n; i++) { + const MachineInstr *MI = MIs[i]; + + const R600Operands::Ops OpTable[3][2] = { + {R600Operands::SRC0, R600Operands::SRC0_SEL}, + {R600Operands::SRC1, R600Operands::SRC1_SEL}, + {R600Operands::SRC2, R600Operands::SRC2_SEL}, + }; + + if (!isALUInstr(MI->getOpcode())) + continue; + + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); + if (SrcIdx < 0) + break; + if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) { + unsigned Const = MI->getOperand( + getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); + Consts.push_back(Const); + } + } + } + return fitsConstReadLimitations(Consts); +} + DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const { const InstrItineraryData *II = TM->getInstrItineraryData(); diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index efe721c00c..bf9569e659 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -53,6 +53,9 @@ namespace llvm { /// \returns true if this \p Opcode represents an ALU instruction. bool isALUInstr(unsigned Opcode) const; + bool fitsConstReadLimitations(const std::vector<unsigned>&) const; + bool canBundle(const std::vector<MachineInstr *> &) const; + /// \breif Vector instructions are instructions that must fill all /// instruction slots within an instruction group. bool isVector(const MachineInstr &MI) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index c5fa3347dc..663b41a66d 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -234,6 +234,80 @@ class VTX_WORD1_GPR { let Word1{31} = SRF_MODE_ALL; } +class TEX_WORD0 { + field bits<32> Word0; + + bits<5> TEX_INST; + bits<2> INST_MOD; + bits<1> FETCH_WHOLE_QUAD; + bits<8> RESOURCE_ID; + bits<7> SRC_GPR; + bits<1> SRC_REL; + bits<1> ALT_CONST; + bits<2> RESOURCE_INDEX_MODE; + bits<2> SAMPLER_INDEX_MODE; + + let Word0{4-0} = TEX_INST; + let Word0{6-5} = INST_MOD; + let Word0{7} = FETCH_WHOLE_QUAD; + let Word0{15-8} = RESOURCE_ID; + let Word0{22-16} = SRC_GPR; + let Word0{23} = SRC_REL; + let Word0{24} = ALT_CONST; + let Word0{26-25} = RESOURCE_INDEX_MODE; + let Word0{28-27} = SAMPLER_INDEX_MODE; +} + +class TEX_WORD1 { + field bits<32> Word1; + + bits<7> DST_GPR; + bits<1> DST_REL; + bits<3> DST_SEL_X; + bits<3> DST_SEL_Y; + bits<3> DST_SEL_Z; + bits<3> DST_SEL_W; + bits<7> LOD_BIAS; + bits<1> COORD_TYPE_X; + bits<1> COORD_TYPE_Y; + bits<1> COORD_TYPE_Z; + bits<1> COORD_TYPE_W; + + let Word1{6-0} = DST_GPR; + let Word1{7} = DST_REL; + let Word1{11-9} = DST_SEL_X; + let Word1{14-12} = DST_SEL_Y; + let Word1{17-15} = DST_SEL_Z; + let Word1{20-18} = DST_SEL_W; + let Word1{27-21} = LOD_BIAS; + let Word1{28} = COORD_TYPE_X; + let Word1{29} = COORD_TYPE_Y; + let Word1{30} = COORD_TYPE_Z; + let Word1{31} = COORD_TYPE_W; +} + +class TEX_WORD2 { + field bits<32> Word2; + + bits<5> OFFSET_X; + bits<5> OFFSET_Y; + bits<5> OFFSET_Z; + bits<5> SAMPLER_ID; + bits<3> SRC_SEL_X; + bits<3> SRC_SEL_Y; + bits<3> SRC_SEL_Z; + bits<3> SRC_SEL_W; + + let Word2{4-0} = OFFSET_X; + let Word2{9-5} = OFFSET_Y; + let Word2{14-10} = OFFSET_Z; + let Word2{19-15} = SAMPLER_ID; + let Word2{22-20} = SRC_SEL_X; + let Word2{25-23} = SRC_SEL_Y; + let Word2{28-26} = SRC_SEL_Z; + let Word2{31-29} = SRC_SEL_W; +} + /* XXX: R600 subtarget uses a slightly different encoding than the other subtargets. We currently handle this in R600MCCodeEmitter, but we may @@ -277,9 +351,9 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern, (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), - !strconcat(opName, + !strconcat(" ", opName, "$clamp $dst$write$dst_rel$omod, " - "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, " + "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -318,10 +392,10 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern, R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), - !strconcat(opName, + !strconcat(" ", opName, "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, " - "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, " - "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, " + "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " + "$src1_neg$src1_abs$src1$src1_abs$src1_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -356,10 +430,10 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern, R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel, R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), - !strconcat(opName, "$clamp $dst$dst_rel, " - "$src0_neg$src0$src0_sel$src0_rel, " - "$src1_neg$src1$src1_sel$src1_rel, " - "$src2_neg$src2$src2_sel$src2_rel, " + !strconcat(" ", opName, "$clamp $dst$dst_rel, " + "$src0_neg$src0$src0_rel, " + "$src1_neg$src1$src1_rel, " + "$src2_neg$src2$src2_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -386,12 +460,32 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern, class R600_TEX <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : InstR600 <inst, - (outs R600_Reg128:$dst), - (ins R600_Reg128:$src0, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), - !strconcat(opName, "$dst, $src0, $resourceId, $samplerId, $textureTarget"), + (outs R600_Reg128:$DST_GPR), + (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget), + !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"), pattern, - itin>{ - let Inst {10-0} = inst; + itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 { + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; + + let TEX_INST = inst{4-0}; + let SRC_REL = 0; + let DST_REL = 0; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 2; + let DST_SEL_W = 3; + let LOD_BIAS = 0; + + let INST_MOD = 0; + let FETCH_WHOLE_QUAD = 0; + let ALT_CONST = 0; + let SAMPLER_INDEX_MODE = 0; + + let COORD_TYPE_X = 0; + let COORD_TYPE_Y = 0; + let COORD_TYPE_Z = 0; + let COORD_TYPE_W = 0; } } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0 @@ -671,6 +765,167 @@ class ExportBufInst : InstR600ISA<( let Inst{63-32} = Word1; } +//===----------------------------------------------------------------------===// +// Control Flow Instructions +//===----------------------------------------------------------------------===// + +class CF_ALU_WORD0 { + field bits<32> Word0; + + bits<22> ADDR; + bits<4> KCACHE_BANK0; + bits<4> KCACHE_BANK1; + bits<2> KCACHE_MODE0; + + let Word0{21-0} = ADDR; + let Word0{25-22} = KCACHE_BANK0; + let Word0{29-26} = KCACHE_BANK1; + let Word0{31-30} = KCACHE_MODE0; +} + +class CF_ALU_WORD1 { + field bits<32> Word1; + + bits<2> KCACHE_MODE1; + bits<8> KCACHE_ADDR0; + bits<8> KCACHE_ADDR1; + bits<7> COUNT; + bits<1> ALT_CONST; + bits<4> CF_INST; + bits<1> WHOLE_QUAD_MODE; + bits<1> BARRIER; + + let Word1{1-0} = KCACHE_MODE1; + let Word1{9-2} = KCACHE_ADDR0; + let Word1{17-10} = KCACHE_ADDR1; + let Word1{24-18} = COUNT; + let Word1{25} = ALT_CONST; + let Word1{29-26} = CF_INST; + let Word1{30} = WHOLE_QUAD_MODE; + let Word1{31} = BARRIER; +} + +class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs), +(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, i32imm:$KCACHE_MODE0, i32imm:$KCACHE_MODE1, +i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT), +!strconcat(OpName, " $COUNT, @$ADDR, " +"KC0[CB$KCACHE_BANK0:$KCACHE_ADDR0-$KCACHE_ADDR0+32]" +", KC1[CB$KCACHE_BANK1:$KCACHE_ADDR1-$KCACHE_ADDR1+32]"), +[] >, CF_ALU_WORD0, CF_ALU_WORD1 { + field bits<64> Inst; + + let CF_INST = inst; + let ALT_CONST = 0; + let WHOLE_QUAD_MODE = 0; + let BARRIER = 1; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; +} + +class CF_WORD0 { + field bits<32> Word0; + + bits<24> ADDR; + bits<3> JUMPTABLE_SEL; + + let Word0{23-0} = ADDR; + let Word0{26-24} = JUMPTABLE_SEL; +} + +class CF_WORD1 { + field bits<32> Word1; + + bits<3> POP_COUNT; + bits<5> CF_CONST; + bits<2> COND; + bits<6> COUNT; + bits<1> VALID_PIXEL_MODE; + bits<8> CF_INST; + bits<1> BARRIER; + + let Word1{2-0} = POP_COUNT; + let Word1{7-3} = CF_CONST; + let Word1{9-8} = COND; + let Word1{15-10} = COUNT; + let Word1{20} = VALID_PIXEL_MODE; + let Word1{29-22} = CF_INST; + let Word1{31} = BARRIER; +} + +class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), +ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 { + field bits<64> Inst; + + let CF_INST = inst; + let BARRIER = 1; + let JUMPTABLE_SEL = 0; + let CF_CONST = 0; + let VALID_PIXEL_MODE = 0; + let COND = 0; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; +} + +def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT), +"TEX $COUNT @$ADDR"> { + let POP_COUNT = 0; +} + +def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT), +"VTX $COUNT @$ADDR"> { + let POP_COUNT = 0; +} + +def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; +} + +def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; +} + +def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; +} + +def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; +} + +def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; +} + +def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; +} + +def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> { + let ADDR = 0; + let COUNT = 0; + let POP_COUNT = 0; +} + +def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; +} + +def CF_ALU : ALU_CLAUSE<8, "ALU">; +def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">; + +def STACK_SIZE : AMDGPUInst <(outs), +(ins i32imm:$num), "nstack $num", [] > { + field bits<8> Inst; + bits<8> num; + let Inst = num; +} + let Predicates = [isR600toCayman] in { //===----------------------------------------------------------------------===// @@ -867,25 +1122,33 @@ def CNDGT_INT : R600_3OP < def TEX_LD : R600_TEX < 0x03, "TEX_LD", - [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txf R600_Reg128:$SRC_GPR, + imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID, + imm:$SAMPLER_ID, imm:$textureTarget))] > { -let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $resourceId, $samplerId, $textureTarget"; -let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget); +let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z," + "$RESOURCE_ID, $SAMPLER_ID, $textureTarget"; +let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X, + i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, + i32imm:$textureTarget); } def TEX_GET_TEXTURE_RESINFO : R600_TEX < 0x04, "TEX_GET_TEXTURE_RESINFO", - [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txq R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_GET_GRADIENTS_H : R600_TEX < 0x07, "TEX_GET_GRADIENTS_H", - [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddx R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_GET_GRADIENTS_V : R600_TEX < 0x08, "TEX_GET_GRADIENTS_V", - [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddy R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_SET_GRADIENTS_H : R600_TEX < @@ -900,32 +1163,38 @@ def TEX_SET_GRADIENTS_V : R600_TEX < def TEX_SAMPLE : R600_TEX < 0x10, "TEX_SAMPLE", - [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_SAMPLE_C : R600_TEX < 0x18, "TEX_SAMPLE_C", - [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] >; def TEX_SAMPLE_L : R600_TEX < 0x11, "TEX_SAMPLE_L", - [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_SAMPLE_C_L : R600_TEX < 0x19, "TEX_SAMPLE_C_L", - [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] >; def TEX_SAMPLE_LB : R600_TEX < 0x12, "TEX_SAMPLE_LB", - [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0,imm:$resourceId, imm:$samplerId, imm:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_SAMPLE_C_LB : R600_TEX < 0x1A, "TEX_SAMPLE_C_LB", - [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] >; def TEX_SAMPLE_G : R600_TEX < @@ -1141,6 +1410,7 @@ let Predicates = [isR600] in { def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>; defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>; + def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL, R600_Reg32>; def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; def : Pat<(fsqrt R600_Reg32:$src), @@ -1212,6 +1482,7 @@ def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; +def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL, R600_Reg32>; def : SIN_PAT <SIN_eg>; def : COS_PAT <COS_eg>; def : Pat<(fsqrt R600_Reg32:$src), @@ -1540,13 +1811,14 @@ def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>; def EXP_IEEE_cm : EXP_IEEE_Common<0x81>; -def LOG_IEEE_ : LOG_IEEE_Common<0x83>; +def LOG_IEEE_cm : LOG_IEEE_Common<0x83>; def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>; def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>; def SIN_cm : SIN_Common<0x8D>; def COS_cm : COS_Common<0x8E>; } // End isVector = 1 +def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL, R600_Reg32>; def : SIN_PAT <SIN_cm>; def : COS_PAT <COS_cm>; @@ -1979,8 +2251,8 @@ def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>; def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>; def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>; -def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>; -def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>; +def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>; +def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>; // bitconvert patterns diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp index 40aec833ea..018b403633 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.cpp +++ b/lib/Target/R600/R600MachineFunctionInfo.cpp @@ -13,6 +13,6 @@ using namespace llvm; R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) - : MachineFunctionInfo() { - memset(Outputs, 0, sizeof(Outputs)); - } + : AMDGPUMachineFunction(MF) { } + + diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h index 4b901f4bbc..99c1f91b09 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.h +++ b/lib/Target/R600/R600MachineFunctionInfo.h @@ -14,19 +14,17 @@ #define R600MACHINEFUNCTIONINFO_H #include "llvm/ADT/BitVector.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "AMDGPUMachineFunction.h" #include <vector> namespace llvm { -class R600MachineFunctionInfo : public MachineFunctionInfo { - +class R600MachineFunctionInfo : public AMDGPUMachineFunction { public: R600MachineFunctionInfo(const MachineFunction &MF); SmallVector<unsigned, 4> LiveOuts; std::vector<unsigned> IndirectRegs; - SDNode *Outputs[16]; }; } // End llvm namespace diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index 19baef94c7..9074364bb3 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -37,7 +37,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { CurInstKind = IDOther; CurEmitted = 0; OccupedSlotsMask = 15; - memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate)); InstKindLimit[IDAlu] = 120; // 120 minus 8 for security @@ -288,79 +287,19 @@ int R600SchedStrategy::getInstKind(SUnit* SU) { } } -class ConstPairs { -private: - unsigned XYPair; - unsigned ZWPair; -public: - ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) { - for (unsigned i = 0; i < 3; i++) { - unsigned ReadConstChan = ReadConst[i] & 3; - unsigned ReadConstIndex = ReadConst[i] & (~3); - if (ReadConstChan < 2) { - if (!XYPair) { - XYPair = ReadConstIndex; - } - } else { - if (!ZWPair) { - ZWPair = ReadConstIndex; - } - } - } - } - - bool isCompatibleWith(const ConstPairs& CP) const { - return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) && - (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair); - } -}; - -static -const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) { - unsigned ReadConsts[3] = {0, 0, 0}; - R600Operands::Ops OpTable[3][2] = { - {R600Operands::SRC0, R600Operands::SRC0_SEL}, - {R600Operands::SRC1, R600Operands::SRC1_SEL}, - {R600Operands::SRC2, R600Operands::SRC2_SEL}, - }; - - if (!TII->isALUInstr(MI.getOpcode())) - return ConstPairs(ReadConsts); - - for (unsigned i = 0; i < 3; i++) { - int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]); - if (SrcIdx < 0) - break; - if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) - ReadConsts[i] =MI.getOperand( - TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm(); - } - return ConstPairs(ReadConsts); -} - -bool -R600SchedStrategy::isBundleable(const MachineInstr& MI) { - const ConstPairs &MIPair = getPairs(TII, MI); - for (unsigned i = 0; i < 4; i++) { - if (!InstructionsGroupCandidate[i]) - continue; - const ConstPairs &IGPair = getPairs(TII, - *InstructionsGroupCandidate[i]->getInstr()); - if (!IGPair.isCompatibleWith(MIPair)) - return false; - } - return true; -} - SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) { if (Q.empty()) return NULL; for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end(); It != E; ++It) { SUnit *SU = *It; - if (isBundleable(*SU->getInstr())) { + InstructionsGroupCandidate.push_back(SU->getInstr()); + if (TII->canBundle(InstructionsGroupCandidate)) { + InstructionsGroupCandidate.pop_back(); Q.erase(It); return SU; + } else { + InstructionsGroupCandidate.pop_back(); } } return NULL; @@ -381,7 +320,7 @@ void R600SchedStrategy::PrepareNextSlot() { DEBUG(dbgs() << "New Slot\n"); assert (OccupedSlotsMask && "Slot wasn't filled"); OccupedSlotsMask = 0; - memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate)); + InstructionsGroupCandidate.clear(); LoadAlu(); } @@ -462,7 +401,7 @@ SUnit* R600SchedStrategy::pickAlu() { SUnit *SU = AttemptFillSlot(Chan); if (SU) { OccupedSlotsMask |= (1 << Chan); - InstructionsGroupCandidate[Chan] = SU; + InstructionsGroupCandidate.push_back(SU->getInstr()); return SU; } } diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h index d74ff1e076..3d0367fd8e 100644 --- a/lib/Target/R600/R600MachineScheduler.h +++ b/lib/Target/R600/R600MachineScheduler.h @@ -98,7 +98,7 @@ public: virtual void releaseBottomNode(SUnit *SU); private: - SUnit *InstructionsGroupCandidate[4]; + std::vector<MachineInstr *> InstructionsGroupCandidate; int getInstKind(SUnit *SU); bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const; @@ -112,7 +112,6 @@ private: void AssignSlot(MachineInstr *MI, unsigned Slot); SUnit* pickAlu(); SUnit* pickOther(int QID); - bool isBundleable(const MachineInstr& MI); void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst); }; diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index ce5994ca36..03f49761ea 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -43,6 +43,37 @@ foreach Index = 0-127 in { Index>; } +// KCACHE_BANK0 +foreach Index = 159-128 in { + foreach Chan = [ "X", "Y", "Z", "W" ] in { + // 32-bit Temporary Registers + def KC0_#Index#_#Chan : R600RegWithChan <"KC0["#Index#"-128]."#Chan, Index, Chan>; + } + // 128-bit Temporary Registers + def KC0_#Index#_XYZW : R600Reg_128 <"KC0["#Index#"-128].XYZW", + [!cast<Register>("KC0_"#Index#"_X"), + !cast<Register>("KC0_"#Index#"_Y"), + !cast<Register>("KC0_"#Index#"_Z"), + !cast<Register>("KC0_"#Index#"_W")], + Index>; +} + +// KCACHE_BANK1 +foreach Index = 191-160 in { + foreach Chan = [ "X", "Y", "Z", "W" ] in { + // 32-bit Temporary Registers + def KC1_#Index#_#Chan : R600RegWithChan <"KC1["#Index#"-160]."#Chan, Index, Chan>; + } + // 128-bit Temporary Registers + def KC1_#Index#_XYZW : R600Reg_128 <"KC1["#Index#"-160].XYZW", + [!cast<Register>("KC1_"#Index#"_X"), + !cast<Register>("KC1_"#Index#"_Y"), + !cast<Register>("KC1_"#Index#"_Z"), + !cast<Register>("KC1_"#Index#"_W")], + Index>; +} + + // Array Base Register holding input in FS foreach Index = 448-480 in { def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>; @@ -80,6 +111,38 @@ def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", } // End isAllocatable = 0 +def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC0_%u_X", 128, 159))>; + +def R600_KC0_Y : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC0_%u_Y", 128, 159))>; + +def R600_KC0_Z : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC0_%u_Z", 128, 159))>; + +def R600_KC0_W : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC0_%u_W", 128, 159))>; + +def R600_KC0 : RegisterClass <"AMDGPU", [f32, i32], 32, + (interleave R600_KC0_X, R600_KC0_Y, + R600_KC0_Z, R600_KC0_W)>; + +def R600_KC1_X : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC1_%u_X", 160, 191))>; + +def R600_KC1_Y : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC1_%u_Y", 160, 191))>; + +def R600_KC1_Z : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC1_%u_Z", 160, 191))>; + +def R600_KC1_W : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC1_%u_W", 160, 191))>; + +def R600_KC1 : RegisterClass <"AMDGPU", [f32, i32], 32, + (interleave R600_KC1_X, R600_KC1_Y, + R600_KC1_Z, R600_KC1_W)>; + def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add (sequence "T%u_X", 0, 127), AR_X)>; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 063f5faa63..6f0c307615 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -58,6 +58,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : computeRegisterProperties(); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand); + setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::ADD, MVT::i32, Legal); @@ -69,7 +74,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setTargetDAGCombine(ISD::SETCC); - setSchedulingPreference(Sched::Source); + setSchedulingPreference(Sched::RegPressure); } SDValue SITargetLowering::LowerFormalArguments( @@ -203,32 +208,23 @@ SDValue SITargetLowering::LowerFormalArguments( MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MachineInstr * MI, MachineBasicBlock * BB) const { - MachineRegisterInfo & MRI = BB->getParent()->getRegInfo(); - MachineBasicBlock::iterator I = MI; switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); case AMDGPU::BRANCH: return BB; - case AMDGPU::SI_WQM: - LowerSI_WQM(MI, *BB, I, MRI); - break; } return BB; } -void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC) - .addReg(AMDGPU::EXEC); - - MI->eraseFromParent(); -} - EVT SITargetLowering::getSetCCResultType(EVT VT) const { return MVT::i1; } +MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const { + return MVT::i32; +} + //===----------------------------------------------------------------------===// // Custom DAG Lowering Operations //===----------------------------------------------------------------------===// @@ -488,22 +484,23 @@ bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, SDValue &Op, MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); SDNode *Node = Op.getNode(); - int OpClass; + const TargetRegisterClass *OpClass; if (MachineSDNode *MN = dyn_cast<MachineSDNode>(Node)) { const MCInstrDesc &Desc = TII->get(MN->getMachineOpcode()); - OpClass = Desc.OpInfo[Op.getResNo()].RegClass; + int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass; + if (OpClassID == -1) + OpClass = getRegClassFor(Op.getSimpleValueType()); + else + OpClass = TRI->getRegClass(OpClassID); } else if (Node->getOpcode() == ISD::CopyFromReg) { RegisterSDNode *Reg = cast<RegisterSDNode>(Node->getOperand(1).getNode()); - OpClass = MRI.getRegClass(Reg->getReg())->getID(); + OpClass = MRI.getRegClass(Reg->getReg()); } else return false; - if (OpClass == -1) - return false; - - return TRI->getRegClass(RegClass)->hasSubClassEq(TRI->getRegClass(OpClass)); + return TRI->getRegClass(RegClass)->hasSubClassEq(OpClass); } /// \brief Make sure that we don't exeed the number of allowed scalars @@ -547,6 +544,13 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, unsigned NumDefs = Desc->getNumDefs(); unsigned NumOps = Desc->getNumOperands(); + // Commuted opcode if available + int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1; + const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev); + + assert(!DescRev || DescRev->getNumDefs() == NumDefs); + assert(!DescRev || DescRev->getNumOperands() == NumOps); + // e64 version if available, -1 otherwise int OpcodeE64 = AMDGPU::getVOPe64(Opcode); const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64); @@ -599,41 +603,54 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, // Is this a VSrc or SSrc operand ? unsigned RegClass = Desc->OpInfo[Op].RegClass; - if (!isVSrc(RegClass) && !isSSrc(RegClass)) { + if (isVSrc(RegClass) || isSSrc(RegClass)) { + // Try to fold the immediates + if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) { + // Folding didn't worked, make sure we don't hit the SReg limit + ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed); + } + continue; + } + + if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) { - if (i == 1 && Desc->isCommutable() && - fitsRegClass(DAG, Ops[0], RegClass) && - foldImm(Ops[1], Immediate, ScalarSlotUsed)) { + unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass; + assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass)); - assert(isVSrc(Desc->OpInfo[NumDefs].RegClass) || - isSSrc(Desc->OpInfo[NumDefs].RegClass)); + // Test if it makes sense to swap operands + if (foldImm(Ops[1], Immediate, ScalarSlotUsed) || + (!fitsRegClass(DAG, Ops[1], RegClass) && + fitsRegClass(DAG, Ops[1], OtherRegClass))) { // Swap commutable operands SDValue Tmp = Ops[1]; Ops[1] = Ops[0]; Ops[0] = Tmp; - } else if (DescE64 && !Immediate) { - // Test if it makes sense to switch to e64 encoding - - RegClass = DescE64->OpInfo[Op].RegClass; - int32_t TmpImm = -1; - if ((isVSrc(RegClass) || isSSrc(RegClass)) && - foldImm(Ops[i], TmpImm, ScalarSlotUsed)) { - - Immediate = -1; - Promote2e64 = true; - Desc = DescE64; - DescE64 = 0; - } + Desc = DescRev; + DescRev = 0; + continue; } - continue; } - // Try to fold the immediates - if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) { - // Folding didn't worked, make sure we don't hit the SReg limit - ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed); + if (DescE64 && !Immediate) { + + // Test if it makes sense to switch to e64 encoding + unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass; + if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass)) + continue; + + int32_t TmpImm = -1; + if (foldImm(Ops[i], TmpImm, ScalarSlotUsed) || + (!fitsRegClass(DAG, Ops[i], RegClass) && + fitsRegClass(DAG, Ops[1], OtherRegClass))) { + + // Switch to e64 encoding + Immediate = -1; + Promote2e64 = true; + Desc = DescE64; + DescE64 = 0; + } } } @@ -647,10 +664,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i) Ops.push_back(Node->getOperand(i)); - // Either create a complete new or update the current instruction - if (Promote2e64) - return DAG.getMachineNode(OpcodeE64, Node->getDebugLoc(), - Node->getVTList(), Ops.data(), Ops.size()); - else - return DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + // Create a complete new instruction + return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(), + Node->getVTList(), Ops.data(), Ops.size()); } diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 0411565ee3..5ad2f40f0f 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -24,9 +24,6 @@ class SITargetLowering : public AMDGPUTargetLowering { const SIInstrInfo * TII; const TargetRegisterInfo * TRI; - void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; @@ -48,6 +45,7 @@ public: virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, MachineBasicBlock * BB) const; virtual EVT getSetCCResultType(EVT VT) const; + virtual MVT getScalarShiftAmountTy(EVT VT) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 67fbdf7be1..98bd3dbb66 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -302,21 +302,8 @@ static void increaseCounters(Counters &Dst, const Counters &Src) { Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]); } -bool SIInsertWaits::unorderedDefines(MachineInstr &MI) { - - uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags; - if (TSFlags & SIInstrFlags::LGKM_CNT) - return true; - - if (TSFlags & SIInstrFlags::EXP_CNT) - return ExpInstrTypesSeen == 3; - - return false; -} - Counters SIInsertWaits::handleOperands(MachineInstr &MI) { - bool UnorderedDefines = unorderedDefines(MI); Counters Result = ZeroCounts; // For each register affected by this @@ -329,8 +316,7 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { if (Op.isDef()) { increaseCounters(Result, UsedRegs[j]); - if (UnorderedDefines) - increaseCounters(Result, DefinedRegs[j]); + increaseCounters(Result, DefinedRegs[j]); } if (Op.isUse()) diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index de2373b11a..0bfcef562f 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -65,6 +65,26 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned Opcode; const int16_t *SubIndices; + if (AMDGPU::M0 == DestReg) { + // Check if M0 isn't already set to this value + for (MachineBasicBlock::reverse_iterator E = MBB.rend(), + I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) { + + if (!I->definesRegister(AMDGPU::M0)) + continue; + + unsigned Opc = I->getOpcode(); + if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32) + break; + + if (!I->readsRegister(SrcReg)) + break; + + // The copy isn't necessary + return; + } + } + if (AMDGPU::SReg_32RegClass.contains(DestReg)) { assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) @@ -138,6 +158,21 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } } +unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const { + + int NewOpc; + + // Try to map original to commuted opcode + if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1) + return NewOpc; + + // Try to map commuted to original opcode + if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1) + return NewOpc; + + return Opcode; +} + MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { @@ -145,7 +180,12 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, !MI->getOperand(2).isReg()) return 0; - return TargetInstrInfo::commuteInstruction(MI, NewMI); + MI = TargetInstrInfo::commuteInstruction(MI, NewMI); + + if (MI) + MI->setDesc(get(commuteOpcode(MI->getOpcode()))); + + return MI; } MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg, diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 5789af5d21..d4e60e5086 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -35,6 +35,8 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; + unsigned commuteOpcode(unsigned Opcode) const; + virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI=false) const; @@ -76,6 +78,8 @@ public: namespace AMDGPU { int getVOPe64(uint16_t Opcode); + int getCommuteRev(uint16_t Opcode); + int getCommuteOrig(uint16_t Opcode); } // End namespace AMDGPU diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 260c651dd4..617f0b871c 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -115,16 +115,17 @@ class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK < opName#" $dst, $src0", pattern >; -multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> { +multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass, + RegisterClass dstClass> { def _IMM : SMRD < op, 1, (outs dstClass:$dst), - (ins SReg_64:$sbase, i32imm:$offset), + (ins baseClass:$sbase, i32imm:$offset), asm#" $dst, $sbase, $offset", [] >; def _SGPR : SMRD < op, 0, (outs dstClass:$dst), - (ins SReg_64:$sbase, SReg_32:$soff), + (ins baseClass:$sbase, SReg_32:$soff), asm#" $dst, $sbase, $soff", [] >; } @@ -137,6 +138,11 @@ class VOP <string opName> { string OpName = opName; } +class VOP2_REV <string revOp, bit isOrig> { + string RevOp = revOp; + bit IsOrig = isOrig; +} + multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src, string opName, list<dag> pattern> { @@ -165,11 +171,11 @@ multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> : VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>; multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc, - string opName, list<dag> pattern> { + string opName, list<dag> pattern, string revOp> { def _e32 : VOP2 < op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName#"_e32 $dst, $src0, $src1", pattern - >, VOP <opName>; + >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>; def _e64 : VOP3 < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -178,23 +184,26 @@ multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc, i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] - >, VOP <opName> { + >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> { let SRC2 = SIOperand.ZERO; } } -multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> - : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern>; +multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern, + string revOp = opName> + : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>; -multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> - : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern>; +multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern, + string revOp = opName> + : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>; -multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern> { +multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern, + string revOp = opName> { def _e32 : VOP2 < op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1), opName#"_e32 $dst, $src0, $src1", pattern - >, VOP <opName>; + >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>; def _e64 : VOP3b < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -203,7 +212,7 @@ multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern> { i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] - >, VOP <opName> { + >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> { let SRC2 = SIOperand.ZERO; /* the VOP2 variant puts the carry out into VCC, the VOP3 variant can write it into any SGPR. We currently don't use the carry out, @@ -304,7 +313,7 @@ class MIMG_Load_Helper <bits<7> op, string asm> : MIMG < op, (outs VReg_128:$vdata), (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, - i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp), asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp", @@ -326,4 +335,22 @@ def getVOPe64 : InstrMapping { let ValueCols = [["8"]]; } +// Maps an original opcode to its commuted version +def getCommuteRev : InstrMapping { + let FilterClass = "VOP2_REV"; + let RowFields = ["RevOp"]; + let ColFields = ["IsOrig"]; + let KeyCol = ["1"]; + let ValueCols = [["0"]]; +} + +// Maps an commuted opcode to its original version +def getCommuteOrig : InstrMapping { + let FilterClass = "VOP2_REV"; + let RowFields = ["RevOp"]; + let ColFields = ["IsOrig"]; + let KeyCol = ["0"]; + let ValueCols = [["1"]]; +} + include "SIInstructions.td" diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 0ab9e4ec0c..4f734f9124 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -403,9 +403,9 @@ def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT //def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>; //def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>; //def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>; -//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>; -//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>; -//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>; +def BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>; +def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>; +def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>; //def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>; //def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>; //def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>; @@ -458,17 +458,31 @@ def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORM let mayLoad = 1 in { -defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32>; +defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>; +defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>; +defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>; +defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>; +defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>; -//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>; -defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128>; -defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>; -//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>; -//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>; -//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>; -//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>; -//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>; -//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>; +defm S_BUFFER_LOAD_DWORD : SMRD_Helper < + 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32 +>; + +defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper < + 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64 +>; + +defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper < + 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128 +>; + +defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper < + 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256 +>; + +defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < + 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512 +>; } // mayLoad = 1 @@ -790,13 +804,13 @@ let isCommutable = 1 in { defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))] >; -} // End isCommutable = 1 defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))] >; +defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">; +} // End isCommutable = 1 -defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>; defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>; let isCommutable = 1 in { @@ -834,16 +848,20 @@ defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; -} // End isCommutable = 1 +defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", + [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))] +>; +defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">; -defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>; -defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>; -defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>; -defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; -defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; -defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; +defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", + [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))] +>; +defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">; -let isCommutable = 1 in { +defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", + [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))] +>; +defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">; defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))] @@ -864,25 +882,24 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; //defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; //defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; //defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; -let Defs = [VCC] in { // Carry-out goes to VCC -let isCommutable = 1 in { +let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; -} // End isCommutable = 1 defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; +defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">; -defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", []>; let Uses = [VCC] in { // Carry-out comes from VCC defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>; defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>; -defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", []>; +defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">; } // End Uses = [VCC] -} // End Defs = [VCC] +} // End isCommutable = 1, Defs = [VCC] + defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>; ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>; @@ -955,14 +972,31 @@ def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>; def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>; def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>; def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; + +let isCommutable = 1 in { + def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>; def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; +def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; + +} // isCommutable = 1 + def : Pat < (mul VSrc_32:$src0, VReg_32:$src1), (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) >; -def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; + +def : Pat < + (mulhu VSrc_32:$src0, VReg_32:$src1), + (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) +>; + +def : Pat < + (mulhs VSrc_32:$src0, VReg_32:$src1), + (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) +>; + def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>; @@ -1051,17 +1085,6 @@ def LOAD_CONST : AMDGPUShaderInst < [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))] >; -let usesCustomInserter = 1 in { - -def SI_WQM : InstSI < - (outs), - (ins), - "SI_WQM", - [(int_SI_wqm)] ->; - -} // end usesCustomInserter - // SI Psuedo instructions. These are used by the CFG structurizer pass // and should be lowered to ISA instructions prior to codegen. @@ -1133,6 +1156,31 @@ def SI_KILL : InstSI < } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 // Uses = [EXEC], Defs = [EXEC] +let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { + +def SI_INDIRECT_SRC : InstSI < + (outs VReg_32:$dst, SReg_64:$temp), + (ins unknown:$src, VSrc_32:$idx, i32imm:$off), + "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off", + [] +>; + +class SI_INDIRECT_DST<RegisterClass rc> : InstSI < + (outs rc:$dst, SReg_64:$temp), + (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val), + "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val", + [] +> { + let Constraints = "$src = $dst"; +} + +def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>; +def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>; +def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>; +def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>; + +} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0] + } // end IsCodeGenOnly, isPseudo def : Pat< @@ -1165,10 +1213,9 @@ def : Pat < /* int_SI_sample for simple 1D texture lookup */ def : Pat < - (int_SI_sample imm:$writemask, (v1i32 VReg_32:$addr), + (int_SI_sample imm:$writemask, VReg_32:$addr, SReg_256:$rsrc, SReg_128:$sampler, imm), - (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, - (i32 (COPY_TO_REGCLASS VReg_32:$addr, VReg_32)), + (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_32:$addr, SReg_256:$rsrc, SReg_128:$sampler) >; @@ -1176,8 +1223,7 @@ class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, ValueType addr_type> : Pat < (name imm:$writemask, (addr_type addr_class:$addr), SReg_256:$rsrc, SReg_128:$sampler, imm), - (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, - (EXTRACT_SUBREG addr_class:$addr, sub0), + (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr, SReg_256:$rsrc, SReg_128:$sampler) >; @@ -1185,8 +1231,7 @@ class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, ValueType addr_type> : Pat < (name imm:$writemask, (addr_type addr_class:$addr), SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT), - (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0, - (EXTRACT_SUBREG addr_class:$addr, sub0), + (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0, addr_class:$addr, SReg_256:$rsrc, SReg_128:$sampler) >; @@ -1194,8 +1239,7 @@ class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, ValueType addr_type> : Pat < (name imm:$writemask, (addr_type addr_class:$addr), SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY), - (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, - (EXTRACT_SUBREG addr_class:$addr, sub0), + (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr, SReg_256:$rsrc, SReg_128:$sampler) >; @@ -1203,8 +1247,7 @@ class SampleShadowPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, ValueType addr_type> : Pat < (name imm:$writemask, (addr_type addr_class:$addr), SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW), - (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, - (EXTRACT_SUBREG addr_class:$addr, sub0), + (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr, SReg_256:$rsrc, SReg_128:$sampler) >; @@ -1212,8 +1255,7 @@ class SampleShadowArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, ValueType addr_type> : Pat < (name imm:$writemask, (addr_type addr_class:$addr), SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY), - (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, - (EXTRACT_SUBREG addr_class:$addr, sub0), + (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr, SReg_256:$rsrc, SReg_128:$sampler) >; @@ -1241,22 +1283,83 @@ defm : SamplePatterns<VReg_128, v4i32>; defm : SamplePatterns<VReg_256, v8i32>; defm : SamplePatterns<VReg_512, v16i32>; -def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>; -def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>; -def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>; -def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>; +/********** ============================================ **********/ +/********** Extraction, Insertion, Building and Casting **********/ +/********** ============================================ **********/ -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>; +foreach Index = 0-2 in { + def Extract_Element_v2i32_#Index : Extract_Element < + i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v2i32_#Index : Insert_Element < + i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v2f32_#Index : Extract_Element < + f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v2f32_#Index : Insert_Element < + f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-3 in { + def Extract_Element_v4i32_#Index : Extract_Element < + i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v4i32_#Index : Insert_Element < + i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v4f32_#Index : Extract_Element < + f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v4f32_#Index : Insert_Element < + f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-7 in { + def Extract_Element_v8i32_#Index : Extract_Element < + i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v8i32_#Index : Insert_Element < + i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v8f32_#Index : Extract_Element < + f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v8f32_#Index : Insert_Element < + f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-15 in { + def Extract_Element_v16i32_#Index : Extract_Element < + i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v16i32_#Index : Insert_Element < + i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v16f32_#Index : Extract_Element < + f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v16f32_#Index : Insert_Element < + f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; +} def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>; def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>; -def : Vector_Build <v4f32, VReg_128, f32, VReg_32>; -def : Vector_Build <v4i32, VReg_128, i32, VReg_32>; +def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>; +def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>; +def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>; def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>; +def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>; def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>; +def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>; def : BitConvert <i32, f32, SReg_32>; def : BitConvert <i32, f32, VReg_32>; @@ -1340,8 +1443,7 @@ def : Pat < /********** ================== **********/ /* llvm.AMDGPU.pow */ -/* XXX: We are using IEEE MUL, not the 0 * anything = 0 MUL, is this correct? */ -def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_F32_e32, VReg_32>; +def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>; def : Pat < (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1), @@ -1389,6 +1491,24 @@ def : Pat < (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0) >; +// 1. Offset as 8bit DWORD immediate +def : Pat < + (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset), + (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset) +>; + +// 2. Offset loaded in an 32bit SGPR +def : Pat < + (int_SI_load_const SReg_128:$sbase, imm:$offset), + (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset)) +>; + +// 3. Offset in an 32Bit VGPR +def : Pat < + (int_SI_load_const SReg_128:$sbase, VReg_32:$voff), + (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0) +>; + /********** ================== **********/ /********** VOP3 Patterns **********/ /********** ================== **********/ @@ -1426,4 +1546,62 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>; defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>; defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>; +/********** ====================== **********/ +/********** Indirect adressing **********/ +/********** ====================== **********/ + +multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt, + SI_INDIRECT_DST IndDst> { + // 1. Extract with offset + def : Pat< + (vector_extract (vt rc:$vec), + (i64 (zext (i32 (add VReg_32:$idx, imm:$off)))) + ), + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off)) + >; + + // 2. Extract without offset + def : Pat< + (vector_extract (vt rc:$vec), + (i64 (zext (i32 VReg_32:$idx))) + ), + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0)) + >; + + // 3. Insert with offset + def : Pat< + (vector_insert (vt rc:$vec), (f32 VReg_32:$val), + (i64 (zext (i32 (add VReg_32:$idx, imm:$off)))) + ), + (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val)) + >; + + // 4. Insert without offset + def : Pat< + (vector_insert (vt rc:$vec), (f32 VReg_32:$val), + (i64 (zext (i32 VReg_32:$idx))) + ), + (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val)) + >; +} + +defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>; +defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>; +defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>; +defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>; + +/********** =============== **********/ +/********** Conditions **********/ +/********** =============== **********/ + +def : Pat< + (i1 (setcc f32:$src0, f32:$src1, SETO)), + (V_CMP_O_F32_e64 f32:$src0, f32:$src1) +>; + +def : Pat< + (i1 (setcc f32:$src0, f32:$src1, SETUO)), + (V_CMP_U_F32_e64 f32:$src0, f32:$src1) +>; + } // End isSI predicate diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 7c23d1706b..0af378edfe 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -16,12 +16,10 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - /* XXX: We may need a seperate intrinsic here for loading integer values */ - def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>; - def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ; - def int_SI_wqm : Intrinsic <[], [], []>; + def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ; - class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>; + class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_sample : Sample; def int_SI_sampleb : Sample; @@ -29,8 +27,8 @@ let TargetPrefix = "SI", isTarget = 1 in { /* Interpolation Intrinsics */ - def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; - def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrReadMem]>; + def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrNoMem]>; /* Control flow Intrinsics */ diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index b215aa22db..2b60eb9fb3 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -66,6 +66,7 @@ private: static const unsigned SkipThreshold = 12; static char ID; + const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); @@ -84,9 +85,14 @@ private: void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); + void LoadM0(MachineInstr &MI, MachineInstr *MovRel); + void IndirectSrc(MachineInstr &MI); + void IndirectDst(MachineInstr &MI); + public: SILowerControlFlowPass(TargetMachine &tm) : - MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } + MachineFunctionPass(ID), TRI(tm.getRegisterInfo()), + TII(tm.getInstrInfo()) { } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -191,7 +197,8 @@ void SILowerControlFlowPass::Else(MachineInstr &MI) { unsigned Dst = MI.getOperand(0).getReg(); unsigned Src = MI.getOperand(1).getReg(); - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst) + BuildMI(MBB, MBB.getFirstNonPHI(), DL, + TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst) .addReg(Src); // Saved EXEC BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) @@ -302,9 +309,108 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { MI.eraseFromParent(); } +void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + MachineBasicBlock::iterator I = MI; + + unsigned Save = MI.getOperand(1).getReg(); + unsigned Idx = MI.getOperand(3).getReg(); + + if (AMDGPU::SReg_32RegClass.contains(Idx)) { + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(Idx); + MBB.insert(I, MovRel); + MI.eraseFromParent(); + return; + } + + assert(AMDGPU::SReg_64RegClass.contains(Save)); + assert(AMDGPU::VReg_32RegClass.contains(Idx)); + + // Save the EXEC mask + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) + .addReg(AMDGPU::EXEC); + + // Read the next variant into VCC (lower 32 bits) <- also loop target + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC) + .addReg(Idx); + + // Move index from VCC into M0 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(AMDGPU::VCC); + + // Compare the just read M0 value to all possible Idx values + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) + .addReg(AMDGPU::M0) + .addReg(Idx); + + // Update EXEC, save the original EXEC value to VCC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) + .addReg(AMDGPU::VCC); + + // Do the actual move + MBB.insert(I, MovRel); + + // Update EXEC, switch all done bits to 0 and all todo bits to 1 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) + .addReg(AMDGPU::VCC); + + // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addImm(-7) + .addReg(AMDGPU::EXEC); + + // Restore EXEC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) + .addReg(Save); + + MI.eraseFromParent(); +} + +void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Vec = MI.getOperand(2).getReg(); + unsigned Off = MI.getOperand(4).getImm(); + + MachineInstr *MovRel = + BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) + .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off) + .addReg(AMDGPU::M0, RegState::Implicit) + .addReg(Vec, RegState::Implicit); + + LoadM0(MI, MovRel); +} + +void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Off = MI.getOperand(4).getImm(); + unsigned Val = MI.getOperand(5).getReg(); + + MachineInstr *MovRel = + BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32)) + .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define) + .addReg(Val) + .addReg(AMDGPU::M0, RegState::Implicit) + .addReg(Dst, RegState::Implicit); + + LoadM0(MI, MovRel); +} + bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { bool HaveKill = false; + bool NeedWQM = false; unsigned Depth = 0; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); @@ -363,9 +469,33 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::S_BRANCH: Branch(MI); break; + + case AMDGPU::SI_INDIRECT_SRC: + IndirectSrc(MI); + break; + + case AMDGPU::SI_INDIRECT_DST_V2: + case AMDGPU::SI_INDIRECT_DST_V4: + case AMDGPU::SI_INDIRECT_DST_V8: + case AMDGPU::SI_INDIRECT_DST_V16: + IndirectDst(MI); + break; + + case AMDGPU::V_INTERP_P1_F32: + case AMDGPU::V_INTERP_P2_F32: + case AMDGPU::V_INTERP_MOV_F32: + NeedWQM = true; + break; + } } } + if (NeedWQM) { + MachineBasicBlock &MBB = MF.front(); + BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64), + AMDGPU::EXEC).addReg(AMDGPU::EXEC); + } + return true; } diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp index 1a4e4cbbbb..ee0e30755f 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.cpp +++ b/lib/Target/R600/SIMachineFunctionInfo.cpp @@ -10,25 +10,9 @@ #include "SIMachineFunctionInfo.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/Function.h" using namespace llvm; -const char *SIMachineFunctionInfo::ShaderTypeAttribute = "ShaderType"; - SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) - : MachineFunctionInfo(), - ShaderType(0), - PSInputAddr(0) { - - AttributeSet Set = MF.getFunction()->getAttributes(); - Attribute A = Set.getAttribute(AttributeSet::FunctionIndex, - ShaderTypeAttribute); - - if (A.isStringAttribute()) { - StringRef Str = A.getValueAsString(); - if (Str.getAsInteger(0, ShaderType)) - llvm_unreachable("Can't parse shader type!"); - } -} + : AMDGPUMachineFunction(MF), + PSInputAddr(0) { } diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h index 91a809b124..6da9f7f9a1 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.h +++ b/lib/Target/R600/SIMachineFunctionInfo.h @@ -15,18 +15,15 @@ #ifndef SIMACHINEFUNCTIONINFO_H_ #define SIMACHINEFUNCTIONINFO_H_ -#include "llvm/CodeGen/MachineFunction.h" +#include "AMDGPUMachineFunction.h" namespace llvm { /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which /// tells the hardware which interpolation parameters to load. -class SIMachineFunctionInfo : public MachineFunctionInfo { +class SIMachineFunctionInfo : public AMDGPUMachineFunction { public: - static const char *ShaderTypeAttribute; - SIMachineFunctionInfo(const MachineFunction &MF); - unsigned ShaderType; unsigned PSInputAddr; }; diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 88275c523f..99278ae8dc 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -30,6 +30,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } +unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + return RC->getNumRegs(); +} + const TargetRegisterClass * SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const { switch (rc->getID()) { diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index 40171e4450..caec228413 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ -31,6 +31,9 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { virtual BitVector getReservedRegs(const MachineFunction &MF) const; + virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + /// \param RC is an AMDIL reg class. /// /// \returns the SI register class that is equivalent to \p RC. diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 3dcad506d2..4f14931a9c 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -158,15 +158,15 @@ def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>; def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>; // Register class for all vector registers (VGPRs + Interploation Registers) -def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>; +def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>; -def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>; +def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>; -def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>; +def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>; -def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>; +def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>; -def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; +def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>; //===----------------------------------------------------------------------===// // [SV]Src_* register classes, can have either an immediate or an register @@ -174,9 +174,9 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; -def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>; +def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>; def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>; -def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>; +def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>; diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 90b698d507..c3810b2a4d 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -182,11 +182,11 @@ multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> { def rr : F3_1<2, Op3Val, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), !strconcat(OpcStr, " $b, $c, $dst"), - [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; + [(set i32:$dst, (OpNode i32:$b, i32:$c))]>; def ri : F3_2<2, Op3Val, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), !strconcat(OpcStr, " $b, $c, $dst"), - [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>; + [(set i32:$dst, (OpNode i32:$b, simm13:$c))]>; } /// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no @@ -243,10 +243,10 @@ let Predicates = [HasNoV9] in { // Only emit these in V8 mode. "!FpMOVD $src, $dst", []>; def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src), "!FpNEGD $src, $dst", - [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>; + [(set f64:$dst, (fneg f64:$src))]>; def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src), "!FpABSD $src, $dst", - [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>; + [(set f64:$dst, (fabs f64:$src))]>; } // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after @@ -257,19 +257,16 @@ let Uses = [ICC], usesCustomInserter = 1 in { def SELECT_CC_Int_ICC : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond), "; SELECT_CC_Int_ICC PSEUDO!", - [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F, - imm:$Cond))]>; + [(set i32:$dst, (SPselecticc i32:$T, i32:$F, imm:$Cond))]>; def SELECT_CC_FP_ICC : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond), "; SELECT_CC_FP_ICC PSEUDO!", - [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F, - imm:$Cond))]>; + [(set f32:$dst, (SPselecticc f32:$T, f32:$F, imm:$Cond))]>; def SELECT_CC_DFP_ICC : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond), "; SELECT_CC_DFP_ICC PSEUDO!", - [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F, - imm:$Cond))]>; + [(set f64:$dst, (SPselecticc f64:$T, f64:$F, imm:$Cond))]>; } let usesCustomInserter = 1, Uses = [FCC] in { @@ -277,19 +274,16 @@ let usesCustomInserter = 1, Uses = [FCC] in { def SELECT_CC_Int_FCC : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond), "; SELECT_CC_Int_FCC PSEUDO!", - [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F, - imm:$Cond))]>; + [(set i32:$dst, (SPselectfcc i32:$T, i32:$F, imm:$Cond))]>; def SELECT_CC_FP_FCC : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond), "; SELECT_CC_FP_FCC PSEUDO!", - [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F, - imm:$Cond))]>; + [(set f32:$dst, (SPselectfcc f32:$T, f32:$F, imm:$Cond))]>; def SELECT_CC_DFP_FCC : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond), "; SELECT_CC_DFP_FCC PSEUDO!", - [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F, - imm:$Cond))]>; + [(set f64:$dst, (SPselectfcc f64:$T, f64:$F, imm:$Cond))]>; } @@ -309,111 +303,111 @@ let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in { def LDSBrr : F3_1<3, 0b001001, (outs IntRegs:$dst), (ins MEMrr:$addr), "ldsb [$addr], $dst", - [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>; + [(set i32:$dst, (sextloadi8 ADDRrr:$addr))]>; def LDSBri : F3_2<3, 0b001001, (outs IntRegs:$dst), (ins MEMri:$addr), "ldsb [$addr], $dst", - [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>; + [(set i32:$dst, (sextloadi8 ADDRri:$addr))]>; def LDSHrr : F3_1<3, 0b001010, (outs IntRegs:$dst), (ins MEMrr:$addr), "ldsh [$addr], $dst", - [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>; + [(set i32:$dst, (sextloadi16 ADDRrr:$addr))]>; def LDSHri : F3_2<3, 0b001010, (outs IntRegs:$dst), (ins MEMri:$addr), "ldsh [$addr], $dst", - [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>; + [(set i32:$dst, (sextloadi16 ADDRri:$addr))]>; def LDUBrr : F3_1<3, 0b000001, (outs IntRegs:$dst), (ins MEMrr:$addr), "ldub [$addr], $dst", - [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>; + [(set i32:$dst, (zextloadi8 ADDRrr:$addr))]>; def LDUBri : F3_2<3, 0b000001, (outs IntRegs:$dst), (ins MEMri:$addr), "ldub [$addr], $dst", - [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>; + [(set i32:$dst, (zextloadi8 ADDRri:$addr))]>; def LDUHrr : F3_1<3, 0b000010, (outs IntRegs:$dst), (ins MEMrr:$addr), "lduh [$addr], $dst", - [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>; + [(set i32:$dst, (zextloadi16 ADDRrr:$addr))]>; def LDUHri : F3_2<3, 0b000010, (outs IntRegs:$dst), (ins MEMri:$addr), "lduh [$addr], $dst", - [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>; + [(set i32:$dst, (zextloadi16 ADDRri:$addr))]>; def LDrr : F3_1<3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr), "ld [$addr], $dst", - [(set IntRegs:$dst, (load ADDRrr:$addr))]>; + [(set i32:$dst, (load ADDRrr:$addr))]>; def LDri : F3_2<3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr), "ld [$addr], $dst", - [(set IntRegs:$dst, (load ADDRri:$addr))]>; + [(set i32:$dst, (load ADDRri:$addr))]>; // Section B.2 - Load Floating-point Instructions, p. 92 def LDFrr : F3_1<3, 0b100000, (outs FPRegs:$dst), (ins MEMrr:$addr), "ld [$addr], $dst", - [(set FPRegs:$dst, (load ADDRrr:$addr))]>; + [(set f32:$dst, (load ADDRrr:$addr))]>; def LDFri : F3_2<3, 0b100000, (outs FPRegs:$dst), (ins MEMri:$addr), "ld [$addr], $dst", - [(set FPRegs:$dst, (load ADDRri:$addr))]>; + [(set f32:$dst, (load ADDRri:$addr))]>; def LDDFrr : F3_1<3, 0b100011, (outs DFPRegs:$dst), (ins MEMrr:$addr), "ldd [$addr], $dst", - [(set DFPRegs:$dst, (load ADDRrr:$addr))]>; + [(set f64:$dst, (load ADDRrr:$addr))]>; def LDDFri : F3_2<3, 0b100011, (outs DFPRegs:$dst), (ins MEMri:$addr), "ldd [$addr], $dst", - [(set DFPRegs:$dst, (load ADDRri:$addr))]>; + [(set f64:$dst, (load ADDRri:$addr))]>; // Section B.4 - Store Integer Instructions, p. 95 def STBrr : F3_1<3, 0b000101, (outs), (ins MEMrr:$addr, IntRegs:$src), "stb $src, [$addr]", - [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>; + [(truncstorei8 i32:$src, ADDRrr:$addr)]>; def STBri : F3_2<3, 0b000101, (outs), (ins MEMri:$addr, IntRegs:$src), "stb $src, [$addr]", - [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>; + [(truncstorei8 i32:$src, ADDRri:$addr)]>; def STHrr : F3_1<3, 0b000110, (outs), (ins MEMrr:$addr, IntRegs:$src), "sth $src, [$addr]", - [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>; + [(truncstorei16 i32:$src, ADDRrr:$addr)]>; def STHri : F3_2<3, 0b000110, (outs), (ins MEMri:$addr, IntRegs:$src), "sth $src, [$addr]", - [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>; + [(truncstorei16 i32:$src, ADDRri:$addr)]>; def STrr : F3_1<3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src), "st $src, [$addr]", - [(store IntRegs:$src, ADDRrr:$addr)]>; + [(store i32:$src, ADDRrr:$addr)]>; def STri : F3_2<3, 0b000100, (outs), (ins MEMri:$addr, IntRegs:$src), "st $src, [$addr]", - [(store IntRegs:$src, ADDRri:$addr)]>; + [(store i32:$src, ADDRri:$addr)]>; // Section B.5 - Store Floating-point Instructions, p. 97 def STFrr : F3_1<3, 0b100100, (outs), (ins MEMrr:$addr, FPRegs:$src), "st $src, [$addr]", - [(store FPRegs:$src, ADDRrr:$addr)]>; + [(store f32:$src, ADDRrr:$addr)]>; def STFri : F3_2<3, 0b100100, (outs), (ins MEMri:$addr, FPRegs:$src), "st $src, [$addr]", - [(store FPRegs:$src, ADDRri:$addr)]>; + [(store f32:$src, ADDRri:$addr)]>; def STDFrr : F3_1<3, 0b100111, (outs), (ins MEMrr:$addr, DFPRegs:$src), "std $src, [$addr]", - [(store DFPRegs:$src, ADDRrr:$addr)]>; + [(store f64:$src, ADDRrr:$addr)]>; def STDFri : F3_2<3, 0b100111, (outs), (ins MEMri:$addr, DFPRegs:$src), "std $src, [$addr]", - [(store DFPRegs:$src, ADDRri:$addr)]>; + [(store f64:$src, ADDRri:$addr)]>; // Section B.9 - SETHI Instruction, p. 104 def SETHIi: F2_1<0b100, (outs IntRegs:$dst), (ins i32imm:$src), "sethi $src, $dst", - [(set IntRegs:$dst, SETHIimm:$src)]>; + [(set i32:$dst, SETHIimm:$src)]>; // Section B.10 - NOP Instruction, p. 105 // (It's a special case of SETHI) @@ -426,7 +420,7 @@ defm AND : F3_12<"and", 0b000001, and>; def ANDNrr : F3_1<2, 0b000101, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), "andn $b, $c, $dst", - [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>; + [(set i32:$dst, (and i32:$b, (not i32:$c)))]>; def ANDNri : F3_2<2, 0b000101, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), "andn $b, $c, $dst", []>; @@ -436,7 +430,7 @@ defm OR : F3_12<"or", 0b000010, or>; def ORNrr : F3_1<2, 0b000110, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), "orn $b, $c, $dst", - [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>; + [(set i32:$dst, (or i32:$b, (not i32:$c)))]>; def ORNri : F3_2<2, 0b000110, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), "orn $b, $c, $dst", []>; @@ -445,7 +439,7 @@ defm XOR : F3_12<"xor", 0b000011, xor>; def XNORrr : F3_1<2, 0b000111, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), "xnor $b, $c, $dst", - [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>; + [(set i32:$dst, (not (xor i32:$b, i32:$c)))]>; def XNORri : F3_2<2, 0b000111, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), "xnor $b, $c, $dst", []>; @@ -462,7 +456,7 @@ defm ADD : F3_12<"add", 0b000000, add>; def LEA_ADDri : F3_2<2, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr), "add ${addr:arith}, $dst", - [(set IntRegs:$dst, ADDRri:$addr)]>; + [(set i32:$dst, ADDRri:$addr)]>; let Defs = [ICC] in defm ADDCC : F3_12<"addcc", 0b010000, addc>; @@ -603,11 +597,11 @@ def FDTOI : F3_3<2, 0b110100, 0b011010010, def FSTOD : F3_3<2, 0b110100, 0b011001001, (outs DFPRegs:$dst), (ins FPRegs:$src), "fstod $src, $dst", - [(set DFPRegs:$dst, (fextend FPRegs:$src))]>; + [(set f64:$dst, (fextend f32:$src))]>; def FDTOS : F3_3<2, 0b110100, 0b011000110, (outs FPRegs:$dst), (ins DFPRegs:$src), "fdtos $src, $dst", - [(set FPRegs:$dst, (fround DFPRegs:$src))]>; + [(set f32:$dst, (fround f64:$src))]>; // Floating-point Move Instructions, p. 144 def FMOVS : F3_3<2, 0b110100, 0b000000001, @@ -616,22 +610,22 @@ def FMOVS : F3_3<2, 0b110100, 0b000000001, def FNEGS : F3_3<2, 0b110100, 0b000000101, (outs FPRegs:$dst), (ins FPRegs:$src), "fnegs $src, $dst", - [(set FPRegs:$dst, (fneg FPRegs:$src))]>; + [(set f32:$dst, (fneg f32:$src))]>; def FABSS : F3_3<2, 0b110100, 0b000001001, (outs FPRegs:$dst), (ins FPRegs:$src), "fabss $src, $dst", - [(set FPRegs:$dst, (fabs FPRegs:$src))]>; + [(set f32:$dst, (fabs f32:$src))]>; // Floating-point Square Root Instructions, p.145 def FSQRTS : F3_3<2, 0b110100, 0b000101001, (outs FPRegs:$dst), (ins FPRegs:$src), "fsqrts $src, $dst", - [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>; + [(set f32:$dst, (fsqrt f32:$src))]>; def FSQRTD : F3_3<2, 0b110100, 0b000101010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fsqrtd $src, $dst", - [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>; + [(set f64:$dst, (fsqrt f64:$src))]>; @@ -639,42 +633,42 @@ def FSQRTD : F3_3<2, 0b110100, 0b000101010, def FADDS : F3_3<2, 0b110100, 0b001000001, (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fadds $src1, $src2, $dst", - [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>; + [(set f32:$dst, (fadd f32:$src1, f32:$src2))]>; def FADDD : F3_3<2, 0b110100, 0b001000010, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "faddd $src1, $src2, $dst", - [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>; + [(set f64:$dst, (fadd f64:$src1, f64:$src2))]>; def FSUBS : F3_3<2, 0b110100, 0b001000101, (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fsubs $src1, $src2, $dst", - [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>; + [(set f32:$dst, (fsub f32:$src1, f32:$src2))]>; def FSUBD : F3_3<2, 0b110100, 0b001000110, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "fsubd $src1, $src2, $dst", - [(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>; + [(set f64:$dst, (fsub f64:$src1, f64:$src2))]>; // Floating-point Multiply and Divide Instructions, p. 147 def FMULS : F3_3<2, 0b110100, 0b001001001, (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fmuls $src1, $src2, $dst", - [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>; + [(set f32:$dst, (fmul f32:$src1, f32:$src2))]>; def FMULD : F3_3<2, 0b110100, 0b001001010, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "fmuld $src1, $src2, $dst", - [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>; + [(set f64:$dst, (fmul f64:$src1, f64:$src2))]>; def FSMULD : F3_3<2, 0b110100, 0b001101001, (outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fsmuld $src1, $src2, $dst", - [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1), - (fextend FPRegs:$src2)))]>; + [(set f64:$dst, (fmul (fextend f32:$src1), + (fextend f32:$src2)))]>; def FDIVS : F3_3<2, 0b110100, 0b001001101, (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fdivs $src1, $src2, $dst", - [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>; + [(set f32:$dst, (fdiv f32:$src1, f32:$src2))]>; def FDIVD : F3_3<2, 0b110100, 0b001001110, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "fdivd $src1, $src2, $dst", - [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>; + [(set f64:$dst, (fdiv f64:$src1, f64:$src2))]>; // Floating-point Compare Instructions, p. 148 // Note: the 2nd template arg is different for these guys. @@ -685,11 +679,11 @@ let Defs = [FCC] in { def FCMPS : F3_3<2, 0b110101, 0b001010001, (outs), (ins FPRegs:$src1, FPRegs:$src2), "fcmps $src1, $src2\n\tnop", - [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>; + [(SPcmpfcc f32:$src1, f32:$src2)]>; def FCMPD : F3_3<2, 0b110101, 0b001010010, (outs), (ins DFPRegs:$src1, DFPRegs:$src2), "fcmpd $src1, $src2\n\tnop", - [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>; + [(SPcmpfcc f64:$src1, f64:$src2)]>; } //===----------------------------------------------------------------------===// @@ -704,52 +698,45 @@ let Predicates = [HasV9], Constraints = "$T = $dst" in { def MOVICCrr : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc), "mov$cc %icc, $F, $dst", - [(set IntRegs:$dst, - (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>; + [(set i32:$dst, (SPselecticc i32:$F, i32:$T, imm:$cc))]>; def MOVICCri : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc), "mov$cc %icc, $F, $dst", - [(set IntRegs:$dst, - (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>; + [(set i32:$dst, (SPselecticc simm11:$F, i32:$T, imm:$cc))]>; } let Uses = [FCC] in { def MOVFCCrr : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc), "mov$cc %fcc0, $F, $dst", - [(set IntRegs:$dst, - (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>; + [(set i32:$dst, (SPselectfcc i32:$F, i32:$T, imm:$cc))]>; def MOVFCCri : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc), "mov$cc %fcc0, $F, $dst", - [(set IntRegs:$dst, - (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>; + [(set i32:$dst, (SPselectfcc simm11:$F, i32:$T, imm:$cc))]>; } let Uses = [ICC] in { def FMOVS_ICC : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc), "fmovs$cc %icc, $F, $dst", - [(set FPRegs:$dst, - (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>; + [(set f32:$dst, + (SPselecticc f32:$F, f32:$T, imm:$cc))]>; def FMOVD_ICC : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc), "fmovd$cc %icc, $F, $dst", - [(set DFPRegs:$dst, - (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>; + [(set f64:$dst, (SPselecticc f64:$F, f64:$T, imm:$cc))]>; } let Uses = [FCC] in { def FMOVS_FCC : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc), "fmovs$cc %fcc0, $F, $dst", - [(set FPRegs:$dst, - (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>; + [(set f32:$dst, (SPselectfcc f32:$F, f32:$T, imm:$cc))]>; def FMOVD_FCC : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc), "fmovd$cc %fcc0, $F, $dst", - [(set DFPRegs:$dst, - (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>; + [(set f64:$dst, (SPselectfcc f64:$F, f64:$T, imm:$cc))]>; } } @@ -762,11 +749,11 @@ let Predicates = [HasV9] in { def FNEGD : F3_3<2, 0b110100, 0b000000110, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fnegd $src, $dst", - [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>; + [(set f64:$dst, (fneg f64:$src))]>; def FABSD : F3_3<2, 0b110100, 0b000001010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fabsd $src, $dst", - [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>; + [(set f64:$dst, (fabs f64:$src))]>; } // POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear @@ -774,8 +761,8 @@ let Predicates = [HasV9] in { def POPCrr : F3_1<2, 0b101110, (outs IntRegs:$dst), (ins IntRegs:$src), "popc $src, $dst", []>, Requires<[HasV9]>; -def : Pat<(ctpop IntRegs:$src), - (POPCrr (SLLri IntRegs:$src, 0))>; +def : Pat<(ctpop i32:$src), + (POPCrr (SLLri $src, 0))>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -789,10 +776,10 @@ def : Pat<(i32 imm:$val), (ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>; // subc -def : Pat<(subc IntRegs:$b, IntRegs:$c), - (SUBCCrr IntRegs:$b, IntRegs:$c)>; -def : Pat<(subc IntRegs:$b, simm13:$val), - (SUBCCri IntRegs:$b, imm:$val)>; +def : Pat<(subc i32:$b, i32:$c), + (SUBCCrr $b, $c)>; +def : Pat<(subc i32:$b, simm13:$val), + (SUBCCri $b, imm:$val)>; // Global addresses, constant pool entries def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>; @@ -801,10 +788,10 @@ def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>; def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>; // Add reg, lo. This is used when taking the addr of a global/constpool entry. -def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)), - (ADDri IntRegs:$r, tglobaladdr:$in)>; -def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)), - (ADDri IntRegs:$r, tconstpool:$in)>; +def : Pat<(add i32:$r, (SPlo tglobaladdr:$in)), + (ADDri $r, tglobaladdr:$in)>; +def : Pat<(add i32:$r, (SPlo tconstpool:$in)), + (ADDri $r, tconstpool:$in)>; // Calls: def : Pat<(call tglobaladdr:$dst), diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 357879bf6c..b53a1ed095 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -40,7 +40,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index c6cafe59eb..ee88ce77c0 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -610,6 +610,9 @@ struct StringComparator { // Provided for compatibility with MSVC's debug mode. bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; } bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; } + bool operator()(const char *LHS, const char *RHS) const { + return std::strcmp(LHS, RHS) < 0; + } }; } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 7d8b49cdf2..e7282519d5 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -61,6 +63,30 @@ TargetMachine::~TargetMachine() { delete AsmInfo; } +/// \brief Reset the target options based on the function's attributes. +void TargetMachine::resetTargetOptions(const MachineFunction *MF) const { + const Function *F = MF->getFunction(); + TargetOptions &TO = MF->getTarget().Options; + +#define RESET_OPTION(X, Y) \ + do { \ + if (F->hasFnAttribute(Y)) \ + TO.X = \ + (F->getAttributes(). \ + getAttribute(AttributeSet::FunctionIndex, \ + Y).getValueAsString() == "true"); \ + } while (0) + + RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim"); + RESET_OPTION(NoFramePointerElimNonLeaf, "no-frame-pointer-elim-non-leaf"); + RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad"); + RESET_OPTION(UnsafeFPMath, "unsafe-fp-math"); + RESET_OPTION(NoInfsFPMath, "no-infs-fp-math"); + RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math"); + RESET_OPTION(UseSoftFloat, "use-soft-float"); + RESET_OPTION(DisableTailCalls, "disable-tail-calls"); +} + /// getRelocationModel - Returns the code generation relocation model. The /// choices are static, PIC, and dynamic-no-pic, and target default. Reloc::Model TargetMachine::getRelocationModel() const { diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index ee5c2b2bfd..75d26f55c3 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -58,10 +58,15 @@ private: X86Operand *ParseIntelOperand(); X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc); X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind); - X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc); - X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); + X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp, + SMLoc StartLoc); + X86Operand *ParseIntelBracExpression(unsigned SegReg, uint64_t ImmDisp, + unsigned Size); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); + X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End, + SMLoc SizeDirLoc, unsigned Size); + bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp, SmallString<64> &Err); @@ -170,30 +175,33 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc OffsetOfLoc; bool AddressOf; + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNo; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned SegReg; + const MCExpr *Disp; + unsigned BaseReg; + unsigned IndexReg; + unsigned Scale; + unsigned Size; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNo; - } Reg; - - struct { - const MCExpr *Val; - bool NeedAsmRewrite; - } Imm; - - struct { - unsigned SegReg; - const MCExpr *Disp; - unsigned BaseReg; - unsigned IndexReg; - unsigned Scale; - unsigned Size; - bool NeedSizeDir; - } Mem; + struct TokOp Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; }; X86Operand(KindTy K, SMLoc Start, SMLoc End) @@ -231,11 +239,6 @@ struct X86Operand : public MCParsedAsmOperand { return Imm.Val; } - bool needAsmRewrite() const { - assert(Kind == Immediate && "Invalid access!"); - return Imm.NeedAsmRewrite; - } - const MCExpr *getMemDisp() const { assert(Kind == Memory && "Invalid access!"); return Mem.Disp; @@ -332,11 +335,6 @@ struct X86Operand : public MCParsedAsmOperand { return isImmSExti64i32Value(CE->getValue()); } - unsigned getMemSize() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.Size; - } - bool isOffsetOf() const { return OffsetOfLoc.getPointer(); } @@ -345,11 +343,6 @@ struct X86Operand : public MCParsedAsmOperand { return AddressOf; } - bool needSizeDirective() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.NeedSizeDir; - } - bool isMem() const { return Kind == Memory; } bool isMem8() const { return Kind == Memory && (!Mem.Size || Mem.Size == 8); @@ -485,17 +478,15 @@ struct X86Operand : public MCParsedAsmOperand { return Res; } - static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc, - bool NeedRewrite = true){ + static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); Res->Imm.Val = Val; - Res->Imm.NeedAsmRewrite = NeedRewrite; return Res; } /// Create an absolute memory operand. static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, bool NeedSizeDir = false) { + unsigned Size = 0) { X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -503,7 +494,6 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = 0; Res->Mem.Scale = 1; Res->Mem.Size = Size; - Res->Mem.NeedSizeDir = NeedSizeDir; Res->AddressOf = false; return Res; } @@ -512,7 +502,7 @@ struct X86Operand : public MCParsedAsmOperand { static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, bool NeedSizeDir = false) { + unsigned Size = 0) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); @@ -527,7 +517,6 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = IndexReg; Res->Mem.Scale = Scale; Res->Mem.Size = Size; - Res->Mem.NeedSizeDir = NeedSizeDir; Res->AddressOf = false; return Res; } @@ -711,8 +700,8 @@ class IntelBracExprStateMachine { bool isPlus; public: - IntelBracExprStateMachine(MCAsmParser &parser) : - State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(0), + IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) : + State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(disp), TmpReg(0), TmpInteger(0), isPlus(true) {} unsigned getBaseReg() { return BaseReg; } @@ -890,7 +879,47 @@ public: } }; -X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, +X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, + SMLoc End, SMLoc SizeDirLoc, + unsigned Size) { + bool NeedSizeDir = false; + bool IsVarDecl = false; + if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { + const MCSymbol &Sym = SymRef->getSymbol(); + // FIXME: The SemaLookup will fail if the name is anything other then an + // identifier. + // FIXME: Pass a valid SMLoc. + unsigned tLength, tSize, tType; + SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, + tSize, tType, IsVarDecl); + if (!Size) { + Size = tType * 8; // Size is in terms of bits in this context. + NeedSizeDir = Size > 0; + } + } + + // If this is not a VarDecl then assume it is a FuncDecl or some other label + // reference. We need an 'r' constraint here, so we need to create register + // operand to ensure proper matching. Just pick a GPR based on the size of + // a pointer. + if (!IsVarDecl) { + unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); + } + + if (NeedSizeDir) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc, + /*Len*/0, Size)); + + // When parsing inline assembly we set the base register to a non-zero value + // as we don't know the actual value at this time. This is necessary to + // get the matching correct in some cases. + return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, + /*Scale*/1, Start, End, Size); +} + +X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, + uint64_t ImmDisp, unsigned Size) { const AsmToken &Tok = Parser.getTok(); SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc(); @@ -902,7 +931,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, unsigned TmpReg = 0; - // Try to handle '[' 'symbol' ']' + // Try to handle '[' 'Symbol' ']' if (getLexer().is(AsmToken::Identifier)) { if (ParseRegister(TmpReg, Start, End)) { const MCExpr *Disp; @@ -911,16 +940,27 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().isNot(AsmToken::RBrac)) return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!"); + + // FIXME: We don't handle 'ImmDisp' '[' 'Symbol' ']'. + if (ImmDisp) + return ErrorOperand(Start, "Unsupported immediate displacement!"); + // Adjust the EndLoc due to the ']'. End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1); Parser.Lex(); - return X86Operand::CreateMem(Disp, Start, End, Size); + if (!isParsingInlineAsm()) + return X86Operand::CreateMem(Disp, Start, End, Size); + + // We want the size directive before the '['. + SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1); + return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size); } } - // Parse [ BaseReg + Scale*IndexReg + Disp ]. + // Parse [ BaseReg + Scale*IndexReg + Disp ]. We may have already parsed an + // immediate displacement before the bracketed expression. bool Done = false; - IntelBracExprStateMachine SM(Parser); + IntelBracExprStateMachine SM(Parser, ImmDisp); // If we parsed a register, then the end loc has already been set and // the identifier has already been lexed. We also need to update the @@ -1007,7 +1047,9 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, } /// ParseIntelMemOperand - Parse intel style memory operand. -X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { +X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, + uint64_t ImmDisp, + SMLoc Start) { const AsmToken &Tok = Parser.getTok(); SMLoc End; @@ -1019,8 +1061,21 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { Parser.Lex(); } + // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. + if (getLexer().is(AsmToken::Integer)) { + const AsmToken &IntTok = Parser.getTok(); + if (isParsingInlineAsm()) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, + IntTok.getLoc())); + uint64_t ImmDisp = IntTok.getIntVal(); + Parser.Lex(); // Eat the integer. + if (getLexer().isNot(AsmToken::LBrac)) + return ErrorOperand(Start, "Expected '[' token!"); + return ParseIntelBracExpression(SegReg, ImmDisp, Size); + } + if (getLexer().is(AsmToken::LBrac)) - return ParseIntelBracExpression(SegReg, Size); + return ParseIntelBracExpression(SegReg, ImmDisp, Size); if (!ParseRegister(SegReg, Start, End)) { // Handel SegReg : [ ... ] @@ -1029,47 +1084,16 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { Parser.Lex(); // Eat : if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); - return ParseIntelBracExpression(SegReg, Size); + return ParseIntelBracExpression(SegReg, ImmDisp, Size); } const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); if (getParser().parseExpression(Disp, End)) return 0; - bool NeedSizeDir = false; - bool IsVarDecl = false; - if (isParsingInlineAsm()) { - if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { - const MCSymbol &Sym = SymRef->getSymbol(); - // FIXME: The SemaLookup will fail if the name is anything other then an - // identifier. - // FIXME: Pass a valid SMLoc. - unsigned tLength, tSize, tType; - SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, - tSize, tType, IsVarDecl); - if (!Size) - Size = tType * 8; // Size is in terms of bits in this context. - NeedSizeDir = Size > 0; - } - } if (!isParsingInlineAsm()) return X86Operand::CreateMem(Disp, Start, End, Size); - else { - // If this is not a VarDecl then assume it is a FuncDecl or some other label - // reference. We need an 'r' constraint here, so we need to create register - // operand to ensure proper matching. Just pick a GPR based on the size of - // a pointer. - if (!IsVarDecl) { - unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; - return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); - } - - // When parsing inline assembly we set the base register to a non-zero value - // as we don't know the actual value at this time. This is necessary to - // get the matching correct in some cases. - return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, - /*Scale*/1, Start, End, Size, NeedSizeDir); - } + return CreateMemForInlineAsm(Disp, Start, End, Start, Size); } /// Parse the '.' operator. @@ -1197,7 +1221,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext()); - return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false); + return X86Operand::CreateImm(Imm, Start, End); } X86Operand *X86AsmParser::ParseIntelOperand() { @@ -1220,8 +1244,24 @@ X86Operand *X86AsmParser::ParseIntelOperand() { if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || getLexer().is(AsmToken::Minus)) { const MCExpr *Val; + bool isInteger = getLexer().is(AsmToken::Integer); if (!getParser().parseExpression(Val, End)) { - return X86Operand::CreateImm(Val, Start, End); + if (isParsingInlineAsm()) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start)); + // Immediate. + if (getLexer().isNot(AsmToken::LBrac)) + return X86Operand::CreateImm(Val, Start, End); + + // Only positive immediates are valid. + if (!isInteger) { + Error(Parser.getTok().getLoc(), "expected a positive immediate " + "displacement before bracketed expr."); + return 0; + } + + // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. + if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue()) + return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start); } } @@ -1234,11 +1274,11 @@ X86Operand *X86AsmParser::ParseIntelOperand() { return X86Operand::CreateReg(RegNo, Start, End); getParser().Lex(); // Eat the colon. - return ParseIntelMemOperand(RegNo, Start); + return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start); } // Memory operand. - return ParseIntelMemOperand(0, Start); + return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start); } X86Operand *X86AsmParser::ParseATTOperand() { @@ -1262,7 +1302,6 @@ X86Operand *X86AsmParser::ParseATTOperand() { if (getLexer().isNot(AsmToken::Colon)) return X86Operand::CreateReg(RegNo, Start, End); - getParser().Lex(); // Eat the colon. return ParseMemOperand(RegNo, Start); } @@ -1734,242 +1773,74 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, return false; } -bool X86AsmParser:: -processInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { - switch (Inst.getOpcode()) { - default: return false; - case X86::AND16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::AND32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::AND64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg, + bool isCmp) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + if (!isCmp) + TmpInst.addOperand(MCOperand::CreateReg(Reg)); + TmpInst.addOperand(MCOperand::CreateReg(Reg)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; +} - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti16i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::AX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti32i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::EAX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti64i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::RAX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } +bool X86AsmParser:: +processInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { + switch (Inst.getOpcode()) { + default: return false; + case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8); + case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8); + case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8); + case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8); + case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8); + case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8); + case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8); + case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8); + case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8); + case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true); + case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true); + case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true); + case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8); + case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8); + case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8); + case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8); + case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8); + case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8); + case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8); + case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8); + case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8); + case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8); + case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8); + case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8); } } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 85d8a991dd..e40edba6d6 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -61,7 +61,7 @@ static int modRMRequired(OpcodeType type, InstructionContext insnContext, uint8_t opcode) { const struct ContextDecision* decision = 0; - + switch (type) { case ONEBYTE: decision = &ONEBYTE_SYM; @@ -102,7 +102,7 @@ static InstrUID decode(OpcodeType type, uint8_t opcode, uint8_t modRM) { const struct ModRMDecision* dec = 0; - + switch (type) { case ONEBYTE: dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; @@ -123,7 +123,7 @@ static InstrUID decode(OpcodeType type, dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; break; } - + switch (dec->modrm_type) { default: debug("Corrupt table! Unknown modrm_type"); @@ -171,10 +171,10 @@ static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { */ static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); - + if (!ret) ++(insn->readerCursor); - + return ret; } @@ -238,19 +238,19 @@ CONSUME_FUNC(consumeUInt64, uint64_t) */ static void dbgprintf(struct InternalInstruction* insn, const char* format, - ...) { + ...) { char buffer[256]; va_list ap; - + if (!insn->dlog) return; - + va_start(ap, format); (void)vsnprintf(buffer, sizeof(buffer), format, ap); va_end(ap); - + insn->dlog(insn->dlogArg, buffer); - + return; } @@ -305,27 +305,40 @@ static int readPrefixes(struct InternalInstruction* insn) { BOOL prefixGroups[4] = { FALSE }; uint64_t prefixLocation; uint8_t byte = 0; - + BOOL hasAdSize = FALSE; BOOL hasOpSize = FALSE; - + dbgprintf(insn, "readPrefixes()"); - + while (isPrefix) { prefixLocation = insn->readerCursor; - + if (consumeByte(insn, &byte)) return -1; /* - * If the first byte is a LOCK prefix break and let it be disassembled - * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>. - * FIXME there is currently no way to get the disassembler to print the - * lock prefix if it is not the first byte. + * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then + * break and let it be disassembled as a normal "instruction". */ - if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) - break; - + if (insn->readerCursor - 1 == insn->startLocation + && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) { + uint8_t nextByte; + if (byte == 0xf0) + break; + if (lookAtByte(insn, &nextByte)) + return -1; + if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { + if (consumeByte(insn, &nextByte)) + return -1; + if (lookAtByte(insn, &nextByte)) + return -1; + unconsumeByte(insn); + } + if (nextByte != 0x0f && nextByte != 0x90) + break; + } + switch (byte) { case 0xf0: /* LOCK */ case 0xf2: /* REPNE/REPNZ */ @@ -387,21 +400,21 @@ static int readPrefixes(struct InternalInstruction* insn) { isPrefix = FALSE; break; } - + if (isPrefix) dbgprintf(insn, "Found prefix 0x%hhx", byte); } - + insn->vexSize = 0; - + if (byte == 0xc4) { uint8_t byte1; - + if (lookAtByte(insn, &byte1)) { dbgprintf(insn, "Couldn't read second byte of VEX"); return -1; } - + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vexSize = 3; insn->necessaryPrefixLocation = insn->readerCursor - 1; @@ -410,67 +423,67 @@ static int readPrefixes(struct InternalInstruction* insn) { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } - + if (insn->vexSize == 3) { insn->vexPrefix[0] = byte; consumeByte(insn, &insn->vexPrefix[1]); consumeByte(insn, &insn->vexPrefix[2]); /* We simulate the REX prefix for simplicity's sake */ - + if (insn->mode == MODE_64BIT) { - insn->rexPrefix = 0x40 + insn->rexPrefix = 0x40 | (wFromVEX3of3(insn->vexPrefix[2]) << 3) | (rFromVEX2of3(insn->vexPrefix[1]) << 2) | (xFromVEX2of3(insn->vexPrefix[1]) << 1) | (bFromVEX2of3(insn->vexPrefix[1]) << 0); } - + switch (ppFromVEX3of3(insn->vexPrefix[2])) { default: break; case VEX_PREFIX_66: - hasOpSize = TRUE; + hasOpSize = TRUE; break; } - + dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); } } else if (byte == 0xc5) { uint8_t byte1; - + if (lookAtByte(insn, &byte1)) { dbgprintf(insn, "Couldn't read second byte of VEX"); return -1; } - + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vexSize = 2; } else { unconsumeByte(insn); } - + if (insn->vexSize == 2) { insn->vexPrefix[0] = byte; consumeByte(insn, &insn->vexPrefix[1]); - + if (insn->mode == MODE_64BIT) { - insn->rexPrefix = 0x40 + insn->rexPrefix = 0x40 | (rFromVEX2of2(insn->vexPrefix[1]) << 2); } - + switch (ppFromVEX2of2(insn->vexPrefix[1])) { default: break; case VEX_PREFIX_66: - hasOpSize = TRUE; + hasOpSize = TRUE; break; } - + dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); } } @@ -478,17 +491,17 @@ static int readPrefixes(struct InternalInstruction* insn) { if (insn->mode == MODE_64BIT) { if ((byte & 0xf0) == 0x40) { uint8_t opcodeByte; - + if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { dbgprintf(insn, "Redundant REX prefix"); return -1; } - + insn->rexPrefix = byte; insn->necessaryPrefixLocation = insn->readerCursor - 2; - + dbgprintf(insn, "Found REX prefix 0x%hhx", byte); - } else { + } else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } @@ -526,7 +539,7 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->immediateSize = (hasOpSize ? 2 : 4); } } - + return 0; } @@ -537,22 +550,22 @@ static int readPrefixes(struct InternalInstruction* insn) { * @param insn - The instruction whose opcode is to be read. * @return - 0 if the opcode could be read successfully; nonzero otherwise. */ -static int readOpcode(struct InternalInstruction* insn) { +static int readOpcode(struct InternalInstruction* insn) { /* Determine the length of the primary opcode */ - + uint8_t current; - + dbgprintf(insn, "readOpcode()"); - + insn->opcodeType = ONEBYTE; - + if (insn->vexSize == 3) { switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) { default: dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); - return -1; + return -1; case 0: break; case VEX_LOB_0F: @@ -564,7 +577,7 @@ static int readOpcode(struct InternalInstruction* insn) { insn->threeByteEscape = 0x38; insn->opcodeType = THREEBYTE_38; return consumeByte(insn, &insn->opcode); - case VEX_LOB_0F3A: + case VEX_LOB_0F3A: insn->twoByteEscape = 0x0f; insn->threeByteEscape = 0x3a; insn->opcodeType = THREEBYTE_3A; @@ -577,68 +590,68 @@ static int readOpcode(struct InternalInstruction* insn) { insn->opcodeType = TWOBYTE; return consumeByte(insn, &insn->opcode); } - + if (consumeByte(insn, ¤t)) return -1; - + if (current == 0x0f) { dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); - + insn->twoByteEscape = current; - + if (consumeByte(insn, ¤t)) return -1; - + if (current == 0x38) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); - + insn->threeByteEscape = current; - + if (consumeByte(insn, ¤t)) return -1; - + insn->opcodeType = THREEBYTE_38; } else if (current == 0x3a) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); - + insn->threeByteEscape = current; - + if (consumeByte(insn, ¤t)) return -1; - + insn->opcodeType = THREEBYTE_3A; } else if (current == 0xa6) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); - + insn->threeByteEscape = current; - + if (consumeByte(insn, ¤t)) return -1; - + insn->opcodeType = THREEBYTE_A6; } else if (current == 0xa7) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); - + insn->threeByteEscape = current; - + if (consumeByte(insn, ¤t)) return -1; - + insn->opcodeType = THREEBYTE_A7; } else { dbgprintf(insn, "Didn't find a three-byte escape prefix"); - + insn->opcodeType = TWOBYTE; } } - + /* * At this point we have consumed the full opcode. * Anything we consume from here on must be unconsumed. */ - + insn->opcode = current; - + return 0; } @@ -660,19 +673,19 @@ static int getIDWithAttrMask(uint16_t* instructionID, struct InternalInstruction* insn, uint8_t attrMask) { BOOL hasModRMExtension; - + uint8_t instructionClass; instructionClass = contextForAttrs(attrMask); - + hasModRMExtension = modRMRequired(insn->opcodeType, instructionClass, insn->opcode); - + if (hasModRMExtension) { if (readModRM(insn)) return -1; - + *instructionID = decode(insn->opcodeType, instructionClass, insn->opcode, @@ -683,7 +696,7 @@ static int getIDWithAttrMask(uint16_t* instructionID, insn->opcode, 0); } - + return 0; } @@ -696,7 +709,7 @@ static int getIDWithAttrMask(uint16_t* instructionID, */ static BOOL is16BitEquivalent(const char* orig, const char* equiv) { off_t i; - + for (i = 0;; i++) { if (orig[i] == '\0' && equiv[i] == '\0') return TRUE; @@ -715,8 +728,8 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) { } /* - * getID - Determines the ID of an instruction, consuming the ModR/M byte as - * appropriate for extended and escape opcodes. Determines the attributes and + * getID - Determines the ID of an instruction, consuming the ModR/M byte as + * appropriate for extended and escape opcodes. Determines the attributes and * context for the instruction before doing so. * * @param insn - The instruction whose ID is to be determined. @@ -726,21 +739,21 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) { static int getID(struct InternalInstruction* insn, const void *miiArg) { uint8_t attrMask; uint16_t instructionID; - + dbgprintf(insn, "getID()"); - + attrMask = ATTR_NONE; if (insn->mode == MODE_64BIT) attrMask |= ATTR_64BIT; - + if (insn->vexSize) { attrMask |= ATTR_VEX; if (insn->vexSize == 3) { switch (ppFromVEX3of3(insn->vexPrefix[2])) { case VEX_PREFIX_66: - attrMask |= ATTR_OPSIZE; + attrMask |= ATTR_OPSIZE; break; case VEX_PREFIX_F3: attrMask |= ATTR_XS; @@ -749,14 +762,14 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { attrMask |= ATTR_XD; break; } - + if (lFromVEX3of3(insn->vexPrefix[2])) attrMask |= ATTR_VEXL; } else if (insn->vexSize == 2) { switch (ppFromVEX2of2(insn->vexPrefix[1])) { case VEX_PREFIX_66: - attrMask |= ATTR_OPSIZE; + attrMask |= ATTR_OPSIZE; break; case VEX_PREFIX_F3: attrMask |= ATTR_XS; @@ -765,7 +778,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { attrMask |= ATTR_XD; break; } - + if (lFromVEX2of2(insn->vexPrefix[1])) attrMask |= ATTR_VEXL; } @@ -836,26 +849,26 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { * conservative, but in the specific case where OpSize is present but not * in the right place we check if there's a 16-bit operation. */ - + const struct InstructionSpecifier *spec; uint16_t instructionIDWithOpsize; const char *specName, *specWithOpSizeName; - + spec = specifierForUID(instructionID); - + if (getIDWithAttrMask(&instructionIDWithOpsize, insn, attrMask | ATTR_OPSIZE)) { - /* + /* * ModRM required with OpSize but not present; give up and return version * without OpSize set */ - + insn->instructionID = instructionID; insn->spec = spec; return 0; } - + specName = x86DisassemblerGetInstrName(instructionID, miiArg); specWithOpSizeName = x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); @@ -882,10 +895,10 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { const struct InstructionSpecifier *specWithNewOpcode; spec = specifierForUID(instructionID); - + /* Borrow opcode from one of the other XCHGar opcodes */ insn->opcode = 0x91; - + if (getIDWithAttrMask(&instructionIDWithNewOpcode, insn, attrMask)) { @@ -906,10 +919,10 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { return 0; } - + insn->instructionID = instructionID; insn->spec = specifierForUID(insn->instructionID); - + return 0; } @@ -924,14 +937,14 @@ static int readSIB(struct InternalInstruction* insn) { SIBIndex sibIndexBase = 0; SIBBase sibBaseBase = 0; uint8_t index, base; - + dbgprintf(insn, "readSIB()"); - + if (insn->consumedSIB) return 0; - + insn->consumedSIB = TRUE; - + switch (insn->addressSize) { case 2: dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); @@ -949,9 +962,9 @@ static int readSIB(struct InternalInstruction* insn) { if (consumeByte(insn, &insn->sib)) return -1; - + index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); - + switch (index) { case 0x4: insn->sibIndex = SIB_INDEX_NONE; @@ -963,7 +976,7 @@ static int readSIB(struct InternalInstruction* insn) { insn->sibIndex = SIB_INDEX_NONE; break; } - + switch (scaleFromSIB(insn->sib)) { case 0: insn->sibScale = 1; @@ -978,9 +991,9 @@ static int readSIB(struct InternalInstruction* insn) { insn->sibScale = 8; break; } - + base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); - + switch (base) { case 0x5: switch (modFromModRM(insn->modRM)) { @@ -990,12 +1003,12 @@ static int readSIB(struct InternalInstruction* insn) { break; case 0x1: insn->eaDisplacement = EA_DISP_8; - insn->sibBase = (insn->addressSize == 4 ? + insn->sibBase = (insn->addressSize == 4 ? SIB_BASE_EBP : SIB_BASE_RBP); break; case 0x2: insn->eaDisplacement = EA_DISP_32; - insn->sibBase = (insn->addressSize == 4 ? + insn->sibBase = (insn->addressSize == 4 ? SIB_BASE_EBP : SIB_BASE_RBP); break; case 0x3: @@ -1007,7 +1020,7 @@ static int readSIB(struct InternalInstruction* insn) { insn->sibBase = (SIBBase)(sibBaseBase + base); break; } - + return 0; } @@ -1015,22 +1028,22 @@ static int readSIB(struct InternalInstruction* insn) { * readDisplacement - Consumes the displacement of an instruction. * * @param insn - The instruction whose displacement is to be read. - * @return - 0 if the displacement byte was successfully read; nonzero + * @return - 0 if the displacement byte was successfully read; nonzero * otherwise. */ -static int readDisplacement(struct InternalInstruction* insn) { +static int readDisplacement(struct InternalInstruction* insn) { int8_t d8; int16_t d16; int32_t d32; - + dbgprintf(insn, "readDisplacement()"); - + if (insn->consumedDisplacement) return 0; - + insn->consumedDisplacement = TRUE; insn->displacementOffset = insn->readerCursor - insn->startLocation; - + switch (insn->eaDisplacement) { case EA_DISP_NONE: insn->consumedDisplacement = FALSE; @@ -1051,7 +1064,7 @@ static int readDisplacement(struct InternalInstruction* insn) { insn->displacement = d32; break; } - + insn->consumedDisplacement = TRUE; return 0; } @@ -1063,22 +1076,22 @@ static int readDisplacement(struct InternalInstruction* insn) { * @param insn - The instruction whose addressing information is to be read. * @return - 0 if the information was successfully read; nonzero otherwise. */ -static int readModRM(struct InternalInstruction* insn) { +static int readModRM(struct InternalInstruction* insn) { uint8_t mod, rm, reg; - + dbgprintf(insn, "readModRM()"); - + if (insn->consumedModRM) return 0; - + if (consumeByte(insn, &insn->modRM)) return -1; insn->consumedModRM = TRUE; - + mod = modFromModRM(insn->modRM); rm = rmFromModRM(insn->modRM); reg = regFromModRM(insn->modRM); - + /* * This goes by insn->registerSize to pick the correct register, which messes * up if we're using (say) XMM or 8-bit register operands. That gets fixed in @@ -1098,16 +1111,16 @@ static int readModRM(struct InternalInstruction* insn) { insn->eaRegBase = EA_REG_RAX; break; } - + reg |= rFromREX(insn->rexPrefix) << 3; rm |= bFromREX(insn->rexPrefix) << 3; - + insn->reg = (Reg)(insn->regBase + reg); - + switch (insn->addressSize) { case 2: insn->eaBaseBase = EA_BASE_BX_SI; - + switch (mod) { case 0x0: if (rm == 0x6) { @@ -1142,14 +1155,14 @@ static int readModRM(struct InternalInstruction* insn) { case 4: case 8: insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); - + switch (mod) { case 0x0: insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ switch (rm) { case 0x4: case 0xc: /* in case REXW.b is set */ - insn->eaBase = (insn->addressSize == 4 ? + insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64); readSIB(insn); if (readDisplacement(insn)) @@ -1191,7 +1204,7 @@ static int readModRM(struct InternalInstruction* insn) { } break; } /* switch (insn->addressSize) */ - + return 0; } @@ -1274,12 +1287,12 @@ GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) * @return - 0 if fixup was successful; -1 if the register returned was * invalid for its class. */ -static int fixupReg(struct InternalInstruction *insn, +static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op) { uint8_t valid; - + dbgprintf(insn, "fixupReg()"); - + switch ((OperandEncoding)op->encoding) { default: debug("Expected a REG or R/M encoding in fixupReg"); @@ -1311,12 +1324,12 @@ static int fixupReg(struct InternalInstruction *insn, } break; } - + return 0; } /* - * readOpcodeModifier - Reads an operand from the opcode field of an + * readOpcodeModifier - Reads an operand from the opcode field of an * instruction. Handles AddRegFrm instructions. * * @param insn - The instruction whose opcode field is to be read. @@ -1326,12 +1339,12 @@ static int fixupReg(struct InternalInstruction *insn, */ static int readOpcodeModifier(struct InternalInstruction* insn) { dbgprintf(insn, "readOpcodeModifier()"); - + if (insn->consumedOpcodeModifier) return 0; - + insn->consumedOpcodeModifier = TRUE; - + switch (insn->spec->modifierType) { default: debug("Unknown modifier type."); @@ -1345,11 +1358,11 @@ static int readOpcodeModifier(struct InternalInstruction* insn) { case MODIFIER_MODRM: insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; return 0; - } + } } /* - * readOpcodeRegister - Reads an operand from the opcode field of an + * readOpcodeRegister - Reads an operand from the opcode field of an * instruction and interprets it appropriately given the operand width. * Handles AddRegFrm instructions. * @@ -1364,39 +1377,39 @@ static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { if (readOpcodeModifier(insn)) return -1; - + if (size == 0) size = insn->registerSize; - + switch (size) { case 1: - insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) + insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | insn->opcodeModifier)); - if (insn->rexPrefix && + if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 && insn->opcodeRegister < MODRM_REG_AL + 0x8) { insn->opcodeRegister = (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4)); } - + break; case 2: insn->opcodeRegister = (Reg)(MODRM_REG_AX - + ((bFromREX(insn->rexPrefix) << 3) + + ((bFromREX(insn->rexPrefix) << 3) | insn->opcodeModifier)); break; case 4: insn->opcodeRegister = (Reg)(MODRM_REG_EAX - + ((bFromREX(insn->rexPrefix) << 3) + + ((bFromREX(insn->rexPrefix) << 3) | insn->opcodeModifier)); break; case 8: - insn->opcodeRegister = (Reg)(MODRM_REG_RAX - + ((bFromREX(insn->rexPrefix) << 3) + insn->opcodeRegister = (Reg)(MODRM_REG_RAX + + ((bFromREX(insn->rexPrefix) << 3) | insn->opcodeModifier)); break; } - + return 0; } @@ -1414,20 +1427,20 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { uint16_t imm16; uint32_t imm32; uint64_t imm64; - + dbgprintf(insn, "readImmediate()"); - + if (insn->numImmediatesConsumed == 2) { debug("Already consumed two immediates"); return -1; } - + if (size == 0) size = insn->immediateSize; else insn->immediateSize = size; insn->immediateOffset = insn->readerCursor - insn->startLocation; - + switch (size) { case 1: if (consumeByte(insn, &imm8)) @@ -1450,9 +1463,9 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { insn->immediates[insn->numImmediatesConsumed] = imm64; break; } - + insn->numImmediatesConsumed++; - + return 0; } @@ -1465,7 +1478,7 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { */ static int readVVVV(struct InternalInstruction* insn) { dbgprintf(insn, "readVVVV()"); - + if (insn->vexSize == 3) insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); else if (insn->vexSize == 2) @@ -1490,14 +1503,14 @@ static int readOperands(struct InternalInstruction* insn) { int index; int hasVVVV, needVVVV; int sawRegImm = 0; - + dbgprintf(insn, "readOperands()"); /* If non-zero vvvv specified, need to make sure one of the operands uses it. */ hasVVVV = !readVVVV(insn); needVVVV = hasVVVV && (insn->vvvv != 0); - + for (index = 0; index < X86_MAX_OPERANDS; ++index) { switch (x86OperandSets[insn->spec->operands][index].encoding) { case ENCODING_NONE: @@ -1599,7 +1612,7 @@ static int readOperands(struct InternalInstruction* insn) { /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ if (needVVVV) return -1; - + return 0; } @@ -1607,7 +1620,7 @@ static int readOperands(struct InternalInstruction* insn) { * decodeInstruction - Reads and interprets a full instruction provided by the * user. * - * @param insn - A pointer to the instruction to be populated. Must be + * @param insn - A pointer to the instruction to be populated. Must be * pre-allocated. * @param reader - The function to be used to read the instruction's bytes. * @param readerArg - A generic argument to be passed to the reader to store @@ -1632,7 +1645,7 @@ int decodeInstruction(struct InternalInstruction* insn, uint64_t startLoc, DisassemblerMode mode) { memset(insn, 0, sizeof(struct InternalInstruction)); - + insn->reader = reader; insn->readerArg = readerArg; insn->dlog = logger; @@ -1641,7 +1654,7 @@ int decodeInstruction(struct InternalInstruction* insn, insn->readerCursor = startLoc; insn->mode = mode; insn->numImmediatesConsumed = 0; - + if (readPrefixes(insn) || readOpcode(insn) || getID(insn, miiArg) || @@ -1650,14 +1663,14 @@ int decodeInstruction(struct InternalInstruction* insn, return -1; insn->operands = &x86OperandSets[insn->spec->operands][0]; - + insn->length = insn->readerCursor - insn->startLocation; - + dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", startLoc, insn->readerCursor, insn->length); - + if (insn->length > 15) dbgprintf(insn, "Instruction exceeds 15-byte limit"); - + return 0; } diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 9e68388cf2..3669560070 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -276,9 +276,9 @@ namespace X86II { MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36, MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40, MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46, - MRM_D4 = 47, MRM_D5 = 48, MRM_D8 = 49, MRM_D9 = 50, - MRM_DA = 51, MRM_DB = 52, MRM_DC = 53, MRM_DD = 54, - MRM_DE = 55, MRM_DF = 56, + MRM_D4 = 47, MRM_D5 = 48, MRM_D6 = 49, MRM_D8 = 50, + MRM_D9 = 51, MRM_DA = 52, MRM_DB = 53, MRM_DC = 54, + MRM_DD = 55, MRM_DE = 56, MRM_DF = 57, /// RawFrmImm8 - This is used for the ENTER instruction, which has two /// immediates, the first of which is a 16-bit immediate (specified by @@ -574,16 +574,13 @@ namespace X86II { ++FirstMemOp;// Skip the register dest (which is encoded in VEX_VVVV). return FirstMemOp; } - case X86II::MRM_C1: case X86II::MRM_C2: - case X86II::MRM_C3: case X86II::MRM_C4: - case X86II::MRM_C8: case X86II::MRM_C9: - case X86II::MRM_E8: case X86II::MRM_F0: - case X86II::MRM_F8: case X86II::MRM_F9: - case X86II::MRM_D0: case X86II::MRM_D1: - case X86II::MRM_D4: case X86II::MRM_D5: - case X86II::MRM_D8: case X86II::MRM_D9: - case X86II::MRM_DA: case X86II::MRM_DB: - case X86II::MRM_DC: case X86II::MRM_DD: + case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3: + case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9: + case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8: + case X86II::MRM_F9: case X86II::MRM_D0: case X86II::MRM_D1: + case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6: + case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA: + case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF: return -1; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 122204ae75..776cee1e35 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -446,6 +446,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS) const { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -650,12 +651,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // dst(ModR/M), src1(ModR/M) // dst(ModR/M), src1(ModR/M), imm8 // + // FMA4: + // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; CurOp++; if (HasVEX_4V) VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; CurOp++; @@ -666,9 +674,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // MRMDestReg instructions forms: // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; break; case X86II::MRM0r: case X86II::MRM1r: @@ -1038,9 +1052,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMDestReg: EmitByte(BaseOpcode, CurByte, OS); + SrcRegNum = CurOp + 1; + + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + ++SrcRegNum; + EmitRegModRMByte(MI.getOperand(CurOp), - GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); - CurOp += 2; + GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS); + CurOp = SrcRegNum + 1; break; case X86II::MRMDestMem: @@ -1117,16 +1136,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, TSFlags, CurByte, OS, Fixups); CurOp += X86::AddrNumOperands; break; - case X86II::MRM_C1: case X86II::MRM_C2: - case X86II::MRM_C3: case X86II::MRM_C4: - case X86II::MRM_C8: case X86II::MRM_C9: - case X86II::MRM_D0: case X86II::MRM_D1: - case X86II::MRM_D4: case X86II::MRM_D5: - case X86II::MRM_D8: case X86II::MRM_D9: - case X86II::MRM_DA: case X86II::MRM_DB: - case X86II::MRM_DC: case X86II::MRM_DD: - case X86II::MRM_DE: case X86II::MRM_DF: - case X86II::MRM_E8: case X86II::MRM_F0: + case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3: + case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9: + case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4: + case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8: + case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB: + case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE: + case X86II::MRM_DF: case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9: EmitByte(BaseOpcode, CurByte, OS); @@ -1143,6 +1159,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM_D1: MRM = 0xD1; break; case X86II::MRM_D4: MRM = 0xD4; break; case X86II::MRM_D5: MRM = 0xD5; break; + case X86II::MRM_D6: MRM = 0xD6; break; case X86II::MRM_D8: MRM = 0xD8; break; case X86II::MRM_D9: MRM = 0xD9; break; case X86II::MRM_DA: MRM = 0xDA; break; diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 0216252c19..1dcc344e7f 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -120,8 +120,14 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", "Support BMI2 instructions">; def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", "Support RTM instructions">; +def FeatureHLE : SubtargetFeature<"hle", "HasHLE", "true", + "Support HLE">; def FeatureADX : SubtargetFeature<"adx", "HasADX", "true", "Support ADX instructions">; +def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", + "Support PRFCHW instructions">; +def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", + "Support RDSEED instruction">; def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", @@ -130,6 +136,9 @@ def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true", "Pad short functions">; +def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", + "CallRegIndirect", "true", + "Call register indirect">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -143,9 +152,6 @@ def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", class Proc<string Name, list<SubtargetFeature> Features> : ProcessorModel<Name, GenericModel, Features>; -class AtomProc<string Name, list<SubtargetFeature> Features> - : ProcessorModel<Name, AtomModel, Features>; - def : Proc<"generic", []>; def : Proc<"i386", []>; def : Proc<"i486", []>; @@ -162,46 +168,61 @@ def : Proc<"pentium4", [FeatureSSE2]>; def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>; def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem, FeatureFastUAMem]>; -def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>; -def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>; -def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, - FeatureSlowBTMem]>; -def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B, - FeatureSlowBTMem]>; -def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, - FeatureSlowBTMem]>; -def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B, - FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, - FeatureSlowDivide, FeaturePadShortFunctions]>; +// Intel Core Duo. +def : ProcessorModel<"yonah", SandyBridgeModel, + [FeatureSSE3, FeatureSlowBTMem]>; + +// NetBurst. +def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>; +def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>; + +// Intel Core 2 Solo/Duo. +def : ProcessorModel<"core2", SandyBridgeModel, + [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>; +def : ProcessorModel<"penryn", SandyBridgeModel, + [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; + +// Atom. +def : ProcessorModel<"atom", AtomModel, + [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B, + FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, + FeatureSlowDivide, + FeatureCallRegIndirect, + FeaturePadShortFunctions]>; + // "Arrandale" along with corei3 and corei5 -def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureFastUAMem, - FeaturePOPCNT, FeatureAES]>; -def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureFastUAMem, - FeaturePOPCNT]>; +def : ProcessorModel<"corei7", SandyBridgeModel, + [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, + FeatureFastUAMem, FeaturePOPCNT, FeatureAES]>; + +def : ProcessorModel<"nehalem", SandyBridgeModel, + [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, + FeatureFastUAMem, FeaturePOPCNT]>; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge -def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureFastUAMem, - FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>; +def : ProcessorModel<"westmere", SandyBridgeModel, + [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, + FeatureFastUAMem, FeaturePOPCNT, FeatureAES, + FeaturePCLMUL]>; // Sandy Bridge // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. -def : Proc<"corei7-avx", [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem, - FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>; +def : ProcessorModel<"corei7-avx", SandyBridgeModel, + [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem, + FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>; // Ivy Bridge -def : Proc<"core-avx-i", [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem, - FeaturePOPCNT, FeatureAES, FeaturePCLMUL, - FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>; +def : ProcessorModel<"core-avx-i", SandyBridgeModel, + [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem, + FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND, + FeatureF16C, FeatureFSGSBase]>; // Haswell -def : Proc<"core-avx2", [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem, - FeaturePOPCNT, FeatureAES, FeaturePCLMUL, - FeatureRDRAND, FeatureF16C, FeatureFSGSBase, - FeatureMOVBE, FeatureLZCNT, FeatureBMI, - FeatureBMI2, FeatureFMA, - FeatureRTM]>; +def : ProcessorModel<"core-avx2", HaswellModel, + [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem, + FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND, + FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, + FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM, + FeatureHLE]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [Feature3DNow]>; diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index ac5daec2b2..6b228b0b03 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -201,7 +201,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, case X86II::MO_TLVP_PIC_BASE: O << "@TLVP" << '-' << *MF->getPICBaseSymbol(); break; - case X86II::MO_SECREL: O << "@SECREL"; break; + case X86II::MO_SECREL: O << "@SECREL32"; break; } } diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index ece38aa346..2518e02e2a 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -816,6 +816,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, const MCInstrDesc *Desc) const { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -1032,6 +1033,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, if (HasVEX_4V) VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; CurOp++; @@ -1042,9 +1047,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, // MRMDestReg instructions forms: // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; break; case X86II::MRM0r: case X86II::MRM1r: @@ -1279,9 +1290,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::MRMDestReg: { MCE.emitByte(BaseOpcode); + + unsigned SrcRegNum = CurOp+1; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + emitRegModRMByte(MI.getOperand(CurOp).getReg(), - getX86RegNum(MI.getOperand(CurOp+1).getReg())); - CurOp += 2; + getX86RegNum(MI.getOperand(SrcRegNum).getReg())); + CurOp = SrcRegNum + 1; break; } case X86II::MRMDestMem: { diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index b5c3270065..c5da0b9b16 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -816,14 +816,16 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // The x86-64 ABI for returning structs by value requires that we copy // the sret argument into %rax for the return. We saved the argument into // a virtual register in the entry block, so now we copy the value out - // and into %rax. - if (Subtarget->is64Bit() && F.hasStructRetAttr()) { + // and into %rax. We also do the same with %eax for Win32. + if (F.hasStructRetAttr() && + (Subtarget->is64Bit() || Subtarget->isTargetWindows())) { unsigned Reg = X86MFInfo->getSRetReturnReg(); assert(Reg && "SRetReturnReg should have been set in LowerFormalArguments()!"); + unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - X86::RAX).addReg(Reg); - RetRegs.push_back(X86::RAX); + RetReg).addReg(Reg); + RetRegs.push_back(RetReg); } // Now emit the RET. @@ -1526,6 +1528,9 @@ bool X86FastISel::FastLowerArguments() { if (!FuncInfo.CanLowerReturn) return false; + if (Subtarget->isTargetWindows()) + return false; + const Function *F = FuncInfo.Fn; if (F->isVarArg()) return false; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 00fbe6924c..6041669f81 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -444,7 +444,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() { SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. if (OptLevel != CodeGenOpt::None && - (N->getOpcode() == X86ISD::CALL || + // Only does this when target favors doesn't favor register indirect + // call. + ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) || (N->getOpcode() == X86ISD::TC_RETURN && // Only does this if load can be folded into TC_RETURN. (Subtarget->is64Bit() || diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 960870dc60..69341869aa 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -470,7 +470,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SETCC , MVT::i64 , Custom); } setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); - // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intened to support + // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to // support continuation, user-level threading, and etc.. As a result, no // other SjLj exception interfaces are implemented and please don't build @@ -1053,23 +1053,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v8i16, Custom); setOperationAction(ISD::SRA, MVT::v16i8, Custom); - if (Subtarget->hasInt256()) { - setOperationAction(ISD::SRL, MVT::v2i64, Legal); - setOperationAction(ISD::SRL, MVT::v4i32, Legal); - - setOperationAction(ISD::SHL, MVT::v2i64, Legal); - setOperationAction(ISD::SHL, MVT::v4i32, Legal); + // In the customized shift lowering, the legal cases in AVX2 will be + // recognized. + setOperationAction(ISD::SRL, MVT::v2i64, Custom); + setOperationAction(ISD::SRL, MVT::v4i32, Custom); - setOperationAction(ISD::SRA, MVT::v4i32, Legal); - } else { - setOperationAction(ISD::SRL, MVT::v2i64, Custom); - setOperationAction(ISD::SRL, MVT::v4i32, Custom); + setOperationAction(ISD::SHL, MVT::v2i64, Custom); + setOperationAction(ISD::SHL, MVT::v4i32, Custom); - setOperationAction(ISD::SHL, MVT::v2i64, Custom); - setOperationAction(ISD::SHL, MVT::v4i32, Custom); + setOperationAction(ISD::SRA, MVT::v4i32, Custom); - setOperationAction(ISD::SRA, MVT::v4i32, Custom); - } setOperationAction(ISD::SDIV, MVT::v8i16, Custom); setOperationAction(ISD::SDIV, MVT::v4i32, Custom); } @@ -1118,6 +1111,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); @@ -1186,14 +1180,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); - setOperationAction(ISD::SRL, MVT::v4i64, Legal); - setOperationAction(ISD::SRL, MVT::v8i32, Legal); - - setOperationAction(ISD::SHL, MVT::v4i64, Legal); - setOperationAction(ISD::SHL, MVT::v8i32, Legal); - - setOperationAction(ISD::SRA, MVT::v8i32, Legal); - setOperationAction(ISD::SDIV, MVT::v8i32, Custom); } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); @@ -1210,15 +1196,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::MUL, MVT::v8i32, Custom); setOperationAction(ISD::MUL, MVT::v16i16, Custom); // Don't lower v32i8 because there is no 128-bit byte mul + } - setOperationAction(ISD::SRL, MVT::v4i64, Custom); - setOperationAction(ISD::SRL, MVT::v8i32, Custom); + // In the customized shift lowering, the legal cases in AVX2 will be + // recognized. + setOperationAction(ISD::SRL, MVT::v4i64, Custom); + setOperationAction(ISD::SRL, MVT::v8i32, Custom); - setOperationAction(ISD::SHL, MVT::v4i64, Custom); - setOperationAction(ISD::SHL, MVT::v8i32, Custom); + setOperationAction(ISD::SHL, MVT::v4i64, Custom); + setOperationAction(ISD::SHL, MVT::v8i32, Custom); - setOperationAction(ISD::SRA, MVT::v8i32, Custom); - } + setOperationAction(ISD::SRA, MVT::v8i32, Custom); // Custom lower several nodes for 256-bit types. for (int i = MVT::FIRST_VECTOR_VALUETYPE; @@ -1356,7 +1344,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; setPrefLoopAlignment(4); // 2^4 bytes. - BenefitFromCodePlacementOpt = true; // Predictable cmov don't hurt on atom because it's in-order. PredictableSelectIsExpensive = !Subtarget->isAtom(); @@ -1679,10 +1666,11 @@ X86TargetLowering::LowerReturn(SDValue Chain, // The x86-64 ABIs require that for returning structs by value we copy // the sret argument into %rax/%eax (depending on ABI) for the return. + // Win32 requires us to put the sret argument to %eax as well. // We saved the argument into a virtual register in the entry block, // so now we copy the value out and into %rax/%eax. - if (Subtarget->is64Bit() && - DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { + if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() && + (Subtarget->is64Bit() || Subtarget->isTargetWindows())) { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); @@ -1690,12 +1678,14 @@ X86TargetLowering::LowerReturn(SDValue Chain, "SRetReturnReg should have been set in LowerFormalArguments()."); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); - unsigned RetValReg = Subtarget->isTarget64BitILP32() ? X86::EAX : X86::RAX; + unsigned RetValReg + = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ? + X86::RAX : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); Flag = Chain.getValue(1); // RAX/EAX now acts like a return value. - RetOps.push_back(DAG.getRegister(RetValReg, MVT::i64)); + RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy())); } RetOps[0] = Chain; // Update chain. @@ -2049,9 +2039,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // The x86-64 ABIs require that for returning structs by value we copy // the sret argument into %rax/%eax (depending on ABI) for the return. + // Win32 requires us to put the sret argument to %eax as well. // Save the argument into a virtual register so that we can access it // from the return points. - if (Is64Bit && MF.getFunction()->hasStructRetAttr()) { + if (MF.getFunction()->hasStructRetAttr() && + (Subtarget->is64Bit() || Subtarget->isTargetWindows())) { X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { @@ -7834,7 +7826,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { Chain.getValue(1)); } - if (Subtarget->isTargetWindows()) { + if (Subtarget->isTargetWindows() || Subtarget->isTargetMingw()) { // Just use the implicit TLS architecture // Need to generate someting similar to: // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage @@ -7854,18 +7846,19 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = DAG.getEntryNode(); // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or - // %gs:0x58 (64-bit). + // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly + // use its literal value of 0x2C. Value *Ptr = Constant::getNullValue(Subtarget->is64Bit() ? Type::getInt8PtrTy(*DAG.getContext(), 256) : Type::getInt32PtrTy(*DAG.getContext(), 257)); - SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, - Subtarget->is64Bit() - ? DAG.getIntPtrConstant(0x58) - : DAG.getExternalSymbol("_tls_array", - getPointerTy()), + SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58) : + (Subtarget->isTargetMingw() ? DAG.getIntPtrConstant(0x2C) : + DAG.getExternalSymbol("_tls_array", getPointerTy())); + + SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, TlsArray, MachinePointerInfo(Ptr), false, false, false, 0); @@ -10921,16 +10914,23 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) { switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. - // RDRAND intrinsics. + // RDRAND/RDSEED intrinsics. case Intrinsic::x86_rdrand_16: case Intrinsic::x86_rdrand_32: - case Intrinsic::x86_rdrand_64: { + case Intrinsic::x86_rdrand_64: + case Intrinsic::x86_rdseed_16: + case Intrinsic::x86_rdseed_32: + case Intrinsic::x86_rdseed_64: { + unsigned Opcode = (IntNo == Intrinsic::x86_rdseed_16 || + IntNo == Intrinsic::x86_rdseed_32 || + IntNo == Intrinsic::x86_rdseed_64) ? X86ISD::RDSEED : + X86ISD::RDRAND; // Emit the node with the right value type. SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other); - SDValue Result = DAG.getNode(X86ISD::RDRAND, dl, VTs, Op.getOperand(0)); + SDValue Result = DAG.getNode(Opcode, dl, VTs, Op.getOperand(0)); - // If the value returned by RDRAND was valid (CF=1), return 1. Otherwise - // return the value from Rand, which is always 0, casted to i32. + // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1. + // Otherwise return the value from Rand, which is always 0, casted to i32. SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), DAG.getConstant(1, Op->getValueType(1)), DAG.getConstant(X86::COND_B, MVT::i32), @@ -10943,6 +10943,18 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, SDValue(Result.getNode(), 2)); } + + // XTEST intrinsics. + case Intrinsic::x86_xtest: { + SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other); + SDValue InTrans = DAG.getNode(X86ISD::XTEST, dl, VTs, Op.getOperand(0)); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86::COND_NE, MVT::i8), + InTrans); + SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC); + return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), + Ret, SDValue(InTrans.getNode(), 1)); + } } } @@ -11490,16 +11502,13 @@ SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { - +static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); - if (!Subtarget->hasSSE2()) - return SDValue(); - // Optimize shl/srl/sra with constant shift amount. if (isSplatVector(Amt.getNode())) { SDValue SclrAmt = Amt->getOperand(0); @@ -11610,6 +11619,224 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } } + // Special case in 32-bit mode, where i64 is expanded into high and low parts. + if (!Subtarget->is64Bit() && + (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + Amt.getOpcode() == ISD::BITCAST && + Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { + Amt = Amt.getOperand(0); + unsigned Ratio = Amt.getValueType().getVectorNumElements() / + VT.getVectorNumElements(); + unsigned RatioInLog2 = Log2_32_Ceil(Ratio); + uint64_t ShiftAmt = 0; + for (unsigned i = 0; i != Ratio; ++i) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i)); + if (C == 0) + return SDValue(); + // 6 == Log2(64) + ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2))); + } + // Check remaining shift amounts. + for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { + uint64_t ShAmt = 0; + for (unsigned j = 0; j != Ratio; ++j) { + ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Amt.getOperand(i + j)); + if (C == 0) + return SDValue(); + // 6 == Log2(64) + ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2))); + } + if (ShAmt != ShiftAmt) + return SDValue(); + } + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + return DAG.getNode(X86ISD::VSHLI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + case ISD::SRL: + return DAG.getNode(X86ISD::VSRLI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + case ISD::SRA: + return DAG.getNode(X86ISD::VSRAI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + } + } + + return SDValue(); +} + +static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, + const X86Subtarget* Subtarget) { + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue R = Op.getOperand(0); + SDValue Amt = Op.getOperand(1); + + if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) || + VT == MVT::v4i32 || VT == MVT::v8i16 || + (Subtarget->hasInt256() && + ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) || + VT == MVT::v8i32 || VT == MVT::v16i16))) { + SDValue BaseShAmt; + EVT EltVT = VT.getVectorElementType(); + + if (Amt.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = VT.getVectorNumElements(); + unsigned i, j; + for (i = 0; i != NumElts; ++i) { + if (Amt.getOperand(i).getOpcode() == ISD::UNDEF) + continue; + break; + } + for (j = i; j != NumElts; ++j) { + SDValue Arg = Amt.getOperand(j); + if (Arg.getOpcode() == ISD::UNDEF) continue; + if (Arg != Amt.getOperand(i)) + break; + } + if (i != NumElts && j == NumElts) + BaseShAmt = Amt.getOperand(i); + } else { + if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) + Amt = Amt.getOperand(0); + if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE && + cast<ShuffleVectorSDNode>(Amt)->isSplat()) { + SDValue InVec = Amt.getOperand(0); + if (InVec.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = InVec.getValueType().getVectorNumElements(); + unsigned i = 0; + for (; i != NumElts; ++i) { + SDValue Arg = InVec.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + BaseShAmt = Arg; + break; + } + } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { + unsigned SplatIdx = + cast<ShuffleVectorSDNode>(Amt)->getSplatIndex(); + if (C->getZExtValue() == SplatIdx) + BaseShAmt = InVec.getOperand(1); + } + } + if (BaseShAmt.getNode() == 0) + BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt, + DAG.getIntPtrConstant(0)); + } + } + + if (BaseShAmt.getNode()) { + if (EltVT.bitsGT(MVT::i32)) + BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt); + else if (EltVT.bitsLT(MVT::i32)) + BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); + + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v2i64: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v4i64: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG); + } + case ISD::SRA: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v4i32: + case MVT::v8i16: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG); + } + case ISD::SRL: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v2i64: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v4i64: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG); + } + } + } + } + + // Special case in 32-bit mode, where i64 is expanded into high and low parts. + if (!Subtarget->is64Bit() && + (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + Amt.getOpcode() == ISD::BITCAST && + Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { + Amt = Amt.getOperand(0); + unsigned Ratio = Amt.getValueType().getVectorNumElements() / + VT.getVectorNumElements(); + std::vector<SDValue> Vals(Ratio); + for (unsigned i = 0; i != Ratio; ++i) + Vals[i] = Amt.getOperand(i); + for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { + for (unsigned j = 0; j != Ratio; ++j) + if (Vals[j] != Amt.getOperand(i + j)) + return SDValue(); + } + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1)); + case ISD::SRL: + return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1)); + case ISD::SRA: + return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1)); + } + } + + return SDValue(); +} + +SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue R = Op.getOperand(0); + SDValue Amt = Op.getOperand(1); + SDValue V; + + if (!Subtarget->hasSSE2()) + return SDValue(); + + V = LowerScalarImmediateShift(Op, DAG, Subtarget); + if (V.getNode()) + return V; + + V = LowerScalarVariableShift(Op, DAG, Subtarget); + if (V.getNode()) + return V; + + // AVX2 has VPSLLV/VPSRAV/VPSRLV. + if (Subtarget->hasInt256()) { + if (Op.getOpcode() == ISD::SRL && + (VT == MVT::v2i64 || VT == MVT::v4i32 || + VT == MVT::v4i64 || VT == MVT::v8i32)) + return Op; + if (Op.getOpcode() == ISD::SHL && + (VT == MVT::v2i64 || VT == MVT::v4i32 || + VT == MVT::v4i64 || VT == MVT::v8i32)) + return Op; + if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32)) + return Op; + } + // Lower SHL with variable shift amount. if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT)); @@ -11826,8 +12053,23 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, // fall through case MVT::v4i32: case MVT::v8i16: { - SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, - Op.getOperand(0), ShAmt, DAG); + // (sext (vzext x)) -> (vsext x) + SDValue Op0 = Op.getOperand(0); + SDValue Op00 = Op0.getOperand(0); + SDValue Tmp1; + // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. + if (Op0.getOpcode() == ISD::BITCAST && + Op00.getOpcode() == ISD::VECTOR_SHUFFLE) + Tmp1 = LowerVectorIntExtend(Op00, DAG); + if (Tmp1.getNode()) { + SDValue Tmp1Op0 = Tmp1.getOperand(0); + assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && + "This optimization is invalid without a VZEXT."); + return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); + } + + // If the above didn't work, then just use Shift-Left + Shift-Right. + Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG); return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG); } } @@ -12262,7 +12504,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::UINT_TO_FP: { - if (N->getOperand(0).getValueType() != MVT::v2i32 && + assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); + if (N->getOperand(0).getValueType() != MVT::v2i32 || N->getValueType(0) != MVT::v2f32) return; SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, @@ -12545,6 +12788,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL"; case X86ISD::SAHF: return "X86ISD::SAHF"; case X86ISD::RDRAND: return "X86ISD::RDRAND"; + case X86ISD::RDSEED: return "X86ISD::RDSEED"; case X86ISD::FMADD: return "X86ISD::FMADD"; case X86ISD::FMSUB: return "X86ISD::FMSUB"; case X86ISD::FNMADD: return "X86ISD::FNMADD"; @@ -12553,6 +12797,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI"; case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI"; + case X86ISD::XTEST: return "X86ISD::XTEST"; } } @@ -15584,8 +15829,9 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { // Quit if the constant is neither 0 or 1. return SDValue(); - // Skip 'zext' node. - if (SetCC.getOpcode() == ISD::ZERO_EXTEND) + // Skip 'zext' or 'trunc' node. + if (SetCC.getOpcode() == ISD::ZERO_EXTEND || + SetCC.getOpcode() == ISD::TRUNCATE) SetCC = SetCC.getOperand(0); switch (SetCC.getOpcode()) { @@ -15604,9 +15850,15 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { return SDValue(); // Quit if false value is not a constant. if (!FVal) { - // A special case for rdrand, where 0 is set if false cond is found. SDValue Op = SetCC.getOperand(0); - if (Op.getOpcode() != X86ISD::RDRAND) + // Skip 'zext' or 'trunc' node. + if (Op.getOpcode() == ISD::ZERO_EXTEND || + Op.getOpcode() == ISD::TRUNCATE) + Op = Op.getOperand(0); + // A special case for rdrand/rdseed, where 0 is set if false cond is + // found. + if ((Op.getOpcode() != X86ISD::RDRAND && + Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0) return SDValue(); } // Quit if false value is not the constant 0 or 1. @@ -15918,124 +16170,12 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { - EVT VT = N->getValueType(0); if (N->getOpcode() == ISD::SHL) { SDValue V = PerformSHLCombine(N, DAG); if (V.getNode()) return V; } - // On X86 with SSE2 support, we can transform this to a vector shift if - // all elements are shifted by the same amount. We can't do this in legalize - // because the a constant vector is typically transformed to a constant pool - // so we have no knowledge of the shift amount. - if (!Subtarget->hasSSE2()) - return SDValue(); - - if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 && - (!Subtarget->hasInt256() || - (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16))) - return SDValue(); - - SDValue ShAmtOp = N->getOperand(1); - EVT EltVT = VT.getVectorElementType(); - DebugLoc DL = N->getDebugLoc(); - SDValue BaseShAmt = SDValue(); - if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) { - unsigned NumElts = VT.getVectorNumElements(); - unsigned i = 0; - for (; i != NumElts; ++i) { - SDValue Arg = ShAmtOp.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - BaseShAmt = Arg; - break; - } - // Handle the case where the build_vector is all undef - // FIXME: Should DAG allow this? - if (i == NumElts) - return SDValue(); - - for (; i != NumElts; ++i) { - SDValue Arg = ShAmtOp.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - if (Arg != BaseShAmt) { - return SDValue(); - } - } - } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE && - cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) { - SDValue InVec = ShAmtOp.getOperand(0); - if (InVec.getOpcode() == ISD::BUILD_VECTOR) { - unsigned NumElts = InVec.getValueType().getVectorNumElements(); - unsigned i = 0; - for (; i != NumElts; ++i) { - SDValue Arg = InVec.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - BaseShAmt = Arg; - break; - } - } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { - unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); - if (C->getZExtValue() == SplatIdx) - BaseShAmt = InVec.getOperand(1); - } - } - if (BaseShAmt.getNode() == 0) { - // Don't create instructions with illegal types after legalize - // types has run. - if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) && - !DCI.isBeforeLegalize()) - return SDValue(); - - BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, - DAG.getIntPtrConstant(0)); - } - } else - return SDValue(); - - // The shift amount is an i32. - if (EltVT.bitsGT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt); - else if (EltVT.bitsLT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt); - - // The shift amount is identical so we can do a vector shift. - SDValue ValOp = N->getOperand(0); - switch (N->getOpcode()) { - default: - llvm_unreachable("Unknown shift opcode!"); - case ISD::SHL: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v2i64: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v4i64: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG); - } - case ISD::SRA: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v4i32: - case MVT::v8i16: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG); - } - case ISD::SRL: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v2i64: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v4i64: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG); - } - } + return SDValue(); } // CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..)) @@ -16346,13 +16486,19 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, // Validate that the Mask operand is a vector sra node. // FIXME: what to do for bytes, since there is a psignb/pblendvb, but // there is no psrai.b - if (Mask.getOpcode() != X86ISD::VSRAI) - return SDValue(); - - // Check that the SRA is all signbits. - SDValue SraC = Mask.getOperand(1); - unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); + unsigned SraAmt = ~0; + if (Mask.getOpcode() == ISD::SRA) { + SDValue Amt = Mask.getOperand(1); + if (isSplatVector(Amt.getNode())) { + SDValue SclrAmt = Amt->getOperand(0); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) + SraAmt = C->getZExtValue(); + } + } else if (Mask.getOpcode() == X86ISD::VSRAI) { + SDValue SraC = Mask.getOperand(1); + SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); + } if ((SraAmt + 1) != EltBits) return SDValue(); @@ -16526,11 +16672,10 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned RegSz = RegVT.getSizeInBits(); + // On Sandybridge unaligned 256bit loads are inefficient. ISD::LoadExtType Ext = Ld->getExtensionType(); unsigned Alignment = Ld->getAlignment(); - bool IsAligned = Alignment == 0 || Alignment == MemVT.getSizeInBits()/8; - - // On Sandybridge unaligned 256bit loads are inefficient. + bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8; if (RegVT.is256BitVector() && !Subtarget->hasInt256() && !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) { unsigned NumElems = RegVT.getVectorNumElements(); @@ -16550,7 +16695,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), - std::max(Alignment/2U, 1U)); + std::min(16U, Alignment)); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Load1.getValue(1), Load2.getValue(1)); @@ -16721,13 +16866,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, DebugLoc dl = St->getDebugLoc(); SDValue StoredVal = St->getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned Alignment = St->getAlignment(); - bool IsAligned = Alignment == 0 || Alignment == VT.getSizeInBits()/8; // If we are saving a concatenation of two XMM registers, perform two stores. // On Sandy Bridge, 256-bit memory operations are executed by two // 128-bit ports. However, on Haswell it is better to issue a single 256-bit // memory operation. + unsigned Alignment = St->getAlignment(); + bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8; if (VT.is256BitVector() && !Subtarget->hasInt256() && StVT == VT && !IsAligned) { unsigned NumElems = VT.getVectorNumElements(); @@ -16747,7 +16892,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), - std::max(Alignment/2U, 1U)); + std::min(16U, Alignment)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index da1dad0f40..5725f7aea5 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -356,10 +356,17 @@ namespace llvm { // RDRAND - Get a random integer and indicate whether it is valid in CF. RDRAND, + // RDSEED - Get a NIST SP800-90B & C compliant random integer and + // indicate whether it is valid in CF. + RDSEED, + // PCMP*STRI PCMPISTRI, PCMPESTRI, + // XTEST - Test if in transactional execution. + XTEST, + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - // Atomic 64-bit binary operations. diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td index bb362f5c7b..ba1aede3c1 100644 --- a/lib/Target/X86/X86Instr3DNow.td +++ b/lib/Target/X86/X86Instr3DNow.td @@ -84,13 +84,16 @@ defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">; defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">; -def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>; +def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", + [(int_x86_mmx_femms)]>; -def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr), - "prefetch\t$addr", []>; +def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr), + "prefetch\t$addr", + [(prefetch addr:$addr, (i32 0), imm, (i32 1))]>; -def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr), - "prefetchw\t$addr", []>; +def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr", + [(prefetch addr:$addr, (i32 1), (i32 3), (i32 1))]>, TB, + Requires<[HasPrefetchW]>; // "3DNowA" instructions defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">; diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index d86a4065a7..225e9720da 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// // LEA - Load Effective Address - +let SchedRW = [WriteLEA] in { let neverHasSideEffects = 1 in def LEA16r : I<0x8D, MRMSrcMem, (outs GR16:$dst), (ins i32mem:$src), @@ -36,41 +36,52 @@ let isReMaterializable = 1 in def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", [(set GR64:$dst, lea64addr:$src)], IIC_LEA>; - - +} // SchedRW //===----------------------------------------------------------------------===// // Fixed-Register Multiplication and Division Instructions. // +// SchedModel info for instruction that loads one value and gets the second +// (and possibly third) value from a register. +// This is used for instructions that put the memory operands before other +// uses. +class SchedLoadReg<SchedWrite SW> : Sched<[SW, + // Memory operand. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // Register reads (implicit or explicit). + ReadAfterLd, ReadAfterLd]>; + // Extra precision multiplication // AL is really implied by AX, but the registers in Defs must match the // SDNode results (i8, i32). +// AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, GR8:$src)), - (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8 - + (implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>; +// AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), "mul{w}\t$src", - [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16 - + [], IIC_MUL16_REG>, OpSize, Sched<[WriteIMul]>; +// EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), - "mul{l}\t$src", // EAX,EDX = EAX*GR32 + "mul{l}\t$src", [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/], - IIC_MUL32_REG>; + IIC_MUL32_REG>, Sched<[WriteIMul]>; +// RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), - "mul{q}\t$src", // RAX,RDX = RAX*GR64 + "mul{q}\t$src", [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/], - IIC_MUL64>; - + IIC_MUL64>, Sched<[WriteIMul]>; +// AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), "mul{b}\t$src", @@ -78,51 +89,60 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, (loadi8 addr:$src))), - (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8] - + (implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>; +// AX,DX = AX*[mem16] let mayLoad = 1, neverHasSideEffects = 1 in { let Defs = [AX,DX,EFLAGS], Uses = [AX] in def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), "mul{w}\t$src", - [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16] - + [], IIC_MUL16_MEM>, OpSize, SchedLoadReg<WriteIMulLd>; +// EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), "mul{l}\t$src", - [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32] + [], IIC_MUL32_MEM>, SchedLoadReg<WriteIMulLd>; +// RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), - "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64] + "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>; } let neverHasSideEffects = 1 in { +// AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [], - IIC_IMUL8>; // AL,AH = AL*GR8 + IIC_IMUL8>, Sched<[WriteIMul]>; +// AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [], - IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16 + IIC_IMUL16_RR>, OpSize, Sched<[WriteIMul]>; +// EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [], - IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32 + IIC_IMUL32_RR>, Sched<[WriteIMul]>; +// RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [], - IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64 + IIC_IMUL64_RR>, Sched<[WriteIMul]>; let mayLoad = 1 in { +// AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), - "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8] + "imul{b}\t$src", [], IIC_IMUL8>, SchedLoadReg<WriteIMulLd>; +// AX,DX = AX*[mem16] let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), - "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize; - // AX,DX = AX*[mem16] + "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize, + SchedLoadReg<WriteIMulLd>; +// EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), - "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32] + "imul{l}\t$src", [], IIC_IMUL32_MEM>, SchedLoadReg<WriteIMulLd>; +// RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64] + "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>; } } // neverHasSideEffects @@ -130,7 +150,8 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), let Defs = [EFLAGS] in { let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y +let isCommutable = 1, SchedRW = [WriteIMul] in { +// X = IMUL Y, Z --> X = IMUL Z, Y // Register-Register Signed Integer Multiply def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", @@ -148,9 +169,10 @@ def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>, TB; -} +} // isCommutable, SchedRW // Register-Memory Signed Integer Multiply +let SchedRW = [WriteIMulLd, ReadAfterLd] in { def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", @@ -172,12 +194,14 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (X86smul_flag GR64:$src1, (load addr:$src2)))], IIC_IMUL64_RM>, TB; +} // SchedRW } // Constraints = "$src1 = $dst" } // Defs = [EFLAGS] // Surprisingly enough, these are not two address instructions! let Defs = [EFLAGS] in { +let SchedRW = [WriteIMul] in { // Register-Integer Signed Integer Multiply def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), @@ -216,9 +240,10 @@ def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, i64immSExt8:$src2))], IIC_IMUL64_RRI>; - +} // SchedRW // Memory-Integer Signed Integer Multiply +let SchedRW = [WriteIMulLd] in { def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -260,6 +285,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 (X86smul_flag (load addr:$src1), i64immSExt8:$src2))], IIC_IMUL64_RMI>; +} // SchedRW } // Defs = [EFLAGS] @@ -267,6 +293,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 // unsigned division/remainder let hasSideEffects = 1 in { // so that we don't speculatively execute +let SchedRW = [WriteIDiv] in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", [], IIC_DIV8_REG>; @@ -280,24 +307,30 @@ def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), "div{q}\t$src", [], IIC_DIV64>; +} // SchedRW let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "div{b}\t$src", [], IIC_DIV8_MEM>; + "div{b}\t$src", [], IIC_DIV8_MEM>, + SchedLoadReg<WriteIDivLd>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "div{w}\t$src", [], IIC_DIV16>, OpSize; + "div{w}\t$src", [], IIC_DIV16>, OpSize, + SchedLoadReg<WriteIDivLd>; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), - "div{l}\t$src", [], IIC_DIV32>; + "div{l}\t$src", [], IIC_DIV32>, + SchedLoadReg<WriteIDivLd>; // RDX:RAX/[mem64] = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), - "div{q}\t$src", [], IIC_DIV64>; + "div{q}\t$src", [], IIC_DIV64>, + SchedLoadReg<WriteIDivLd>; } // Signed division/remainder. +let SchedRW = [WriteIDiv] in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "idiv{b}\t$src", [], IIC_IDIV8>; @@ -311,20 +344,25 @@ def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), "idiv{q}\t$src", [], IIC_IDIV64>; +} // SchedRW let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "idiv{b}\t$src", [], IIC_IDIV8>; + "idiv{b}\t$src", [], IIC_IDIV8>, + SchedLoadReg<WriteIDivLd>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "idiv{w}\t$src", [], IIC_IDIV16>, OpSize; + "idiv{w}\t$src", [], IIC_IDIV16>, OpSize, + SchedLoadReg<WriteIDivLd>; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), - "idiv{l}\t$src", [], IIC_IDIV32>; + "idiv{l}\t$src", [], IIC_IDIV32>, + SchedLoadReg<WriteIDivLd>; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), - "idiv{q}\t$src", [], IIC_IDIV64>; + "idiv{q}\t$src", [], IIC_IDIV64>, + SchedLoadReg<WriteIDivLd>; } } // hasSideEffects = 0 @@ -335,7 +373,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), // unary instructions let CodeSize = 2 in { let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1), "neg{b}\t$dst", [(set GR8:$dst, (ineg GR8:$src1)), @@ -351,8 +389,10 @@ def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1), def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst", [(set GR64:$dst, (ineg GR64:$src1)), (implicit EFLAGS)], IIC_UNARY_REG>; -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW +// Read-modify-write negate. +let SchedRW = [WriteALULd, WriteRMW] in { def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst", [(store (ineg (loadi8 addr:$dst)), addr:$dst), @@ -368,12 +408,13 @@ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", [(store (ineg (loadi64 addr:$dst)), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; +} // SchedRW } // Defs = [EFLAGS] // Note: NOT does not set EFLAGS! -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { // Match xor -1 to not. Favors these over a move imm + xor to save code size. let AddedComplexity = 15 in { def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1), @@ -388,8 +429,9 @@ def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1), def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst", [(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>; } -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW +let SchedRW = [WriteALULd, WriteRMW] in { def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst", [(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; @@ -402,11 +444,12 @@ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), [(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", [(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; +} // SchedRW } // CodeSize // TODO: inc/dec is slow for P4, but fast for Pentium-M. let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { let CodeSize = 2 in def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "inc{b}\t$dst", @@ -454,9 +497,9 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), Requires<[In64BitMode]>; } // isConvertibleToThreeAddress = 1, CodeSize = 2 -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2 in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -491,9 +534,9 @@ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", [(store (add (loadi32 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>, Requires<[In64BitMode]>; -} // CodeSize = 2 +} // CodeSize = 2, SchedRW -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { let CodeSize = 2 in def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "dec{b}\t$dst", @@ -514,10 +557,10 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))], IIC_UNARY_REG>; } // CodeSize = 2 -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2 in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -532,7 +575,7 @@ let CodeSize = 2 in { def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(store (add (loadi64 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; -} // CodeSize = 2 +} // CodeSize = 2, SchedRW } // Defs = [EFLAGS] @@ -646,7 +689,8 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f = MRMDestReg> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>, + Sched<[WriteALU]>; // BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has // just a regclass (no eflags) as a result. @@ -689,7 +733,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs typeinfo.RegClass:$dst), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> { + mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>, + Sched<[WriteALU]> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; let hasSideEffects = 0; @@ -699,7 +744,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM>, + Sched<[WriteALU]> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; let hasSideEffects = 0; @@ -710,7 +756,8 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, dag outlist, list<dag> pattern> : ITy<opcode, MRMSrcMem, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALULd, ReadAfterLd]>; // BinOpRM_R - Instructions like "add reg, reg, [mem]". class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -746,7 +793,8 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; } @@ -783,7 +831,8 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALU]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -821,7 +870,8 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, list<dag> pattern> : ITy<opcode, MRMDestMem, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>; + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]>; // BinOpMR_RMW - Instructions like "add [mem], reg". class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -849,7 +899,8 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern, bits<8> opcode = 0x80> : ITy<opcode, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]> { let ImmT = typeinfo.ImmEncoding; } @@ -881,7 +932,8 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern> : ITy<0x82, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -913,7 +965,7 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Register areg, string operands> : ITy<opcode, RawFrm, typeinfo, (outs), (ins typeinfo.ImmOperand:$src), - mnemonic, operands, []> { + mnemonic, operands, []>, Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; let Uses = [areg]; let Defs = [areg]; @@ -1199,7 +1251,7 @@ let isCompare = 1, Defs = [EFLAGS] in { // register class is constrained to GR8_NOREX. let isPseudo = 1 in def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), - "", [], IIC_BIN_NONMEM>; + "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>; } //===----------------------------------------------------------------------===// @@ -1210,11 +1262,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop, def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))], - IIC_BIN_NONMEM>; + IIC_BIN_NONMEM>, Sched<[WriteALU]>; def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, - (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>; + (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>, + Sched<[WriteALULd, ReadAfterLd]>; } let Predicates = [HasBMI], Defs = [EFLAGS] in { @@ -1241,12 +1294,12 @@ let neverHasSideEffects = 1 in { let isCommutable = 1 in def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - [], IIC_MUL8>, T8XD, VEX_4V; + [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMul]>; let mayLoad = 1 in def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - [], IIC_MUL8>, T8XD, VEX_4V; + [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMulLd]>; } } @@ -1261,6 +1314,7 @@ let Predicates = [HasBMI2] in { // ADCX Instruction // let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + let SchedRW = [WriteALU] in { def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "adcx{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8, OpSize; @@ -1268,8 +1322,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "adcx{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + } // SchedRW - let mayLoad = 1 in { + let mayLoad = 1, SchedRW = [WriteALULd] in { def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "adcx{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8, OpSize; @@ -1284,6 +1339,7 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { // ADOX Instruction // let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + let SchedRW = [WriteALU] in { def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS; @@ -1291,8 +1347,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>; + } // SchedRW - let mayLoad = 1 in { + let mayLoad = 1, SchedRW = [WriteALULd] in { def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS; diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index 8f2d0a1aae..a967a4da5c 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -16,7 +16,7 @@ // SetCC instructions. multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", - isCommutable = 1 in { + isCommutable = 1, SchedRW = [WriteALU] in { def NAME#16rr : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), @@ -37,7 +37,8 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { IIC_CMOV32_RR>, TB; } - let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in { + let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", + SchedRW = [WriteALULd, ReadAfterLd] in { def NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), @@ -83,11 +84,11 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> { def r : I<opc, MRM0r, (outs GR8:$dst), (ins), !strconcat(Mnemonic, "\t$dst"), [(set GR8:$dst, (X86setcc OpNode, EFLAGS))], - IIC_SET_R>, TB; + IIC_SET_R>, TB, Sched<[WriteALU]>; def m : I<opc, MRM0m, (outs), (ins i8mem:$dst), !strconcat(Mnemonic, "\t$dst"), [(store (X86setcc OpNode, EFLAGS), addr:$dst)], - IIC_SET_M>, TB; + IIC_SET_M>, TB, Sched<[WriteALU, WriteStore]>; } // Uses = [EFLAGS] } diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 734e5982b2..d9ff0c63c5 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -149,11 +149,12 @@ let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in { //===----------------------------------------------------------------------===// // EH Pseudo Instructions // +let SchedRW = [WriteSystem] in { let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR32:$addr)], IIC_RET>; + [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } @@ -161,7 +162,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR64:$addr)], IIC_RET>; + [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } @@ -186,6 +187,7 @@ let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, Requires<[In64BitMode]>; } } +} // SchedRW let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in { def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst), @@ -220,7 +222,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in { def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", - [(set GR8:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller // encoding and avoids a partial-register update sometimes, but doing so @@ -229,11 +231,12 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", // to an MCInst. def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), "", - [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize; + [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize, + Sched<[WriteZero]>; // FIXME: Set encoding to pseudo. def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; } // We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a @@ -245,7 +248,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", let Defs = [EFLAGS], isCodeGenOnly=1, AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however @@ -254,10 +257,10 @@ let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), "", [(set GR64:$dst, i64immZExt32:$src)], - IIC_ALU_NONMEM>; + IIC_ALU_NONMEM>, Sched<[WriteALU]>; // Use sbb to materialize carry bit. -let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1 in { +let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in { // FIXME: These are pseudo ops that should be replaced with Pat<> patterns. // However, Pat<> can't replicate the destination reg into the inputs of the // result. @@ -320,6 +323,7 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))), //===----------------------------------------------------------------------===// // String Pseudo Instructions // +let SchedRW = [WriteMicrocoded] in { let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", [(X86rep_movs i8)], IIC_REP_MOVS>, REP, @@ -382,6 +386,7 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in { [(X86rep_stos i64)], IIC_REP_STOS>, REP, Requires<[In64BitMode]>; } +} // SchedRW //===----------------------------------------------------------------------===// // Thread Local Storage Instructions @@ -594,12 +599,13 @@ defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">; let isCodeGenOnly = 1, Defs = [EFLAGS] in def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), "or{l}\t{$zero, $dst|$dst, $zero}", - [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK; + [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK, + Sched<[WriteALULd, WriteRMW]>; let hasSideEffects = 1 in def Int_MemBarrier : I<0, Pseudo, (outs), (ins), "#MEMBARRIER", - [(X86MemBarrier)]>; + [(X86MemBarrier)]>, Sched<[WriteLoad]>; // RegOpc corresponds to the mr version of the instruction // ImmOpc corresponds to the mi version of the instruction @@ -607,7 +613,8 @@ def Int_MemBarrier : I<0, Pseudo, (outs), (ins), // ImmMod corresponds to the instruction format of the mi and mi8 versions multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8, Format ImmMod, string mnemonic> { -let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, + SchedRW = [WriteALULd, WriteRMW] in { def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 }, @@ -694,7 +701,8 @@ defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">; // Optimized codegen when the non-memory output is not used. multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form, string mnemonic> { -let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, + SchedRW = [WriteALULd, WriteRMW] in { def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst), !strconcat(mnemonic, "{b}\t$dst"), @@ -728,7 +736,7 @@ let isCodeGenOnly = 1 in { multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form, string mnemonic, SDPatternOperator frag, InstrItinClass itin8, InstrItinClass itin> { -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in { let Defs = [AL, EFLAGS], Uses = [AL] in def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap), !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"), @@ -748,14 +756,15 @@ let isCodeGenOnly = 1 in { } } -let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in { +let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], + SchedRW = [WriteALULd, WriteRMW] in { defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem, IIC_CMPX_LOCK_8B>; } let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], - Predicates = [HasCmpxchg16b] in { + Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in { defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b", X86cas16, i128mem, IIC_CMPX_LOCK_16B>, REX_W; @@ -768,7 +777,8 @@ defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic, string frag, InstrItinClass itin8, InstrItinClass itin> { - let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in { + let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1, + SchedRW = [WriteALULd, WriteRMW] in { def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"), @@ -990,9 +1000,6 @@ def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)), // This corresponds to add $foo@tpoff, %rax def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>; -// This corresponds to mov foo@tpoff(%rbx), %eax -def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))), - (MOV64rm tglobaltlsaddr :$dst)>; // Direct PC relative function call for small code model. 32-bit displacement @@ -1192,7 +1199,8 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. -let AddedComplexity = 5 in { // Try this before the selecting to OR +// Try this before the selecting to OR. +let AddedComplexity = 5, SchedRW = [WriteALU] in { let isConvertibleToThreeAddress = 1, Constraints = "$src1 = $dst", Defs = [EFLAGS] in { @@ -1239,7 +1247,7 @@ def ADD64ri32_DB : I<0, Pseudo, [(set GR64:$dst, (or_is_add GR64:$src1, i64immSExt32:$src2))]>; } -} // AddedComplexity +} // AddedComplexity, SchedRW //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index bfe954114c..0e696513d4 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -20,7 +20,7 @@ // The X86retflag return instructions are variadic because we may add ST0 and // ST1 arguments when returning values on the x87 stack. let isTerminator = 1, isReturn = 1, isBarrier = 1, - hasCtrlDep = 1, FPForm = SpecialFP in { + hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in { def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), "ret", [(X86retflag 0)], IIC_RET>; @@ -46,7 +46,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, } // Unconditional branches. -let isBarrier = 1, isBranch = 1, isTerminator = 1 in { +let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in { def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>; def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), @@ -58,7 +58,7 @@ let isBarrier = 1, isBranch = 1, isTerminator = 1 in { } // Conditional Branches. -let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { +let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in { multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, [], IIC_Jcc>; @@ -85,7 +85,7 @@ defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>; defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>; // jcx/jecx/jrcx instructions. -let isBranch = 1, isTerminator = 1 in { +let isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in { // These are the 32-bit versions of this instruction for the asmparser. In // 32-bit mode, the address size prefix is jcxz and the unprefixed version is // jecxz. @@ -110,36 +110,46 @@ let isBranch = 1, isTerminator = 1 in { // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", - [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>; + [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>, + Sched<[WriteJump]>; def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", - [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[In32BitMode]>; + [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, + Requires<[In32BitMode]>, Sched<[WriteJumpLd]>; def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", - [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>; + [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>, + Sched<[WriteJump]>; def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst", - [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>, Requires<[In64BitMode]>; + [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>, + Requires<[In64BitMode]>, Sched<[WriteJumpLd]>; def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs), (ins i16imm:$off, i16imm:$seg), - "ljmp{w}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>, OpSize; + "ljmp{w}\t{$seg, $off|$off, $seg}", [], + IIC_JMP_FAR_PTR>, OpSize, Sched<[WriteJump]>; def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs), (ins i32imm:$off, i16imm:$seg), - "ljmp{l}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>; + "ljmp{l}\t{$seg, $off|$off, $seg}", [], + IIC_JMP_FAR_PTR>, Sched<[WriteJump]>; def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), - "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>; + "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>, + Sched<[WriteJump]>; def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), - "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize; + "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize, + Sched<[WriteJumpLd]>; def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst), - "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>; + "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>, + Sched<[WriteJumpLd]>; } // Loop instructions - +let SchedRW = [WriteJump] in { def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", [], IIC_LOOP>; def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", [], IIC_LOOPE>; def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", [], IIC_LOOPNE>; +} //===----------------------------------------------------------------------===// // Call Instructions... @@ -152,27 +162,32 @@ let isCall = 1 in let Uses = [ESP] in { def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i32imm_pcrel:$dst), - "call{l}\t$dst", [], IIC_CALL_RI>, Requires<[In32BitMode]>; + "call{l}\t$dst", [], IIC_CALL_RI>, + Requires<[In32BitMode]>, Sched<[WriteJump]>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst), "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>, - Requires<[In32BitMode]>; + Requires<[In32BitMode]>, Sched<[WriteJump]>; def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst), - "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>, - Requires<[In32BitMode]>; + "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], + IIC_CALL_MEM>, + Requires<[In32BitMode,FavorMemIndirectCall]>, + Sched<[WriteJumpLd]>; def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), (ins i16imm:$off, i16imm:$seg), "lcall{w}\t{$seg, $off|$off, $seg}", [], - IIC_CALL_FAR_PTR>, OpSize; + IIC_CALL_FAR_PTR>, OpSize, Sched<[WriteJump]>; def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs), (ins i32imm:$off, i16imm:$seg), "lcall{l}\t{$seg, $off|$off, $seg}", [], - IIC_CALL_FAR_PTR>; + IIC_CALL_FAR_PTR>, Sched<[WriteJump]>; def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst), - "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize; + "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize, + Sched<[WriteJumpLd]>; def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst), - "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>; + "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>, + Sched<[WriteJumpLd]>; // callw for 16 bit code for the assembler. let isAsmParserOnly = 1 in @@ -185,7 +200,7 @@ let isCall = 1 in // Tail call stuff. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1 in + isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in let Uses = [ESP] in { def TCRETURNdi : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$offset), []>; @@ -216,7 +231,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, // RSP is marked as a use to prevent stack-pointer assignments that appear // immediately before calls from potentially appearing dead. Uses for argument // registers are added manually. -let isCall = 1, Uses = [RSP] in { +let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in { // NOTE: this pattern doesn't match "X86call imm", because we do not know // that the offset between an arbitrary immediate and the call will fit in // the 32-bit pcrel field that we have. @@ -231,7 +246,7 @@ let isCall = 1, Uses = [RSP] in { def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst), "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))], IIC_CALL_MEM>, - Requires<[In64BitMode]>; + Requires<[In64BitMode,FavorMemIndirectCall]>; def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>; @@ -245,13 +260,12 @@ let isCall = 1, isCodeGenOnly = 1 in def W64ALLOCA : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst), "call{q}\t$dst", [], IIC_CALL_RI>, - Requires<[IsWin64]>; + Requires<[IsWin64]>, Sched<[WriteJump]>; } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1 in - let Uses = [RSP], - usesCustomInserter = 1 in { + isCodeGenOnly = 1, Uses = [RSP], usesCustomInserter = 1, + SchedRW = [WriteJump] in { def TCRETURNdi64 : PseudoI<(outs), (ins i64i32imm_pcrel:$dst, i32imm:$offset), []>; diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 2eb454ded2..6dc7175357 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -42,48 +42,54 @@ let neverHasSideEffects = 1 in { let neverHasSideEffects = 1 in { def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALU]>; let mayLoad = 1 in def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALULd]>; } // neverHasSideEffects = 1 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB, + Sched<[WriteALULd]>; def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; let neverHasSideEffects = 1 in { def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALU]>; let mayLoad = 1 in def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALULd]>; } // neverHasSideEffects = 1 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB, + Sched<[WriteALULd]>; def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>, - TB; + TB, Sched<[WriteALULd]>; // These are the same as the regular MOVZX32rr8 and MOVZX32rm8 // except that they use GR32_NOREX for the output operand register class @@ -92,12 +98,12 @@ let neverHasSideEffects = 1, isCodeGenOnly = 1 in { def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [], IIC_MOVZX>, TB; + [], IIC_MOVZX>, TB, Sched<[WriteALU]>; let mayLoad = 1 in def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [], IIC_MOVZX>, TB; + [], IIC_MOVZX>, TB, Sched<[WriteALULd]>; } // MOVSX64rr8 always has a REX prefix and it has an 8-bit register @@ -106,38 +112,42 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, // were generalized, this would require a special register class. def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; + [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB; + [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>; + [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>, + Sched<[WriteALU]>; def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>; + [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>, + Sched<[WriteALULd]>; // movzbq and movzwq encodings for the disassembler def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, - TB; + TB, Sched<[WriteALU]>; def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, - TB; + TB, Sched<[WriteALULd]>; def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, - TB; + TB, Sched<[WriteALU]>; def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, - TB; + TB, Sched<[WriteALULd]>; // FIXME: These should be Pat patterns. let isCodeGenOnly = 1 in { @@ -145,17 +155,19 @@ let isCodeGenOnly = 1 in { // Use movzbl instead of movzbq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), - "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; + "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), "", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>, - TB; + TB, Sched<[WriteALULd]>; // Use movzwl instead of movzwq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB; + "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "", [(set GR64:$dst, (zextloadi64i16 addr:$src))], - IIC_MOVZX>, TB; + IIC_MOVZX>, TB, Sched<[WriteALULd]>; // There's no movzlq instruction, but movl can be used for this purpose, using // implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero @@ -165,9 +177,10 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), // necessarily all zero. In such cases, we fall back to these explicit zext // instructions. def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), - "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>; + "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>, + Sched<[WriteALU]>; def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "", [(set GR64:$dst, (zextloadi64i32 addr:$src))], - IIC_MOVZX>; + IIC_MOVZX>, Sched<[WriteALULd]>; } diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 568726e08e..2224a08d59 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -422,7 +422,7 @@ def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>; def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>; } -let mayLoad = 1 in { +let mayLoad = 1, SchedRW = [WriteLoad] in { def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src", IIC_FLD>; def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src", @@ -436,7 +436,7 @@ def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src", def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src", IIC_FILD>; } -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst", IIC_FST>; def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst", @@ -481,7 +481,7 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, [(X86fp_to_i64mem RFP80:$src, addr:$op)]>; } // Predicates = [HasSSE3] -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst", IIC_FST>; def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst", @@ -491,6 +491,7 @@ def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), } // FP Stack manipulation instructions. +let SchedRW = [WriteMove] in { def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op", IIC_FLD>, D9; def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op", @@ -499,6 +500,7 @@ def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op", IIC_FST>, DD; def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op", IIC_FXCH>, D9; +} // Floating point constant loads. let isReMaterializable = 1 in { @@ -516,19 +518,23 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP, [(set RFP80:$dst, fpimm1)]>; } +let SchedRW = [WriteZero] in { def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz", IIC_FLDZ>, D9; def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1", IIC_FIST>, D9; - +} // Floating point compares. +let SchedRW = [WriteFAdd] in { def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>; def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, [(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>; def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, [(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>; +} // SchedRW } // Defs = [FPSW] +let SchedRW = [WriteFAdd] in { // CC = ST(0) cmp ST(i) let Defs = [EFLAGS, FPSW] in { def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, @@ -566,8 +572,10 @@ def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), "fcompi\t$reg", IIC_FCOMI>, DF; } +} // SchedRW // Floating point flag ops. +let SchedRW = [WriteALU] in { let Defs = [AX], Uses = [FPSW] in def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags (outs), (ins), "fnstsw %ax", @@ -576,23 +584,26 @@ def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world (outs), (ins i16mem:$dst), "fnstcw\t$dst", [(X86fp_cwd_get16 addr:$dst)], IIC_FNSTCW>; - +} // SchedRW let mayLoad = 1 in def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16] - (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>; + (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>, + Sched<[WriteLoad]>; // FPU control instructions +let SchedRW = [WriteMicrocoded] in { let Defs = [FPSW] in def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", [], IIC_FNINIT>, DB; def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg), "ffree\t$reg", IIC_FFREE>, DD; - // Clear exceptions let Defs = [FPSW] in def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", [], IIC_FNCLEX>, DB; +} // SchedRW // Operandless floating-point instructions for the disassembler. +let SchedRW = [WriteMicrocoded] in { def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", [], IIC_WAIT>; def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", [], IIC_FNOP>, D9; @@ -627,6 +638,7 @@ def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), "fxrstorq\t$src", [], IIC_FXRSTOR>, TB, REX_W, Requires<[In64BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 44e574d246..0ef9491eb7 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -45,14 +45,15 @@ def MRM_D0 : Format<45>; def MRM_D1 : Format<46>; def MRM_D4 : Format<47>; def MRM_D5 : Format<48>; -def MRM_D8 : Format<49>; -def MRM_D9 : Format<50>; -def MRM_DA : Format<51>; -def MRM_DB : Format<52>; -def MRM_DC : Format<53>; -def MRM_DD : Format<54>; -def MRM_DE : Format<55>; -def MRM_DF : Format<56>; +def MRM_D6 : Format<49>; +def MRM_D8 : Format<50>; +def MRM_D9 : Format<51>; +def MRM_DA : Format<52>; +def MRM_DB : Format<53>; +def MRM_DC : Format<54>; +def MRM_DD : Format<55>; +def MRM_DE : Format<56>; +def MRM_DF : Format<57>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our @@ -208,47 +209,47 @@ class PseudoI<dag oops, dag iops, list<dag> pattern> } class I<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT, + list<dag> pattern, InstrItinClass itin = NoItinerary, Domain d = GenericDomain> : X86Inst<o, f, NoImm, outs, ins, asm, itin, d> { let Pattern = pattern; let CodeSize = 3; } class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT, + list<dag> pattern, InstrItinClass itin = NoItinerary, Domain d = GenericDomain> : X86Inst<o, f, Imm8, outs, ins, asm, itin, d> { let Pattern = pattern; let CodeSize = 3; } class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm16, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm32, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm16PCRel, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm32PCRel, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; @@ -257,12 +258,12 @@ class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, // FPStack Instruction Templates: // FPI - Floating Point Instruction template. class FPI<bits<8> o, Format F, dag outs, dag ins, string asm, - InstrItinClass itin = IIC_DEFAULT> + InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, [], itin> {} // FpI_ - Floating Point Pseudo Instruction template. Not Predicated. class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern, - InstrItinClass itin = IIC_DEFAULT> + InstrItinClass itin = NoItinerary> : X86Inst<0, Pseudo, NoImm, outs, ins, "", itin> { let FPForm = fp; let Pattern = pattern; @@ -275,14 +276,14 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern, // Iseg32 - 16-bit segment selector, 32-bit offset class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm16, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm32, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; @@ -292,7 +293,7 @@ def __xs : XS; // SI - SSE 1 & 2 scalar instructions class SI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2])); @@ -303,7 +304,7 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm, // SIi8 - SSE 1 & 2 scalar instructions class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2])); @@ -350,25 +351,25 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // VPSI - SSE1 instructions with TB prefix in AVX form. class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>; class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>; class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB, Requires<[UseSSE1]>; class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB, Requires<[UseSSE1]>; class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS, Requires<[HasAVX]>; class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedSingle>, TB, Requires<[HasAVX]>; @@ -388,42 +389,42 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, // MMX operands. class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>; class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>; class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE2]>; class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>; class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[UseSSE2]>; class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[UseSSE2]>; class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD, Requires<[HasAVX]>; class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS, Requires<[HasAVX]>; class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[HasAVX]>; class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; // SSE3 Instruction Templates: @@ -433,15 +434,15 @@ class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // S3DI - SSE3 instructions with XD prefix. class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS, Requires<[UseSSE3]>; class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD, Requires<[UseSSE3]>; class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[UseSSE3]>; @@ -458,19 +459,19 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, // classes. They need to be enabled even if AVX is enabled. class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[UseSSSE3]>; class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[UseSSSE3]>; class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[HasSSSE3]>; class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[HasSSSE3]>; @@ -480,11 +481,11 @@ class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, // SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8. // class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[UseSSE41]>; class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[UseSSE41]>; @@ -492,19 +493,19 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // // SS428I - SSE 4.2 instructions with T8 prefix. class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[UseSSE42]>; // SS42FI - SSE 4.2 instructions with T8XD prefix. // NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns. class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>; // SS42AI = SSE 4.2 instructions with TA prefix class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[UseSSE42]>; @@ -514,11 +515,11 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, // AVX8I - AVX instructions with T8 and OpSize prefix. // AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8. class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize, Requires<[HasAVX]>; class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, Requires<[HasAVX]>; @@ -528,11 +529,11 @@ class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // AVX28I - AVX2 instructions with T8 and OpSize prefix. // AVX2AIi8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8. class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize, Requires<[HasAVX2]>; class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, Requires<[HasAVX2]>; @@ -541,53 +542,53 @@ class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // AES8I // These use the same encoding as the SSE4.2 T8 and TA encodings. class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag>pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[HasAES]>; class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[HasAES]>; // PCLMUL Instruction Templates class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag>pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, Requires<[HasPCLMUL]>; class AVXPCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag>pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, VEX_4V, Requires<[HasAVX, HasPCLMUL]>; // FMA3 Instruction Templates class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag>pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, T8, - OpSize, VEX_4V, Requires<[HasFMA]>; + OpSize, VEX_4V, FMASC, Requires<[HasFMA]>; // FMA4 Instruction Templates class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag>pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, TA, - OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>; + OpSize, VEX_4V, VEX_I8IMM, FMASC, Requires<[HasFMA4]>; // XOP 2, 3 and 4 Operand Instruction Template class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XOP, XOP9, Requires<[HasXOP]>; // XOP 2, 3 and 4 Operand Instruction Templates with imm byte class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XOP, XOP8, Requires<[HasXOP]>; // XOP 5 operand instruction (VEX encoding!) class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag>pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>; @@ -595,33 +596,33 @@ class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm, // class RI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, REX_W; class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, REX_W; class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii32<o, F, outs, ins, asm, pattern, itin>, REX_W; class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm64, outs, ins, asm, itin>, REX_W { let Pattern = pattern; let CodeSize = 3; } class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : SSI<o, F, outs, ins, asm, pattern, itin>, REX_W; class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : SDI<o, F, outs, ins, asm, pattern, itin>, REX_W; class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : PDI<o, F, outs, ins, asm, pattern, itin>, REX_W; class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : VPDI<o, F, outs, ins, asm, pattern, itin>, VEX_W; // MMX Instruction templates @@ -635,23 +636,23 @@ class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm, // MMXID - MMX instructions with XD prefix. // MMXIS - MMX instructions with XS prefix. class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>; class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In64BitMode]>; class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, TB, REX_W, Requires<[HasMMX]>; class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, TB, OpSize, Requires<[HasMMX]>; class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>; class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasMMX]>; class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasMMX]>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 17714acd86..7ba542c875 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3655,7 +3655,16 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, const SmallVectorImpl<MachineOperand> &MOs, unsigned Size, unsigned Align) const { const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0; + bool isCallRegIndirect = TM.getSubtarget<X86Subtarget>().callRegIndirect(); bool isTwoAddrFold = false; + + // Atom favors register form of call. So, we do not fold loads into calls + // when X86Subtarget is Atom. + if (isCallRegIndirect && + (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) { + return NULL; + } + unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index d989ec7bb0..ccc1aa2e35 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -142,6 +142,9 @@ def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>; def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand, [SDNPHasChain, SDNPSideEffect]>; +def X86rdseed : SDNode<"X86ISD::RDSEED", SDTX86rdrand, + [SDNPHasChain, SDNPSideEffect]>; + def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; @@ -603,7 +606,12 @@ def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; def HasBMI : Predicate<"Subtarget->hasBMI()">; def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; def HasRTM : Predicate<"Subtarget->hasRTM()">; +def HasHLE : Predicate<"Subtarget->hasHLE()">; +def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">; def HasADX : Predicate<"Subtarget->hasADX()">; +def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">; +def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">; +def HasPrefetchW : Predicate<"Subtarget->has3DNow() || Subtarget->hasPRFCHW()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; @@ -626,6 +634,7 @@ def OptForSize : Predicate<"OptForSize">; def OptForSpeed : Predicate<"!OptForSize">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; +def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. @@ -758,7 +767,7 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ // // Nop -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteZero] in { def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", [], IIC_NOP>; def NOOPW : I<0x1f, MRM0m, (outs), (ins i16mem:$zero), "nop{w}\t$zero", [], IIC_NOP>, TB, OpSize; @@ -769,8 +778,9 @@ let neverHasSideEffects = 1 in { // Constructing a stack frame. def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl), - "enter\t$len, $lvl", [], IIC_ENTER>; + "enter\t$len, $lvl", [], IIC_ENTER>, Sched<[WriteMicrocoded]>; +let SchedRW = [WriteALU] in { let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in def LEAVE : I<0xC9, RawFrm, (outs), (ins), "leave", [], IIC_LEAVE>, @@ -780,13 +790,14 @@ let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in def LEAVE64 : I<0xC9, RawFrm, (outs), (ins), "leave", [], IIC_LEAVE>, Requires<[In64BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // Miscellaneous Instructions. // let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in { -let mayLoad = 1 in { +let mayLoad = 1, SchedRW = [WriteLoad] in { def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [], IIC_POP_REG16>, OpSize; def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [], @@ -803,9 +814,9 @@ def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [], def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize; def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>, Requires<[In32BitMode]>; -} +} // mayLoad, SchedRW -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[], IIC_PUSH_REG>, OpSize; def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[], @@ -832,29 +843,30 @@ def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>, def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>, Requires<[In32BitMode]>; -} +} // mayStore, SchedRW } let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { -let mayLoad = 1 in { +let mayLoad = 1, SchedRW = [WriteLoad] in { def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>; def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>; def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [], IIC_POP_MEM>; -} -let mayStore = 1 in { +} // mayLoad, SchedRW +let mayStore = 1, SchedRW = [WriteStore] in { def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>; def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>; def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [], IIC_PUSH_MEM>; -} +} // mayStore, SchedRW } -let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { +let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1, + SchedRW = [WriteStore] in { def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm), "push{q}\t$imm", [], IIC_PUSH_IMM>; def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), @@ -865,23 +877,24 @@ def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm), let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>, - Requires<[In64BitMode]>; + Requires<[In64BitMode]>, Sched<[WriteLoad]>; let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>, - Requires<[In64BitMode]>; + Requires<[In64BitMode]>, Sched<[WriteStore]>; let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], - mayLoad=1, neverHasSideEffects=1 in { + mayLoad = 1, neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>, Requires<[In32BitMode]>; } let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], - mayStore=1, neverHasSideEffects=1 in { + mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>, Requires<[In32BitMode]>; } -let Constraints = "$src = $dst" in { // GR32 = bswap GR32 +let Constraints = "$src = $dst", SchedRW = [WriteALU] in { +// GR32 = bswap GR32 def BSWAP32r : I<0xC8, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "bswap{l}\t$dst", @@ -890,60 +903,63 @@ def BSWAP32r : I<0xC8, AddRegFrm, def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), "bswap{q}\t$dst", [(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB; -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW // Bit scan instructions. let Defs = [EFLAGS] in { def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsf{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))], - IIC_BSF>, TB, OpSize; + IIC_BSF>, TB, OpSize, Sched<[WriteShift]>; def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsf{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))], - IIC_BSF>, TB, OpSize; + IIC_BSF>, TB, OpSize, Sched<[WriteShiftLd]>; def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsf{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB; + [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB, + Sched<[WriteShift]>; def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsf{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))], - IIC_BSF>, TB; + IIC_BSF>, TB, Sched<[WriteShiftLd]>; def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsf{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))], - IIC_BSF>, TB; + IIC_BSF>, TB, Sched<[WriteShift]>; def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsf{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))], - IIC_BSF>, TB; + IIC_BSF>, TB, Sched<[WriteShiftLd]>; def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsr{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], IIC_BSR>, - TB, OpSize; + TB, OpSize, Sched<[WriteShift]>; def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsr{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))], IIC_BSR>, TB, - OpSize; + OpSize, Sched<[WriteShiftLd]>; def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsr{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB; + [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB, + Sched<[WriteShift]>; def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsr{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))], - IIC_BSR>, TB; + IIC_BSR>, TB, Sched<[WriteShiftLd]>; def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsr{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB; + [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB, + Sched<[WriteShift]>; def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsr{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))], - IIC_BSR>, TB; + IIC_BSR>, TB, Sched<[WriteShiftLd]>; } // Defs = [EFLAGS] - +let SchedRW = [WriteMicrocoded] in { // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in { def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", [], IIC_MOVS>; @@ -971,12 +987,12 @@ def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", [], IIC_CMPS>; def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", [], IIC_CMPS>, OpSize; def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", [], IIC_CMPS>; def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>; - +} // SchedRW //===----------------------------------------------------------------------===// // Move Instructions. // - +let SchedRW = [WriteMove] in { let neverHasSideEffects = 1 in { def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src), "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; @@ -987,6 +1003,7 @@ def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; } + let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", @@ -1004,7 +1021,9 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>; } +} // SchedRW +let SchedRW = [WriteStore] in { def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>; @@ -1017,9 +1036,11 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>; +} // SchedRW /// moffs8, moffs16 and moffs32 versions of moves. The immediate is a /// 32-bit offset from the PC. These are only valid in x86-32 mode. +let SchedRW = [WriteALU] in { def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|AL, $src}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; @@ -1038,6 +1059,7 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, EAX}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; +} // FIXME: These definitions are utterly broken // Just leave them commented out for now because they're useless outside @@ -1055,7 +1077,7 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), */ -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), @@ -1066,7 +1088,7 @@ def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; } -let canFoldAsLoad = 1, isReMaterializable = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>; @@ -1081,6 +1103,7 @@ def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), [(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>; } +let SchedRW = [WriteStore] in { def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store GR8:$src, addr:$dst)], IIC_MOV_MEM>; @@ -1093,6 +1116,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store GR64:$src, addr:$dst)], IIC_MOV_MEM>; +} // SchedRW // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so // that they can be used for copying and storing h registers, which can't be @@ -1101,34 +1125,37 @@ let isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in def MOV8rr_NOREX : I<0x88, MRMDestReg, (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src), - "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>; + "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>, + Sched<[WriteMove]>; let mayStore = 1 in def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], - IIC_MOV_MEM>; + IIC_MOV_MEM>, Sched<[WriteStore]>; let mayLoad = 1, neverHasSideEffects = 1, canFoldAsLoad = 1, isReMaterializable = 1 in def MOV8rm_NOREX : I<0x8A, MRMSrcMem, (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], - IIC_MOV_MEM>; + IIC_MOV_MEM>, Sched<[WriteLoad]>; } // Condition code ops, incl. set if equal/not equal/... +let SchedRW = [WriteALU] in { let Defs = [EFLAGS], Uses = [AH] in def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", [(set EFLAGS, (X86sahf AH))], IIC_AHF>; let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [], IIC_AHF>; // AH = flags - +} // SchedRW //===----------------------------------------------------------------------===// // Bit tests instructions: BT, BTS, BTR, BTC. let Defs = [EFLAGS] in { +let SchedRW = [WriteALU] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>, @@ -1139,13 +1166,14 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB; +} // SchedRW // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's // perspective, this is pretty bizarre. Make these instructions disassembly // only for now. -let mayLoad = 1, hasSideEffects = 0 in { +let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in { def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", // [(X86bt (loadi16 addr:$src1), GR16:$src2), @@ -1166,6 +1194,7 @@ let mayLoad = 1, hasSideEffects = 0 in { >, TB; } +let SchedRW = [WriteALU] in { def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))], @@ -1178,10 +1207,12 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))], IIC_BT_RI>, TB; +} // SchedRW // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. +let SchedRW = [WriteALU] in { def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2)) @@ -1194,8 +1225,10 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi64 addr:$src1), i64immSExt8:$src2))], IIC_BT_MI>, TB; +} // SchedRW let hasSideEffects = 0 in { +let SchedRW = [WriteALU] in { def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1203,8 +1236,9 @@ def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1214,6 +1248,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1221,8 +1256,9 @@ def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1232,6 +1268,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; } +let SchedRW = [WriteALU] in { def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1239,8 +1276,9 @@ def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1250,6 +1288,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1257,8 +1296,9 @@ def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1268,6 +1308,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; } +let SchedRW = [WriteALU] in { def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1275,8 +1316,9 @@ def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1286,6 +1328,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1293,8 +1336,9 @@ def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1315,7 +1359,7 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), // operand is referenced, the atomicity is ensured. multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag, InstrItinClass itin> { - let Constraints = "$val = $dst" in { + let Constraints = "$val = $dst", SchedRW = [WriteALULd, WriteRMW] in { def NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"), @@ -1350,6 +1394,7 @@ multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag, defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap", IIC_XCHG_MEM>; // Swap between registers. +let SchedRW = [WriteALU] in { let Constraints = "$val = $dst" in { def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src), "xchg{b}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>; @@ -1374,9 +1419,9 @@ def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src), Requires<[In64BitMode]>; def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), "xchg{q}\t{$src, %rax|RAX, $src}", [], IIC_XCHG_REG>; +} // SchedRW - - +let SchedRW = [WriteALU] in { def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB; def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), @@ -1386,8 +1431,9 @@ def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB; def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB; def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), @@ -1400,6 +1446,7 @@ def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), } +let SchedRW = [WriteALU] in { def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", [], IIC_CMPXCHG_REG8>, TB; @@ -1412,7 +1459,9 @@ def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", [], IIC_CMPXCHG_REG>, TB; +} // SchedRW +let SchedRW = [WriteALULd, WriteRMW] in { let mayLoad = 1, mayStore = 1 in { def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", [], @@ -1436,7 +1485,7 @@ let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), "cmpxchg16b\t$dst", [], IIC_CMPXCHG_16B>, TB, Requires<[HasCmpxchg16b]>; - +} // SchedRW // Lock instruction prefix @@ -1459,17 +1508,21 @@ def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>; // String manipulation instructions +let SchedRW = [WriteMicrocoded] in { def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", [], IIC_LODS>; def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", [], IIC_LODS>, OpSize; def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", [], IIC_LODS>; def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", [], IIC_LODS>; +} +let SchedRW = [WriteSystem] in { def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", [], IIC_OUTS>; def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", [], IIC_OUTS>, OpSize; def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", [], IIC_OUTS>; - +} // Flag instructions +let SchedRW = [WriteALU] in { def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC>; def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_STC>; def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>; @@ -1479,10 +1532,13 @@ def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>; def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CMC>; def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB; +} // Table lookup instructions -def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>; +def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>, + Sched<[WriteLoad]>; +let SchedRW = [WriteMicrocoded] in { // ASCII Adjust After Addition // sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>, @@ -1512,7 +1568,9 @@ def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>, // sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>, Requires<[In32BitMode]>; +} // SchedRW +let SchedRW = [WriteSystem] in { // Check Array Index Against Bounds def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize, @@ -1528,11 +1586,13 @@ def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>, Requires<[In32BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // MOVBE Instructions // let Predicates = [HasMOVBE] in { + let SchedRW = [WriteALULd] in { def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "movbe{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>, @@ -1545,6 +1605,8 @@ let Predicates = [HasMOVBE] in { "movbe{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>, T8; + } + let SchedRW = [WriteStore] in { def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "movbe{w}\t{$src, $dst|$dst, $src}", [(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>, @@ -1557,6 +1619,7 @@ let Predicates = [HasMOVBE] in { "movbe{q}\t{$src, $dst|$dst, $src}", [(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>, T8; + } } //===----------------------------------------------------------------------===// @@ -1575,6 +1638,21 @@ let Predicates = [HasRDRAND], Defs = [EFLAGS] in { } //===----------------------------------------------------------------------===// +// RDSEED Instruction +// +let Predicates = [HasRDSEED], Defs = [EFLAGS] in { + def RDSEED16r : I<0xC7, MRM7r, (outs GR16:$dst), (ins), + "rdseed{w}\t$dst", + [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize, TB; + def RDSEED32r : I<0xC7, MRM7r, (outs GR32:$dst), (ins), + "rdseed{l}\t$dst", + [(set GR32:$dst, EFLAGS, (X86rdseed))]>, TB; + def RDSEED64r : RI<0xC7, MRM7r, (outs GR64:$dst), (ins), + "rdseed{q}\t$dst", + [(set GR64:$dst, EFLAGS, (X86rdseed))]>, TB; +} + +//===----------------------------------------------------------------------===// // LZCNT Instruction // let Predicates = [HasLZCNT], Defs = [EFLAGS] in { diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 127af6f7f9..49721df7c1 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -20,6 +20,7 @@ // MMX Multiclasses //===----------------------------------------------------------------------===// +let Sched = WriteVecALU in { def MMX_INTALU_ITINS : OpndItins< IIC_MMX_ALU_RR, IIC_MMX_ALU_RM >; @@ -35,11 +36,14 @@ def MMX_PHADDSUBW : OpndItins< def MMX_PHADDSUBD : OpndItins< IIC_MMX_PHADDSUBD_RR, IIC_MMX_PHADDSUBD_RM >; +} +let Sched = WriteVecIMul in def MMX_PMUL_ITINS : OpndItins< IIC_MMX_PMUL, IIC_MMX_PMUL >; +let Sched = WriteVecALU in { def MMX_PSADBW_ITINS : OpndItins< IIC_MMX_PSADBW, IIC_MMX_PSADBW >; @@ -47,11 +51,13 @@ def MMX_PSADBW_ITINS : OpndItins< def MMX_MISC_FUNC_ITINS : OpndItins< IIC_MMX_MISC_FUNC_MEM, IIC_MMX_MISC_FUNC_REG >; +} def MMX_SHIFT_ITINS : ShiftOpndItins< IIC_MMX_SHIFT_RR, IIC_MMX_SHIFT_RM, IIC_MMX_SHIFT_RI >; +let Sched = WriteShuffle in { def MMX_UNPCK_H_ITINS : OpndItins< IIC_MMX_UNPCK_H_RR, IIC_MMX_UNPCK_H_RM >; @@ -67,7 +73,9 @@ def MMX_PCK_ITINS : OpndItins< def MMX_PSHUF_ITINS : OpndItins< IIC_MMX_PSHUF, IIC_MMX_PSHUF >; +} // Sched +let Sched = WriteCvtF2I in { def MMX_CVT_PD_ITINS : OpndItins< IIC_MMX_CVT_PD_RR, IIC_MMX_CVT_PD_RM >; @@ -75,6 +83,7 @@ def MMX_CVT_PD_ITINS : OpndItins< def MMX_CVT_PS_ITINS : OpndItins< IIC_MMX_CVT_PS_RR, IIC_MMX_CVT_PS_RM >; +} let Constraints = "$src1 = $dst" in { // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic. @@ -84,7 +93,8 @@ let Constraints = "$src1 = $dst" in { def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr> { + [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>, + Sched<[itins.Sched]> { let isCommutable = Commutable; } def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), @@ -92,7 +102,7 @@ let Constraints = "$src1 = $dst" in { !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId VR64:$src1, (bitconvert (load_mmx addr:$src2))))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, @@ -101,17 +111,19 @@ let Constraints = "$src1 = $dst" in { def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>; + [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>, + Sched<[WriteVecShift]>; def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId VR64:$src1, (bitconvert (load_mmx addr:$src2))))], - itins.rm>; + itins.rm>, Sched<[WriteVecShiftLd, ReadAfterLd]>; def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst), (ins VR64:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>; + [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>, + Sched<[WriteVecShift]>; } } @@ -120,13 +132,14 @@ multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr, Intrinsic IntId64, OpndItins itins> { def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>; + [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>, + Sched<[itins.Sched]>; def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 (bitconvert (memopmmx addr:$src))))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded]>; } /// Binary MMX instructions requiring SSSE3. @@ -137,13 +150,15 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>; + [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId64 VR64:$src1, - (bitconvert (memopmmx addr:$src2))))], itins.rm>; + (bitconvert (memopmmx addr:$src2))))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -164,9 +179,11 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, OpndItins itins, Domain d> { def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>; + [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>, + Sched<[itins.Sched]>; def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>; + [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>, + Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -174,11 +191,11 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, PatFrag ld_frag, string asm, Domain d> { def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2), asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], - IIC_DEFAULT, d>; + NoItinerary, d>; def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src2), asm, [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], - IIC_DEFAULT, d>; + NoItinerary, d>; } //===----------------------------------------------------------------------===// @@ -197,16 +214,17 @@ def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (x86mmx (scalar_to_vector GR32:$src)))], - IIC_MMX_MOV_MM_RM>; + IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>; let canFoldAsLoad = 1 in def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (x86mmx (scalar_to_vector (loadi32 addr:$src))))], - IIC_MMX_MOV_MM_RM>; + IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>; let mayStore = 1 in def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), - "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>; + "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>, + Sched<[WriteStore]>; // Low word of MMX to GPR. def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1, @@ -214,16 +232,18 @@ def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1, def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, - (MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>; + (MMX_X86movd2w (x86mmx VR64:$src)))], + IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>; let neverHasSideEffects = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movd\t{$src, $dst|$dst, $src}", - [], IIC_MMX_MOV_MM_RM>; + [], IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>; // These are 64 bit moves, but since the OS X assembler doesn't // recognize a register-register movq, we write them as // movd. +let SchedRW = [WriteMove] in { def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", @@ -237,6 +257,9 @@ let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOVQ_RR>; +} // SchedRW + +let SchedRW = [WriteLoad] in { let canFoldAsLoad = 1 in def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", @@ -246,7 +269,9 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (x86mmx VR64:$src), addr:$dst)], IIC_MMX_MOVQ_RM>; +} // SchedRW +let SchedRW = [WriteMove] in { def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", [(set VR64:$dst, @@ -271,11 +296,12 @@ def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst), def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOVQ_RR>; +} // SchedRW def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movntq\t{$src, $dst|$dst, $src}", [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)], - IIC_MMX_MOVQ_RM>; + IIC_MMX_MOVQ_RM>, Sched<[WriteStore]>; let AddedComplexity = 15 in // movd to MMX register zero-extends @@ -283,7 +309,7 @@ def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))], - IIC_MMX_MOV_MM_RM>; + IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>; let AddedComplexity = 20 in def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), @@ -291,7 +317,7 @@ def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), [(set VR64:$dst, (x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))))], - IIC_MMX_MOV_MM_RM>; + IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>; // Arithmetic Instructions defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b, @@ -491,14 +517,14 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg, "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))], - IIC_MMX_PSHUF>; + IIC_MMX_PSHUF>, Sched<[WriteShuffle]>; def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, (int_x86_sse_pshuf_w (load_mmx addr:$src1), imm:$src2))], - IIC_MMX_PSHUF>; + IIC_MMX_PSHUF>, Sched<[WriteShuffleLd]>; @@ -532,7 +558,7 @@ def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg, "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1, (iPTR imm:$src2)))], - IIC_MMX_PEXTR>; + IIC_MMX_PEXTR>, Sched<[WriteShuffle]>; let Constraints = "$src1 = $dst" in { def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg, (outs VR64:$dst), @@ -540,7 +566,7 @@ let Constraints = "$src1 = $dst" in { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, GR32:$src2, (iPTR imm:$src3)))], - IIC_MMX_PINSRW>; + IIC_MMX_PINSRW>, Sched<[WriteShuffle]>; def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem, (outs VR64:$dst), @@ -549,7 +575,7 @@ let Constraints = "$src1 = $dst" in { [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, (i32 (anyext (loadi16 addr:$src2))), (iPTR imm:$src3)))], - IIC_MMX_PINSRW>; + IIC_MMX_PINSRW>, Sched<[WriteShuffleLd, ReadAfterLd]>; } // Mask creation @@ -570,6 +596,7 @@ def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))), (x86mmx (MMX_MOVQ64rm addr:$src))>; // Misc. +let SchedRW = [WriteShuffle] in { let Uses = [EDI] in def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", @@ -580,6 +607,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)], IIC_MMX_MASKMOV>; +} // 64-bit bit convert. let Predicates = [HasSSE2] in { diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 0979752757..384238741b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -16,6 +16,8 @@ class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> { InstrItinClass rr = arg_rr; InstrItinClass rm = arg_rm; + // InstrSchedModel info. + X86FoldableSchedWrite Sched = WriteFAdd; } class SizeItins<OpndItins arg_s, OpndItins arg_d> { @@ -33,6 +35,7 @@ class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm, // scalar +let Sched = WriteFAdd in { def SSE_ALU_F32S : OpndItins< IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM >; @@ -40,11 +43,13 @@ def SSE_ALU_F32S : OpndItins< def SSE_ALU_F64S : OpndItins< IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM >; +} def SSE_ALU_ITINS_S : SizeItins< SSE_ALU_F32S, SSE_ALU_F64S >; +let Sched = WriteFMul in { def SSE_MUL_F32S : OpndItins< IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM >; @@ -52,11 +57,13 @@ def SSE_MUL_F32S : OpndItins< def SSE_MUL_F64S : OpndItins< IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM >; +} def SSE_MUL_ITINS_S : SizeItins< SSE_MUL_F32S, SSE_MUL_F64S >; +let Sched = WriteFDiv in { def SSE_DIV_F32S : OpndItins< IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F64S_RM >; @@ -64,12 +71,14 @@ def SSE_DIV_F32S : OpndItins< def SSE_DIV_F64S : OpndItins< IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM >; +} def SSE_DIV_ITINS_S : SizeItins< SSE_DIV_F32S, SSE_DIV_F64S >; // parallel +let Sched = WriteFAdd in { def SSE_ALU_F32P : OpndItins< IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM >; @@ -77,11 +86,13 @@ def SSE_ALU_F32P : OpndItins< def SSE_ALU_F64P : OpndItins< IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM >; +} def SSE_ALU_ITINS_P : SizeItins< SSE_ALU_F32P, SSE_ALU_F64P >; +let Sched = WriteFMul in { def SSE_MUL_F32P : OpndItins< IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F64P_RM >; @@ -89,11 +100,13 @@ def SSE_MUL_F32P : OpndItins< def SSE_MUL_F64P : OpndItins< IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM >; +} def SSE_MUL_ITINS_P : SizeItins< SSE_MUL_F32P, SSE_MUL_F64P >; +let Sched = WriteFDiv in { def SSE_DIV_F32P : OpndItins< IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F64P_RM >; @@ -101,6 +114,7 @@ def SSE_DIV_F32P : OpndItins< def SSE_DIV_F64P : OpndItins< IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM >; +} def SSE_DIV_ITINS_P : SizeItins< SSE_DIV_F32P, SSE_DIV_F64P @@ -110,6 +124,7 @@ def SSE_BIT_ITINS_P : OpndItins< IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM >; +let Sched = WriteVecALU in { def SSE_INTALU_ITINS_P : OpndItins< IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM >; @@ -117,7 +132,9 @@ def SSE_INTALU_ITINS_P : OpndItins< def SSE_INTALUQ_ITINS_P : OpndItins< IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM >; +} +let Sched = WriteVecIMul in def SSE_INTMUL_ITINS_P : OpndItins< IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM >; @@ -148,13 +165,15 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; } def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>; + [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class @@ -169,14 +188,16 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>( !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, RC:$src2))], itins.rr>; + RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, mem_cpat:$src2))], itins.rm>; + RC:$src1, mem_cpat:$src2))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_packed - SSE 1 & 2 packed instructions class @@ -189,14 +210,16 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>, + Sched<[itins.Sched]>; let mayLoad = 1 in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], - itins.rm, d>; + itins.rm, d>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class @@ -209,12 +232,14 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rr, IIC_DEFAULT, d>; + pat_rr, NoItinerary, d>, + Sched<[WriteVecLogic]>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rm, IIC_DEFAULT, d>; + pat_rm, NoItinerary, d>, + Sched<[WriteVecLogicLd, ReadAfterLd]>; } //===----------------------------------------------------------------------===// @@ -345,7 +370,7 @@ let Predicates = [HasAVX] in { // Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "", [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>; def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "", @@ -362,7 +387,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4f32 immAllZerosV))]>; } @@ -379,7 +404,7 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>; // at the rename stage without using any execution unit, so SET0PSY // and SET0PDY can be used for vector int instructions without penalty let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, Predicates = [HasAVX] in { + isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in { def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v8f32 immAllZerosV))]>; } @@ -417,7 +442,7 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; let Predicates = [HasAVX2] in @@ -444,14 +469,14 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, !strconcat(base_opc, asm_opr), [(set VR128:$dst, (vt (OpNode VR128:$src1, (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>; + IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>; // For the disassembler let isCodeGenOnly = 1, hasSideEffects = 0 in def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), !strconcat(base_opc, asm_opr), - [], IIC_SSE_MOV_S_RR>; + [], IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>; } multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, @@ -464,7 +489,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - VEX, VEX_LIG; + VEX, VEX_LIG, Sched<[WriteStore]>; // SSE1 & 2 let Constraints = "$src1 = $dst" in { defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr, @@ -473,7 +498,8 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + Sched<[WriteStore]>; } // Loading from memory automatically zeroing upper bits. @@ -482,11 +508,11 @@ multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>, VEX, VEX_LIG; + IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>; def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>; + IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>; } defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS; @@ -745,11 +771,13 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, bit IsReMaterializable = 1> { let neverHasSideEffects = 1 in def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>; + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>, + Sched<[WriteMove]>; let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>; + [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>, + Sched<[WriteLoad]>; } defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, @@ -790,6 +818,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, TB, OpSize; +let SchedRW = [WriteStore] in { def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -822,9 +851,10 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>, VEX, VEX_L; +} // SchedRW // For disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], @@ -880,6 +910,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), (VMOVUPDYmr addr:$dst, VR256:$src)>; +let SchedRW = [WriteStore] in { def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -896,9 +927,10 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v2f64 VR128:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>; +} // SchedRW // For disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -1009,7 +1041,7 @@ let Predicates = [HasAVX] in { (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))), addr:$dst), - (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))), addr:$dst), (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; @@ -1044,7 +1076,7 @@ let Predicates = [UseSSE1] in { // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteMove] in { def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), "movaps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, VEX; @@ -1061,7 +1093,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), // Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! -let canFoldAsLoad = 1, isReMaterializable = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { let isCodeGenOnly = 1 in { def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", @@ -1095,14 +1127,16 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode, [(set VR128:$dst, (psnode VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], - itin, SSEPackedSingle>, TB; + itin, SSEPackedSingle>, TB, + Sched<[WriteShuffleLd, ReadAfterLd]>; def PDrm : PI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), !strconcat(base_opc, "d", asm_opr), [(set VR128:$dst, (v2f64 (pdnode VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], - itin, SSEPackedDouble>, TB, OpSize; + itin, SSEPackedDouble>, TB, OpSize, + Sched<[WriteShuffleLd, ReadAfterLd]>; } @@ -1123,6 +1157,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -1143,6 +1178,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // Shuffle with VMOVLPS @@ -1222,6 +1258,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -1246,6 +1283,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // VMOVHPS patterns @@ -1296,14 +1334,14 @@ let AddedComplexity = 20 in { [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), @@ -1311,13 +1349,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; } let Predicates = [HasAVX] in { @@ -1352,22 +1390,27 @@ def SSE_CVT_PD : OpndItins< IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_PS : OpndItins< IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_Scalar : OpndItins< IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_32 : OpndItins< IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_64 : OpndItins< IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SD2SI : OpndItins< IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM >; @@ -1377,10 +1420,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, [(set DstRC:$dst, (OpNode SrcRC:$src))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -1388,10 +1431,10 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, OpndItins itins> { let neverHasSideEffects = 1 in { def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [], itins.rr, d>; + [], itins.rr, d>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [], itins.rm, d>; + [], itins.rm, d>, Sched<[itins.Sched.Folded]>; } } @@ -1399,11 +1442,13 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { let neverHasSideEffects = 1 in { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), - !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, + Sched<[WriteCvtI2F]>; let mayLoad = 1 in def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src), - !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, + Sched<[WriteCvtI2FLd, ReadAfterLd]>; } // neverHasSideEffects = 1 } @@ -1534,10 +1579,12 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>; + [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>, + Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>; + [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -1549,14 +1596,14 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, @@ -1701,13 +1748,15 @@ let neverHasSideEffects = 1 in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG; + IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR64:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, - XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG; + XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, @@ -1716,26 +1765,28 @@ def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround FR64:$src))], - IIC_SSE_CVT_Scalar_RR>; + IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>; def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround (loadf64 addr:$src)))], IIC_SSE_CVT_Scalar_RM>, XD, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>; def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2F]>; def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, @@ -1743,13 +1794,15 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>, + Sched<[WriteCvtF2F]>; def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } // Convert scalar single to scalar double @@ -1759,13 +1812,15 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RR>, - XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG; + XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR32:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, - XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; + XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } def : Pat<(f64 (fextend FR32:$src)), @@ -1784,12 +1839,12 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fextend FR32:$src))], IIC_SSE_CVT_Scalar_RR>, XS, - Requires<[UseSSE2]>; + Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>; def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (extloadf32 addr:$src))], IIC_SSE_CVT_Scalar_RM>, XS, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>; // extload f32 -> f64. This matches load+fextend because we have a hack in // the isel (PreprocessForFPConvert) that can introduce loads after dag @@ -1806,57 +1861,61 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2F]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>, + Sched<[WriteCvtF2F]>; def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; + IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; + IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX, VEX_L; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX, VEX_L; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; + IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; // Convert Packed Double FP to Packed DW Integers @@ -1867,7 +1926,7 @@ let Predicates = [HasAVX] in { def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, - VEX; + VEX, Sched<[WriteCvtF2I]>; // XMM only def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", @@ -1875,18 +1934,20 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "vcvtpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX; + (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX, + Sched<[WriteCvtF2ILd]>; // YMM only def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L; + (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L, + Sched<[WriteCvtF2I]>; def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>, - VEX, VEX_L; + VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", (VCVTPD2DQYrr VR128:$dst, VR256:$src)>; } @@ -1895,11 +1956,11 @@ def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>; def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix @@ -1907,32 +1968,33 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; + IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; + IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX, VEX_L; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX, VEX_L; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L, + Sched<[WriteCvtF2ILd]>; def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; + IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), @@ -1982,7 +2044,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, VEX; + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>; // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. @@ -1995,19 +2057,19 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>; // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvtt_pd2dq_256 VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>; @@ -2021,12 +2083,13 @@ let Predicates = [HasAVX] in { def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, + Sched<[WriteCvtF2ILd]>; // Convert packed single to packed double let Predicates = [HasAVX] in { @@ -2034,32 +2097,32 @@ let Predicates = [HasAVX] in { def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB, VEX; + IIC_SSE_CVT_PD_RR>, TB, VEX, Sched<[WriteCvtF2F]>; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB, VEX; + IIC_SSE_CVT_PD_RM>, TB, VEX, Sched<[WriteCvtF2FLd]>; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2_pd_256 VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L, Sched<[WriteCvtF2F]>; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; } let Predicates = [UseSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB; + IIC_SSE_CVT_PD_RR>, TB, Sched<[WriteCvtF2F]>; def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB; + IIC_SSE_CVT_PD_RM>, TB, Sched<[WriteCvtF2FLd]>; } // Convert Packed DW Integers to Packed Double FP @@ -2067,30 +2130,33 @@ let Predicates = [HasAVX] in { let neverHasSideEffects = 1, mayLoad = 1 in def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", - []>, VEX; + []>, VEX, Sched<[WriteCvtI2FLd]>; def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX; + (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX, + Sched<[WriteCvtI2F]>; def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtdq2_pd_256 - (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L; + (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L, + Sched<[WriteCvtI2FLd]>; def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, - (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L; + (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L, + Sched<[WriteCvtI2F]>; } let neverHasSideEffects = 1, mayLoad = 1 in def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>; def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>; // AVX 256-bit register conversion intrinsics let Predicates = [HasAVX] in { @@ -2107,7 +2173,7 @@ let Predicates = [HasAVX] in { def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>, VEX; + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>; // XMM only def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", @@ -2116,31 +2182,31 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2psx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>; // YMM only def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2_ps_256 VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", (VCVTPD2PSYrr VR128:$dst, VR256:$src)>; def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>; // AVX 256-bit register conversion intrinsics @@ -2193,22 +2259,24 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, def rr : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), (ld_frag addr:$src2), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [], - IIC_SSE_ALU_F32S_RR>; + IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [], - IIC_SSE_ALU_F32S_RM>; + IIC_SSE_ALU_F32S_RM>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -2241,12 +2309,14 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC, (ins VR128:$src1, VR128:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, VR128:$src, imm:$cc))], - itins.rr>; + itins.rr>, + Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, x86memop:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, (load addr:$src), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } // Aliases to match intrinsics which expect XMM operand(s). @@ -2276,12 +2346,14 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], - IIC_SSE_COMIS_RR, d>; + IIC_SSE_COMIS_RR, d>, + Sched<[WriteFAdd]>; def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), (ld_frag addr:$src2)))], - IIC_SSE_COMIS_RM, d>; + IIC_SSE_COMIS_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; } let Defs = [EFLAGS] in { @@ -2338,20 +2410,23 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, def rri : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], - IIC_SSE_CMPP_RR, d>; + IIC_SSE_CMPP_RR, d>, + Sched<[WriteFAdd]>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], - IIC_SSE_CMPP_RM, d>; + IIC_SSE_CMPP_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rri_alt : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RR, d>; + asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>; def rmi_alt : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RM, d>; + asm_alt, [], IIC_SSE_CMPP_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; } } @@ -2427,12 +2502,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffle]>; } defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, @@ -2516,13 +2593,14 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, (outs RC:$dst), (ins RC:$src1, RC:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], - IIC_SSE_UNPCK, d>; + IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, (mem_frag addr:$src2))))], - IIC_SSE_UNPCK, d>; + IIC_SSE_UNPCK, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, @@ -2613,10 +2691,11 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, Domain d> { def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>; + [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>, + Sched<[WriteVecLogic]>; def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], - IIC_SSE_MOVMSK, d>, REX_W; + IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>; } let Predicates = [HasAVX] in { @@ -2644,18 +2723,18 @@ let Predicates = [HasAVX] in { // Assembler Only def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, - SSEPackedSingle>, TB, VEX; + SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>; def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, SSEPackedDouble>, TB, - OpSize, VEX; + OpSize, VEX, Sched<[WriteVecLogic]>; def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, - SSEPackedSingle>, TB, VEX, VEX_L; + SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>; def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, SSEPackedDouble>, TB, - OpSize, VEX, VEX_L; + OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>; } defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", @@ -2693,7 +2772,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, @@ -2701,7 +2781,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -2967,6 +3048,7 @@ let isCodeGenOnly = 1 in { /// /// And, we have a special variant form for a full-vector intrinsic form. +let Sched = WriteFSqrt in { def SSE_SQRTP : OpndItins< IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM >; @@ -2974,7 +3056,9 @@ def SSE_SQRTP : OpndItins< def SSE_SQRTS : OpndItins< IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM >; +} +let Sched = WriteFRcp in { def SSE_RCPP : OpndItins< IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM >; @@ -2982,6 +3066,7 @@ def SSE_RCPP : OpndItins< def SSE_RCPS : OpndItins< IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM >; +} /// sse1_fp_unop_s - SSE1 unops in scalar form. multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, @@ -2991,24 +3076,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR32:$src1, FR32:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]>; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3016,13 +3103,15 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>; + [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>; + [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. @@ -3033,24 +3122,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR32:$src1, FR32:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]>; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3058,17 +3149,17 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; let Constraints = "$src1 = $dst" in { def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rr>; + [], itins.rr>, Sched<[itins.Sched]>; let mayLoad = 1, hasSideEffects = 0 in def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rm>; + [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -3080,30 +3171,32 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>; + [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>; + [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. @@ -3115,33 +3208,33 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V8F32Int VR256:$src))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_s - SSE2 unops in scalar form. @@ -3152,35 +3245,40 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR64:$src1, FR64:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>; + [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>, + Sched<[itins.Sched]>; // See the comments in sse1_fp_unop_s for why this is OptForSize. def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>; def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>; + [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>; + [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_p - SSE2 unops in vector forms. @@ -3191,30 +3289,32 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>; + [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>; + [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } // Square root. @@ -3305,52 +3405,48 @@ let Predicates = [UseSSE1] in { //===----------------------------------------------------------------------===// let AddedComplexity = 400 in { // Prefer non-temporal versions - def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - - let ExeDomain = SSEPackedInt in - def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2i64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - - def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; - - def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v8f32 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - let ExeDomain = SSEPackedInt in - def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4i64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; -} +let SchedRW = [WriteStore] in { +def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; +def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; + +let ExeDomain = SSEPackedInt in +def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2i64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; + +def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +let ExeDomain = SSEPackedInt in +def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4i64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; -let AddedComplexity = 400 in { // Prefer non-temporal versions def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)], @@ -3366,9 +3462,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)], IIC_SSE_MOVNT>; -def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; - // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti{l}\t{$src, $dst|$dst, $src}", @@ -3380,14 +3473,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), [(nontemporalstore (i64 GR64:$src), addr:$dst)], IIC_SSE_MOVNT>, TB, Requires<[HasSSE2]>; -} +} // SchedRW = [WriteStore] + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; +} // AddedComplexity //===----------------------------------------------------------------------===// // SSE 1 & 2 - Prefetch and memory fence //===----------------------------------------------------------------------===// // Prefetch intrinsic. -let Predicates = [HasSSE1] in { +let Predicates = [HasSSE1], SchedRW = [WriteLoad] in { def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))], IIC_SSE_PREFETCH>, TB; @@ -3402,6 +3502,8 @@ def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src), IIC_SSE_PREFETCH>, TB; } +// FIXME: How should these memory instructions be modeled? +let SchedRW = [WriteLoad] in { // Flush cache def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflush\t$src", [(int_x86_sse2_clflush addr:$src)], @@ -3421,6 +3523,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins), def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>, TB, Requires<[HasSSE2]>; +} // SchedRW def : Pat<(X86SFence), (SFENCE)>; def : Pat<(X86LFence), (LFENCE)>; @@ -3432,17 +3535,17 @@ def : Pat<(X86MFence), (MFENCE)>; def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src), "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)], - IIC_SSE_LDMXCSR>, VEX; + IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>; def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)], - IIC_SSE_STMXCSR>, VEX; + IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>; def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src), "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)], - IIC_SSE_LDMXCSR>; + IIC_SSE_LDMXCSR>, Sched<[WriteLoad]>; def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)], - IIC_SSE_STMXCSR>; + IIC_SSE_STMXCSR>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // SSE2 - Move Aligned/Unaligned Packed Integer Instructions @@ -3450,7 +3553,7 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), let ExeDomain = SSEPackedInt in { // SSE integer instructions -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteMove] in { def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, VEX; @@ -3466,7 +3569,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), } // For Disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, @@ -3484,7 +3587,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, VEX; @@ -3501,7 +3604,7 @@ let Predicates = [HasAVX] in { } } -let mayStore = 1, neverHasSideEffects = 1 in { +let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, @@ -3520,6 +3623,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), } } +let SchedRW = [WriteMove] in { let neverHasSideEffects = 1 in def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -3538,9 +3642,10 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; } +} // SchedRW let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/], @@ -3552,7 +3657,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), XS, Requires<[UseSSE2]>; } -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/], @@ -3580,6 +3685,7 @@ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src), // SSE2 - Packed Integer Arithmetic Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecIMul in def SSE_PMADD : OpndItins< IIC_SSE_PMADD, IIC_SSE_PMADD >; @@ -3598,14 +3704,15 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128, @@ -3639,20 +3746,22 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))], - itins.rr>; + itins.rr>, Sched<[WriteVecShift]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, i128mem:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, - (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>; + (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>, + Sched<[WriteVecShiftLd, ReadAfterLd]>; def ri : PDIi8<opc2, ImmForm, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>; + [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>, + Sched<[WriteVecShift]>; } /// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types @@ -3667,14 +3776,16 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>; + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), - (bitconvert (memop_frag addr:$src2)))))]>; + (bitconvert (memop_frag addr:$src2)))))]>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -3779,7 +3890,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. def VPSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3825,7 +3936,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR256, v8i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 256-bit logical shifts. def VPSLLDQYri : PDIi8<0x73, MRM7r, (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), @@ -3871,7 +3982,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P>; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. def PSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3966,14 +4077,15 @@ let Predicates = [HasAVX] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX; + IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX2] in { @@ -3983,14 +4095,15 @@ let Predicates = [HasAVX2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX, VEX_L; + IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L, + Sched<[WriteShuffleLd]>; } let Predicates = [UseSSE2] in { @@ -4000,14 +4113,15 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>; + IIC_SSE_PSHUF>, Sched<[WriteShuffle]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, + Sched<[WriteShuffleLd]>; } } } // ExeDomain = SSEPackedInt @@ -4043,7 +4157,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, Sched<[WriteShuffle]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -4052,7 +4166,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, [(set VR128:$dst, (OpNode VR128:$src1, (bc_frag (memopv2i64 addr:$src2))))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, @@ -4060,12 +4175,14 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, def Yrr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>; + [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>, + Sched<[WriteShuffle]>; def Yrm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (OpNode VR256:$src1, - (bc_frag (memopv4i64 addr:$src2))))]>; + (bc_frag (memopv4i64 addr:$src2))))]>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4142,7 +4259,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>; + (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffle]>; def rmi : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3), @@ -4151,7 +4269,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), - imm:$src3))], IIC_SSE_PINSRW>; + imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } // Extract @@ -4160,12 +4279,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))]>, TB, OpSize, VEX; + imm:$src2))]>, TB, OpSize, VEX, + Sched<[WriteShuffle]>; def PEXTRWri : PDIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))], IIC_SSE_PEXTRW>; + imm:$src2))], IIC_SSE_PEXTRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; // Insert let Predicates = [HasAVX] in { @@ -4173,7 +4294,7 @@ let Predicates = [HasAVX] in { def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, TB, OpSize, VEX_4V; + []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst" in @@ -4185,7 +4306,7 @@ let Constraints = "$src1 = $dst" in // SSE2 - Packed Mask Creation //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", @@ -4213,7 +4334,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), // SSE2 - Conditional Store //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { let Uses = [EDI] in def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), @@ -4252,41 +4373,42 @@ def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteMove]>; def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>; + (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, + Sched<[WriteMove]>; def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; //===---------------------------------------------------------------------===// // Move Int Doubleword to Single Scalar @@ -4294,22 +4416,22 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int to Packed Double Int @@ -4317,26 +4439,29 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX, + Sched<[WriteMove]>; def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, + Sched<[WriteMove]>; def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int first element to Doubleword Int // +let SchedRW = [WriteMove] in { def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "vmov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), @@ -4349,6 +4474,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>; +} //SchedRW //===---------------------------------------------------------------------===// // Bitcast FR64 <-> GR64 @@ -4357,28 +4483,28 @@ let Predicates = [HasAVX] in def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, - VEX; + VEX, Sched<[WriteLoad]>; def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVD_ToGP>; + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // Move Scalar Single to Double Int @@ -4386,23 +4512,24 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))], - IIC_SSE_MOVD_ToGP>, VEX; + IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>; def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))], - IIC_SSE_MOVD_ToGP>; + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // Patterns and instructions to describe movd/movq to XMM register zero-extends // +let SchedRW = [WriteMove] in { let AddedComplexity = 15 in { def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", @@ -4428,8 +4555,9 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), (v2i64 (scalar_to_vector GR64:$src)))))], IIC_SSE_MOVDQ>; } +} // SchedRW -let AddedComplexity = 20 in { +let AddedComplexity = 20, SchedRW = [WriteLoad] in { def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -4442,7 +4570,7 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))], IIC_SSE_MOVDQ>; -} +} // AddedComplexity, SchedRW let Predicates = [HasAVX] in { // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. @@ -4491,6 +4619,8 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}", //===---------------------------------------------------------------------===// // Move Quadword Int to Packed Quadword Int // + +let SchedRW = [WriteLoad] in { def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -4502,10 +4632,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (scalar_to_vector (loadi64 addr:$src))))], IIC_SSE_MOVDQ>, XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix +} // SchedRW //===---------------------------------------------------------------------===// // Move Packed Quadword Int to Quadword Int // +let SchedRW = [WriteStore] in { def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (vector_extract (v2i64 VR128:$src), @@ -4516,17 +4648,19 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>; +} // SchedRW //===---------------------------------------------------------------------===// // Store / copy lower 64-bits of a XMM register. // def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; + [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX, + Sched<[WriteStore]>; def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; let AddedComplexity = 20 in def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), @@ -4535,7 +4669,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, VEX, Requires<[HasAVX]>; + XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>; let AddedComplexity = 20 in def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), @@ -4544,7 +4678,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, Requires<[UseSSE2]>; + XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>; let Predicates = [HasAVX], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), @@ -4574,6 +4708,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)), // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in // IA32 document. movq xmm1, xmm2 does clear the high bits. // +let SchedRW = [WriteVecLogic] in { let AddedComplexity = 15 in def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -4586,7 +4721,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], IIC_SSE_MOVQ_RR>, XS, Requires<[UseSSE2]>; +} // SchedRW +let SchedRW = [WriteVecLogicLd] in { let AddedComplexity = 20 in def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -4602,6 +4739,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), IIC_SSE_MOVDQ>, XS, Requires<[UseSSE2]>; } +} // SchedRW let AddedComplexity = 20 in { let Predicates = [HasAVX] in { @@ -4619,6 +4757,7 @@ let AddedComplexity = 20 in { } // Instructions to match in the assembler +let SchedRW = [WriteMove] in { def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, VEX, VEX_W; @@ -4629,16 +4768,19 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, VEX, VEX_W; +} // SchedRW // Instructions for the disassembler // xr = XMM register // xm = mem64 +let SchedRW = [WriteMove] in { let Predicates = [HasAVX] in def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS; +} // SchedRW //===---------------------------------------------------------------------===// // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP @@ -4649,11 +4791,11 @@ multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (vt (OpNode RC:$src)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (OpNode (mem_frag addr:$src)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX] in { @@ -4709,25 +4851,27 @@ multiclass sse3_replicate_dfp<string OpcodeStr> { let neverHasSideEffects = 1 in def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [], IIC_SSE_MOV_LH>; + [], IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (X86Movddup (scalar_to_vector (loadf64 addr:$src)))))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>; } // FIXME: Merge with above classe when there're patterns for the ymm version multiclass sse3_replicate_dfp_y<string OpcodeStr> { def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>; + [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>, + Sched<[WriteShuffle]>; def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (X86Movddup - (scalar_to_vector (loadf64 addr:$src)))))]>; + (scalar_to_vector (loadf64 addr:$src)))))]>, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX] in { @@ -4775,6 +4919,7 @@ let Predicates = [UseSSE3] in { // SSE3 - Move Unaligned Integer //===---------------------------------------------------------------------===// +let SchedRW = [WriteLoad] in { let Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", @@ -4788,6 +4933,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))], IIC_SSE_LDDQU>; +} //===---------------------------------------------------------------------===// // SSE3 - Arithmetic @@ -4801,13 +4947,15 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm : I<0xD0, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>; + [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4844,14 +4992,15 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, + Sched<[WriteFAdd]>; def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], - IIC_SSE_HADDSUB_RM>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; } multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { @@ -4859,14 +5008,15 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, + Sched<[WriteFAdd]>; def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], - IIC_SSE_HADDSUB_RM>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4915,7 +5065,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>, - OpSize; + OpSize, Sched<[WriteVecALU]>; def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), @@ -4923,7 +5073,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (IntId128 (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>, - OpSize; + OpSize, Sched<[WriteVecALULd]>; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. @@ -4933,14 +5083,15 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 VR256:$src))]>, - OpSize; + OpSize, Sched<[WriteVecALU]>; def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 - (bitconvert (memopv4i64 addr:$src))))]>, OpSize; + (bitconvert (memopv4i64 addr:$src))))]>, OpSize, + Sched<[WriteVecALULd]>; } let Predicates = [HasAVX] in { @@ -4972,6 +5123,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecALU in { def SSE_PHADDSUBD : OpndItins< IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM >; @@ -4981,12 +5133,16 @@ def SSE_PHADDSUBSW : OpndItins< def SSE_PHADDSUBW : OpndItins< IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM >; +} +let Sched = WriteShuffle in def SSE_PSHUFB : OpndItins< IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM >; +let Sched = WriteVecALU in def SSE_PSIGN : OpndItins< IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM >; +let Sched = WriteVecIMul in def SSE_PMULHRSW : OpndItins< IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW >; @@ -5003,7 +5159,7 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, - OpSize; + OpSize, Sched<[itins.Sched]>; def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, @@ -5011,7 +5167,8 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, - (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize; + (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. @@ -5025,7 +5182,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize; + OpSize, Sched<[itins.Sched]>; def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -5033,7 +5190,8 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src2))))]>, OpSize, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, @@ -5175,7 +5333,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize; + [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>; let mayLoad = 1 in def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), @@ -5183,7 +5341,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize; + [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5193,13 +5351,13 @@ multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> { (ins VR256:$src1, VR256:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; + []>, OpSize, Sched<[WriteShuffle]>; let mayLoad = 1 in def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; + []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5247,6 +5405,7 @@ def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), // SSSE3 - Thread synchronization //===---------------------------------------------------------------------===// +let SchedRW = [WriteSystem] in { let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, @@ -5260,6 +5419,7 @@ let Uses = [ECX, EAX] in def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>, TB, Requires<[HasSSE3]>; +} // SchedRW def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>; @@ -6679,7 +6839,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))], - IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, RC:$src3), @@ -6688,7 +6848,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, [(set RC:$dst, (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)), RC:$src3))], - IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; } let Predicates = [HasAVX] in { diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index 1185941d34..5b6298b541 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -15,7 +15,7 @@ let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { let Uses = [CL] in { def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1), "shl{b}\t{%cl, $dst|$dst, CL}", @@ -62,9 +62,10 @@ def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), "shl{q}\t$dst", [], IIC_SR>; } // hasSideEffects = 0 } // isConvertibleToThreeAddress = 1 -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW +let SchedRW = [WriteShiftLd, WriteRMW] in { // FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern // using CL? let Uses = [CL] in { @@ -118,8 +119,9 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst), "shl{q}\t$dst", [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)], IIC_SR>; +} // SchedRW -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { let Uses = [CL] in { def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1), "shr{b}\t{%cl, $dst|$dst, CL}", @@ -163,9 +165,10 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1), "shr{q}\t$dst", [(set GR64:$dst, (srl GR64:$src1, (i8 1)))], IIC_SR>; -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW +let SchedRW = [WriteShiftLd, WriteRMW] in { let Uses = [CL] in { def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst), "shr{b}\t{%cl, $dst|$dst, CL}", @@ -216,8 +219,9 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst), "shr{q}\t$dst", [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)], IIC_SR>; +} // SchedRW -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { let Uses = [CL] in { def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), "sar{b}\t{%cl, $dst|$dst, CL}", @@ -273,9 +277,10 @@ def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1), "sar{q}\t$dst", [(set GR64:$dst, (sra GR64:$src1, (i8 1)))], IIC_SR>; -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW +let SchedRW = [WriteShiftLd, WriteRMW] in { let Uses = [CL] in { def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst), "sar{b}\t{%cl, $dst|$dst, CL}", @@ -330,13 +335,14 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), "sar{q}\t$dst", [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)], IIC_SR>; +} // SchedRW //===----------------------------------------------------------------------===// // Rotate instructions //===----------------------------------------------------------------------===// let hasSideEffects = 0 in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), "rcl{b}\t$dst", [], IIC_SR>; def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), @@ -405,6 +411,7 @@ def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1), } // Constraints = "$src = $dst" +let SchedRW = [WriteShiftLd, WriteRMW] in { def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), "rcl{b}\t$dst", [], IIC_SR>; def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), @@ -458,9 +465,10 @@ def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; } +} // SchedRW } // hasSideEffects = 0 -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), @@ -512,8 +520,9 @@ def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "rol{q}\t$dst", [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))], IIC_SR>; -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW +let SchedRW = [WriteShiftLd, WriteRMW] in { let Uses = [CL] in { def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst), "rol{b}\t{%cl, $dst|$dst, CL}", @@ -568,8 +577,9 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst), "rol{q}\t$dst", [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)], IIC_SR>; +} // SchedRW -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { let Uses = [CL] in { def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "ror{b}\t{%cl, $dst|$dst, CL}", @@ -620,8 +630,9 @@ def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "ror{q}\t$dst", [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))], IIC_SR>; -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW +let SchedRW = [WriteShiftLd, WriteRMW] in { let Uses = [CL] in { def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst), "ror{b}\t{%cl, $dst|$dst, CL}", @@ -676,13 +687,14 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), "ror{q}\t$dst", [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)], IIC_SR>; +} // SchedRW //===----------------------------------------------------------------------===// // Double shift instructions (generalizations of rotate) //===----------------------------------------------------------------------===// -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { let Uses = [CL] in { def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), @@ -765,8 +777,9 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg, (i8 imm:$src3)))], IIC_SHD64_REG_IM>, TB; } -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW +let SchedRW = [WriteShiftLd, WriteRMW] in { let Uses = [CL] in { def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", @@ -840,6 +853,7 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem, (i8 imm:$src3)), addr:$dst)], IIC_SHD64_MEM_IM>, TB; +} // SchedRW } // Defs = [EFLAGS] @@ -857,12 +871,12 @@ multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> { let neverHasSideEffects = 1 in { def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, TAXD, VEX; + []>, TAXD, VEX, Sched<[WriteShift]>; let mayLoad = 1 in def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, i8imm:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, TAXD, VEX; + []>, TAXD, VEX, Sched<[WriteShiftLd]>; } } @@ -870,11 +884,17 @@ multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> { let neverHasSideEffects = 1 in { def rr : I<0xF7, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - VEX_4VOp3; + VEX_4VOp3, Sched<[WriteShift]>; let mayLoad = 1 in def rm : I<0xF7, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - VEX_4VOp3; + VEX_4VOp3, + Sched<[WriteShiftLd, + // x86memop:$src1 + ReadDefault, ReadDefault, ReadDefault, ReadDefault, + ReadDefault, + // RC:$src1 + ReadAfterLd]>; } } diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 3caa1b538c..053417ccde 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +let SchedRW = [WriteSystem] in { let Defs = [RAX, RDX] in def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>, TB; @@ -35,6 +36,7 @@ let Uses = [EFLAGS] in def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))], IIC_INT3>; +} // SchedRW def : Pat<(debugtrap), (INT3)>; @@ -43,6 +45,7 @@ def : Pat<(debugtrap), // FIXME: This doesn't work because InstAlias can't match immediate constants. //def : InstAlias<"int\t$3", (INT3)>; +let SchedRW = [WriteSystem] in { def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", [(int_x86_int imm:$trap)], IIC_INT>; @@ -65,11 +68,13 @@ def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize; def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>; def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>, Requires<[In64BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // Input/Output Instructions. // +let SchedRW = [WriteSystem] in { let Defs = [AL], Uses = [DX] in def IN8rr : I<0xEC, RawFrm, (outs), (ins), "in{b}\t{%dx, %al|AL, DX}", [], IIC_IN_RR>; @@ -113,10 +118,12 @@ def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", [], IIC_INS>; def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", [], IIC_INS>, OpSize; def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", [], IIC_INS>; +} // SchedRW //===----------------------------------------------------------------------===// // Moves to and from debug registers +let SchedRW = [WriteSystem] in { def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB; def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src), @@ -126,10 +133,12 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB; def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Moves to and from control registers +let SchedRW = [WriteSystem] in { def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB; def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src), @@ -139,6 +148,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB; def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Segment override instruction prefixes @@ -155,6 +165,7 @@ def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; // Moves to and from segment registers. // +let SchedRW = [WriteMove] in { def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize; def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src), @@ -182,10 +193,12 @@ def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>; def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>; +} // SchedRW //===----------------------------------------------------------------------===// // Segmentation support instructions. +let SchedRW = [WriteSystem] in { def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB; def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), @@ -347,10 +360,12 @@ def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", [], IIC_VERW_MEM>, TB; def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", [], IIC_VERW_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Descriptor-table support instructions +let SchedRW = [WriteSystem] in { def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>; def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), @@ -385,9 +400,11 @@ def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), "lldt{w}\t$src", [], IIC_LLDT_REG>, TB; def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), "lldt{w}\t$src", [], IIC_LLDT_MEM>, TB; - +} // SchedRW + //===----------------------------------------------------------------------===// // Specialized register support +let SchedRW = [WriteSystem] in { def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB; def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB; def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB; @@ -410,14 +427,18 @@ def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src), "lmsw{w}\t$src", [], IIC_LMSW_REG>, TB; def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Cache instructions +let SchedRW = [WriteSystem] in { def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB; def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // XSAVE instructions +let SchedRW = [WriteSystem] in { let Defs = [RDX, RAX], Uses = [RCX] in def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; @@ -438,6 +459,7 @@ let Uses = [RDX, RAX] in { def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; } +} // SchedRW //===----------------------------------------------------------------------===// // VIA PadLock crypto instructions diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td index a37a8cc744..363a190aa8 100644 --- a/lib/Target/X86/X86InstrTSX.td +++ b/lib/Target/X86/X86InstrTSX.td @@ -15,6 +15,9 @@ //===----------------------------------------------------------------------===// // TSX instructions +def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>, + [SDNPHasChain, SDNPSideEffect]>; + let usesCustomInserter = 1 in def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins), "# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>, @@ -27,6 +30,10 @@ def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget:$dst), def XEND : I<0x01, MRM_D5, (outs), (ins), "xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>; +let Defs = [EFLAGS] in +def XTEST : I<0x01, MRM_D6, (outs), (ins), + "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasTSX]>; + def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm), "xabort\t$imm", [(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 3af1b3e06b..a8a9fd8acc 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -407,6 +407,57 @@ ReSimplify: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; + // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B + // if one of the registers is extended, but other isn't. + case X86::VMOVAPDrr: + case X86::VMOVAPDYrr: + case X86::VMOVAPSrr: + case X86::VMOVAPSYrr: + case X86::VMOVDQArr: + case X86::VMOVDQAYrr: + case X86::VMOVDQUrr: + case X86::VMOVDQUYrr: + case X86::VMOVUPDrr: + case X86::VMOVUPDYrr: + case X86::VMOVUPSrr: + case X86::VMOVUPSYrr: { + if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) && + X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; + case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; + case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; + case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; + case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; + case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; + case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; + case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; + case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; + case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; + case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; + case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; + } + OutMI.setOpcode(NewOpc); + } + break; + } + case X86::VMOVSDrr: + case X86::VMOVSSrr: { + if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) && + X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break; + case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break; + } + OutMI.setOpcode(NewOpc); + } + break; + } + // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register // inputs modeled as normal uses instead of implicit uses. As such, truncate // off all but the first operand (the callee). FIXME: Change isel. diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td new file mode 100644 index 0000000000..b3eb460d3c --- /dev/null +++ b/lib/Target/X86/X86SchedHaswell.td @@ -0,0 +1,126 @@ +//=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Haswell to support instruction +// scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +def HaswellModel : SchedMachineModel { + // All x86 instructions are modeled as a single micro-op, and HW can decode 4 + // instructions per cycle. + let IssueWidth = 4; + let MinLatency = 0; // 0 = Out-of-order execution. + let LoadLatency = 4; + let ILPWindow = 40; + let MispredictPenalty = 16; +} + +let SchedModel = HaswellModel in { + +// Haswell can issue micro-ops to 8 different ports in one cycle. + +// Ports 0, 1, 5, 6 and 7 handle all computation. +// Port 4 gets the data half of stores. Store data can be available later than +// the store address, but since we don't model the latency of stores, we can +// ignore that. +// Ports 2 and 3 are identical. They handle loads and the address half of +// stores. Port 7 can handle address calculations. +def HWPort0 : ProcResource<1>; +def HWPort1 : ProcResource<1>; +def HWPort2 : ProcResource<1>; +def HWPort3 : ProcResource<1>; +def HWPort4 : ProcResource<1>; +def HWPort5 : ProcResource<1>; +def HWPort6 : ProcResource<1>; +def HWPort7 : ProcResource<1>; + +// Many micro-ops are capable of issuing on multiple ports. +def HWPort23 : ProcResGroup<[HWPort2, HWPort3]>; +def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>; +def HWPort05 : ProcResGroup<[HWPort0, HWPort5]>; +def HWPort056 : ProcResGroup<[HWPort0, HWPort5, HWPort6]>; +def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>; +def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>; +def HWPort0156: ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>; + +// Integer division issued on port 0, but uses the non-pipelined divider. +def HWDivider : ProcResource<1> { let Buffered = 0; } + +// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4 +// cycles after the memory operand. +def : ReadAdvance<ReadAfterLd, 4>; + +// Many SchedWrites are defined in pairs with and without a folded load. +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops when queued in the reservation station. +// This multiclass defines the resource usage for variants with and without +// folded loads. +multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW, + ProcResourceKind ExePort, + int Lat> { + // Register variant is using a single cycle on ExePort. + def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; } + + // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the + // latency. + def : WriteRes<SchedRW.Folded, [HWPort23, ExePort]> { + let Latency = !add(Lat, 4); + } +} + +// A folded store needs a cycle on port 4 for the store data, but it does not +// need an extra port 2/3 cycle to recompute the address. +def : WriteRes<WriteRMW, [HWPort4]>; + +def : WriteRes<WriteStore, [HWPort237, HWPort4]>; +def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 4; } +def : WriteRes<WriteMove, [HWPort0156]>; +def : WriteRes<WriteZero, []>; + +defm : HWWriteResPair<WriteALU, HWPort0156, 1>; +defm : HWWriteResPair<WriteIMul, HWPort1, 3>; +defm : HWWriteResPair<WriteShift, HWPort056, 1>; +defm : HWWriteResPair<WriteJump, HWPort5, 1>; + +// This is for simple LEAs with one or two input operands. +// The complex ones can only execute on port 1, and they require two cycles on +// the port to read all inputs. We don't model that. +def : WriteRes<WriteLEA, [HWPort15]>; + +// This is quite rough, latency depends on the dividend. +def : WriteRes<WriteIDiv, [HWPort0, HWDivider]> { + let Latency = 25; + let ResourceCycles = [1, 10]; +} +def : WriteRes<WriteIDivLd, [HWPort23, HWPort0, HWDivider]> { + let Latency = 29; + let ResourceCycles = [1, 1, 10]; +} + +// Scalar and vector floating point. +defm : HWWriteResPair<WriteFAdd, HWPort1, 3>; +defm : HWWriteResPair<WriteFMul, HWPort0, 5>; +defm : HWWriteResPair<WriteFDiv, HWPort0, 12>; // 10-14 cycles. +defm : HWWriteResPair<WriteFRcp, HWPort0, 5>; +defm : HWWriteResPair<WriteFSqrt, HWPort0, 15>; +defm : HWWriteResPair<WriteCvtF2I, HWPort1, 3>; +defm : HWWriteResPair<WriteCvtI2F, HWPort1, 4>; +defm : HWWriteResPair<WriteCvtF2F, HWPort1, 3>; + +// Vector integer operations. +defm : HWWriteResPair<WriteVecShift, HWPort05, 1>; +defm : HWWriteResPair<WriteVecLogic, HWPort015, 1>; +defm : HWWriteResPair<WriteVecALU, HWPort15, 1>; +defm : HWWriteResPair<WriteVecIMul, HWPort0, 5>; +defm : HWWriteResPair<WriteShuffle, HWPort15, 1>; + +def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; } +def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; } +} // SchedModel diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td new file mode 100644 index 0000000000..66d78e4fc4 --- /dev/null +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -0,0 +1,122 @@ +//=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Sandy Bridge to support instruction +// scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +def SandyBridgeModel : SchedMachineModel { + // All x86 instructions are modeled as a single micro-op, and SB can decode 4 + // instructions per cycle. + // FIXME: Identify instructions that aren't a single fused micro-op. + let IssueWidth = 4; + let MinLatency = 0; // 0 = Out-of-order execution. + let LoadLatency = 4; + let ILPWindow = 30; + let MispredictPenalty = 16; +} + +let SchedModel = SandyBridgeModel in { + +// Sandy Bridge can issue micro-ops to 6 different ports in one cycle. + +// Ports 0, 1, and 5 handle all computation. +def SBPort0 : ProcResource<1>; +def SBPort1 : ProcResource<1>; +def SBPort5 : ProcResource<1>; + +// Ports 2 and 3 are identical. They handle loads and the address half of +// stores. +def SBPort23 : ProcResource<2>; + +// Port 4 gets the data half of stores. Store data can be available later than +// the store address, but since we don't model the latency of stores, we can +// ignore that. +def SBPort4 : ProcResource<1>; + +// Many micro-ops are capable of issuing on multiple ports. +def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>; +def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>; +def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>; + +// Integer division issued on port 0, but uses the non-pipelined divider. +def SBDivider : ProcResource<1> { let Buffered = 0; } + +// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4 +// cycles after the memory operand. +def : ReadAdvance<ReadAfterLd, 4>; + +// Many SchedWrites are defined in pairs with and without a folded load. +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops when queued in the reservation station. +// This multiclass defines the resource usage for variants with and without +// folded loads. +multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW, + ProcResourceKind ExePort, + int Lat> { + // Register variant is using a single cycle on ExePort. + def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; } + + // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the + // latency. + def : WriteRes<SchedRW.Folded, [SBPort23, ExePort]> { + let Latency = !add(Lat, 4); + } +} + +// A folded store needs a cycle on port 4 for the store data, but it does not +// need an extra port 2/3 cycle to recompute the address. +def : WriteRes<WriteRMW, [SBPort4]>; + +def : WriteRes<WriteStore, [SBPort23, SBPort4]>; +def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 4; } +def : WriteRes<WriteMove, [SBPort015]>; +def : WriteRes<WriteZero, []>; + +defm : SBWriteResPair<WriteALU, SBPort015, 1>; +defm : SBWriteResPair<WriteIMul, SBPort1, 3>; +defm : SBWriteResPair<WriteShift, SBPort05, 1>; +defm : SBWriteResPair<WriteJump, SBPort5, 1>; + +// This is for simple LEAs with one or two input operands. +// The complex ones can only execute on port 1, and they require two cycles on +// the port to read all inputs. We don't model that. +def : WriteRes<WriteLEA, [SBPort15]>; + +// This is quite rough, latency depends on the dividend. +def : WriteRes<WriteIDiv, [SBPort0, SBDivider]> { + let Latency = 25; + let ResourceCycles = [1, 10]; +} +def : WriteRes<WriteIDivLd, [SBPort23, SBPort0, SBDivider]> { + let Latency = 29; + let ResourceCycles = [1, 1, 10]; +} + +// Scalar and vector floating point. +defm : SBWriteResPair<WriteFAdd, SBPort1, 3>; +defm : SBWriteResPair<WriteFMul, SBPort0, 5>; +defm : SBWriteResPair<WriteFDiv, SBPort0, 12>; // 10-14 cycles. +defm : SBWriteResPair<WriteFRcp, SBPort0, 5>; +defm : SBWriteResPair<WriteFSqrt, SBPort0, 15>; +defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>; +defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>; +defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>; + +// Vector integer operations. +defm : SBWriteResPair<WriteVecShift, SBPort05, 1>; +defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>; +defm : SBWriteResPair<WriteVecALU, SBPort15, 1>; +defm : SBWriteResPair<WriteVecIMul, SBPort0, 5>; +defm : SBWriteResPair<WriteShuffle, SBPort15, 1>; + +def : WriteRes<WriteSystem, [SBPort015]> { let Latency = 100; } +def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; } +} // SchedModel diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index d99d085298..9fbde88b71 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -7,9 +7,94 @@ // //===----------------------------------------------------------------------===// +// InstrSchedModel annotations for out-of-order CPUs. +// +// These annotations are independent of the itinerary classes defined below. + +// Instructions with folded loads need to read the memory operand immediately, +// but other register operands don't have to be read until the load is ready. +// These operands are marked with ReadAfterLd. +def ReadAfterLd : SchedRead; + +// Instructions with both a load and a store folded are modeled as a folded +// load + WriteRMW. +def WriteRMW : SchedWrite; + +// Most instructions can fold loads, so almost every SchedWrite comes in two +// variants: With and without a folded load. +// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite +// with a folded load. +class X86FoldableSchedWrite : SchedWrite { + // The SchedWrite to use when a load is folded into the instruction. + SchedWrite Folded; +} + +// Multiclass that produces a linked pair of SchedWrites. +multiclass X86SchedWritePair { + // Register-Memory operation. + def Ld : SchedWrite; + // Register-Register operation. + def NAME : X86FoldableSchedWrite { + let Folded = !cast<SchedWrite>(NAME#"Ld"); + } +} + +// Arithmetic. +defm WriteALU : X86SchedWritePair; // Simple integer ALU op. +defm WriteIMul : X86SchedWritePair; // Integer multiplication. +defm WriteIDiv : X86SchedWritePair; // Integer division. +def WriteLEA : SchedWrite; // LEA instructions can't fold loads. + +// Integer shifts and rotates. +defm WriteShift : X86SchedWritePair; + +// Loads, stores, and moves, not folded with other operations. +def WriteLoad : SchedWrite; +def WriteStore : SchedWrite; +def WriteMove : SchedWrite; + +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. +def WriteZero : SchedWrite; + +// Branches don't produce values, so they have no latency, but they still +// consume resources. Indirect branches can fold loads. +defm WriteJump : X86SchedWritePair; + +// Floating point. This covers both scalar and vector operations. +defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. +defm WriteFMul : X86SchedWritePair; // Floating point multiplication. +defm WriteFDiv : X86SchedWritePair; // Floating point division. +defm WriteFSqrt : X86SchedWritePair; // Floating point square root. +defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal. +defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. + +// FMA Scheduling helper class. +class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } + +// Vector integer operations. +defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. +defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. +defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply. + +// Vector bitwise operations. +// These are often used on both floating point and integer vectors. +defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor. +defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends. + +// Conversion between integer and float. +defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer. +defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float. +defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion. + +// Catch-all for expensive system instructions. +def WriteSystem : SchedWrite; + +// Old microcoded instructions that nobody use. +def WriteMicrocoded : SchedWrite; + //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for X86 -def IIC_DEFAULT : InstrItinClass; def IIC_ALU_MEM : InstrItinClass; def IIC_ALU_NONMEM : InstrItinClass; def IIC_LEA : InstrItinClass; @@ -484,3 +569,5 @@ def GenericModel : SchedMachineModel { } include "X86ScheduleAtom.td" +include "X86SchedSandyBridge.td" +include "X86SchedHaswell.td" diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 1e5f2d6c9a..cce8f1b114 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -33,7 +33,6 @@ def AtomItineraries : ProcessorItineraries< // InstrItinData<class, [InstrStage<N, [P0], 0>, InstrStage<N, [P1]>] >, // // Default is 1 cycle, port0 or port1 - InstrItinData<IIC_DEFAULT, [InstrStage<1, [Port0, Port1]>] >, InstrItinData<IIC_ALU_MEM, [InstrStage<1, [Port0]>] >, InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [Port0, Port1]>] >, InstrItinData<IIC_LEA, [InstrStage<1, [Port1]>] >, diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 0f2c008ab9..4132463ee8 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -283,6 +283,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasLZCNT = true; ToggleFeature(X86::FeatureLZCNT); } + if (IsIntel && ((ECX >> 8) & 0x1)) { + HasPRFCHW = true; + ToggleFeature(X86::FeaturePRFCHW); + } if (IsAMD) { if ((ECX >> 6) & 0x1) { HasSSE4A = true; @@ -310,6 +314,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasBMI = true; ToggleFeature(X86::FeatureBMI); } + if ((EBX >> 4) & 0x1) { + HasHLE = true; + ToggleFeature(X86::FeatureHLE); + } if (IsIntel && ((EBX >> 5) & 0x1)) { X86SSELevel = AVX2; ToggleFeature(X86::FeatureAVX2); @@ -322,6 +330,14 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasRTM = true; ToggleFeature(X86::FeatureRTM); } + if (IsIntel && ((EBX >> 19) & 0x1)) { + HasADX = true; + ToggleFeature(X86::FeatureADX); + } + if (IsIntel && ((EBX >> 18) & 0x1)) { + HasRDSEED = true; + ToggleFeature(X86::FeatureRDSEED); + } } } } @@ -439,7 +455,10 @@ void X86Subtarget::initializeEnvironment() { HasBMI = false; HasBMI2 = false; HasRTM = false; + HasHLE = false; HasADX = false; + HasPRFCHW = false; + HasRDSEED = false; IsBTMemSlow = false; IsUAMemFast = false; HasVectorUAMem = false; @@ -448,6 +467,7 @@ void X86Subtarget::initializeEnvironment() { HasSlowDivide = false; PostRAScheduler = false; PadShortFunctions = false; + CallRegIndirect = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? MaxInlineSizeThreshold = 128; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index e97da4b6f4..6fbdb1d5f0 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -121,9 +121,18 @@ protected: /// HasRTM - Processor has RTM instructions. bool HasRTM; + /// HasHLE - Processor has HLE. + bool HasHLE; + /// HasADX - Processor has ADX instructions. bool HasADX; + /// HasPRFCHW - Processor has PRFCHW instructions. + bool HasPRFCHW; + + /// HasRDSEED - Processor has RDSEED instructions. + bool HasRDSEED; + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; @@ -153,6 +162,10 @@ protected: /// a stall when returning too early. bool PadShortFunctions; + /// CallRegIndirect - True if the Calls with memory reference should be converted + /// to a register-based indirect call. + bool CallRegIndirect; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -253,7 +266,10 @@ public: bool hasBMI() const { return HasBMI; } bool hasBMI2() const { return HasBMI2; } bool hasRTM() const { return HasRTM; } + bool hasHLE() const { return HasHLE; } bool hasADX() const { return HasADX; } + bool hasPRFCHW() const { return HasPRFCHW; } + bool hasRDSEED() const { return HasRDSEED; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } @@ -261,6 +277,7 @@ public: bool useLeaForSP() const { return UseLeaForSP; } bool hasSlowDivide() const { return HasSlowDivide; } bool padShortFunctions() const { return PadShortFunctions; } + bool callRegIndirect() const { return CallRegIndirect; } bool isAtom() const { return X86ProcFamily == IntelAtom; } diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index be2a997b8e..2336035bea 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -169,6 +169,29 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + static const CostTblEntry<MVT> AVX2CostTable[] = { + // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to + // customize them to detect the cases where shift amount is a scalar one. + { ISD::SHL, MVT::v4i32, 1 }, + { ISD::SRL, MVT::v4i32, 1 }, + { ISD::SRA, MVT::v4i32, 1 }, + { ISD::SHL, MVT::v8i32, 1 }, + { ISD::SRL, MVT::v8i32, 1 }, + { ISD::SRA, MVT::v8i32, 1 }, + { ISD::SHL, MVT::v2i64, 1 }, + { ISD::SRL, MVT::v2i64, 1 }, + { ISD::SHL, MVT::v4i64, 1 }, + { ISD::SRL, MVT::v4i64, 1 }, + }; + + // Look for AVX2 lowering tricks. + if (ST->hasAVX2()) { + int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable), + ISD, LT.second); + if (Idx != -1) + return LT.first * AVX2CostTable[Idx].Cost; + } + static const CostTblEntry<MVT> AVX1CostTable[] = { // We don't have to scalarize unsupported ops. We can issue two half-sized // operations and we only need to extract the upper YMM half. @@ -248,17 +271,40 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, + + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, }; diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 019c4570d9..6b6480e4b4 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -409,7 +409,7 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (RegInfo->requiresRegisterScavenging(MF)) { // Reserve a slot close to SP or frame pointer. - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); } |