diff options
Diffstat (limited to 'lib/Target')
97 files changed, 6389 insertions, 3443 deletions
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index cca6d12e16..572617cc07 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -349,59 +349,6 @@ AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF, return TopOfFrameOffset - FrameRegPos; } -/// Estimate and return the size of the frame. -static unsigned estimateStackSize(MachineFunction &MF) { - // FIXME: Make generic? Really consider after upstreaming. This code is now - // shared between PEI, ARM *and* here. - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - unsigned MaxAlign = MFI->getMaxAlignment(); - int Offset = 0; - - // This code is very, very similar to PEI::calculateFrameObjectOffsets(). - // It really should be refactored to share code. Until then, changes - // should keep in mind that there's tight coupling between the two. - - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -MFI->getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) - continue; - Offset += MFI->getObjectSize(i); - unsigned Align = MFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - - MaxAlign = std::max(Align, MaxAlign); - } - - if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF)) - Offset += MFI->getMaxCallFrameSize(); - - // Round up the size to a multiple of the alignment. If the function has - // any calls or alloca's, align to the target's StackAlignment value to - // ensure that the callee's frame or the alloca data is suitably aligned; - // otherwise, for leaf functions, align to the TransientStackAlignment - // value. - unsigned StackAlign; - if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); - else - StackAlign = TFI->getTransientStackAlignment(); - - // If the frame pointer is eliminated, all frame offsets will be relative to - // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (unsigned)Offset; -} - void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { @@ -422,7 +369,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // callee-save register for this purpose or allocate an extra spill slot. bool BigStack = - (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)) + (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)) || MFI->hasVarSizedObjects() // Access will be from X29: messes things up || (MFI->adjustsStack() && !hasReservedCallFrame(MF)); diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index c1695dacb4..69bb80a485 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -160,44 +160,53 @@ private: SMLoc StartLoc, EndLoc; + struct ImmWithLSLOp { + const MCExpr *Val; + unsigned ShiftAmount; + bool ImplicitAmount; + }; + + struct CondCodeOp { + A64CC::CondCodes Code; + }; + + struct FPImmOp { + double Val; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct RegOp { + unsigned RegNum; + }; + + struct ShiftExtendOp { + A64SE::ShiftExtSpecifiers ShiftType; + unsigned Amount; + bool ImplicitAmount; + }; + + struct SysRegOp { + const char *Data; + unsigned Length; + }; + + struct TokOp { + const char *Data; + unsigned Length; + }; + union { - struct { - const MCExpr *Val; - unsigned ShiftAmount; - bool ImplicitAmount; - } ImmWithLSL; - - struct { - A64CC::CondCodes Code; - } CondCode; - - struct { - double Val; - } FPImm; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned RegNum; - } Reg; - - struct { - A64SE::ShiftExtSpecifiers ShiftType; - unsigned Amount; - bool ImplicitAmount; - } ShiftExtend; - - struct { - const char *Data; - unsigned Length; - } SysReg; - - struct { - const char *Data; - unsigned Length; - } Tok; + struct ImmWithLSLOp ImmWithLSL; + struct CondCodeOp CondCode; + struct FPImmOp FPImm; + struct ImmOp Imm; + struct RegOp Reg; + struct ShiftExtendOp ShiftExtend; + struct SysRegOp SysReg; + struct TokOp Tok; }; AArch64Operand(KindTy K, SMLoc S, SMLoc E) diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp new file mode 100644 index 0000000000..f0d4dbe2bf --- /dev/null +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -0,0 +1,704 @@ +//=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The Cortex-A15 processor employs a tracking scheme in its register renaming +// in order to process each instruction's micro-ops speculatively and +// out-of-order with appropriate forwarding. The ARM architecture allows VFP +// instructions to read and write 32-bit S-registers. Each S-register +// corresponds to one half (upper or lower) of an overlaid 64-bit D-register. +// +// There are several instruction patterns which can be used to provide this +// capability which can provide higher performance than other, potentially more +// direct patterns, specifically around when one micro-op reads a D-register +// operand that has recently been written as one or more S-register results. +// +// This file defines a pre-regalloc pass which looks for SPR producers which +// are going to be used by a DPR (or QPR) consumers and creates the more +// optimized access pattern. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "a15-sd-optimizer" +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMSubtarget.h" +#include "ARMISelLowering.h" +#include "ARMTargetMachine.h" + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <set> + +using namespace llvm; + +namespace { + struct A15SDOptimizer : public MachineFunctionPass { + static char ID; + A15SDOptimizer() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM A15 S->D optimizer"; + } + + private: + const ARMBaseInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + + bool runOnInstruction(MachineInstr *MI); + + // + // Instruction builder helpers + // + unsigned createDupLane(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg, unsigned Lane, + bool QPR=false); + + unsigned createExtractSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned DReg, unsigned Lane, + const TargetRegisterClass *TRC); + + unsigned createVExt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Ssub0, unsigned Ssub1); + + unsigned createRegSequence(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg1, unsigned Reg2); + + unsigned createInsertSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, unsigned DReg, unsigned Lane, + unsigned ToInsert); + + unsigned createImplicitDef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL); + + // + // Various property checkers + // + bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC); + bool hasPartialWrite(MachineInstr *MI); + SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI); + unsigned getDPRLaneFromSPR(unsigned SReg); + + // + // Methods used for getting the definitions of partial registers + // + + MachineInstr *elideCopies(MachineInstr *MI); + void elideCopiesAndPHIs(MachineInstr *MI, + SmallVectorImpl<MachineInstr*> &Outs); + + // + // Pattern optimization methods + // + unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg); + unsigned optimizeSDPattern(MachineInstr *MI); + unsigned getPrefSPRLane(unsigned SReg); + + // + // Sanitizing method - used to make sure if don't leave dead code around. + // + void eraseInstrWithNoUses(MachineInstr *MI); + + // + // A map used to track the changes done by this pass. + // + std::map<MachineInstr*, unsigned> Replacements; + std::set<MachineInstr *> DeadInstr; + }; + char A15SDOptimizer::ID = 0; +} // end anonymous namespace + +// Returns true if this is a use of a SPR register. +bool A15SDOptimizer::usesRegClass(MachineOperand &MO, + const TargetRegisterClass *TRC) { + if (!MO.isReg()) + return false; + unsigned Reg = MO.getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return MRI->getRegClass(Reg)->hasSuperClassEq(TRC); + else + return TRC->contains(Reg); +} + +unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) { + unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, + &ARM::DPRRegClass); + if (DReg != ARM::NoRegister) return ARM::ssub_1; + return ARM::ssub_0; +} + +// Get the subreg type that is most likely to be coalesced +// for an SPR register that will be used in VDUP32d pseudo. +unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { + if (!TRI->isVirtualRegister(SReg)) + return getDPRLaneFromSPR(SReg); + + MachineInstr *MI = MRI->getVRegDef(SReg); + if (!MI) return ARM::ssub_0; + MachineOperand *MO = MI->findRegisterDefOperand(SReg); + + assert(MO->isReg() && "Non register operand found!"); + if (!MO) return ARM::ssub_0; + + if (MI->isCopy() && usesRegClass(MI->getOperand(1), + &ARM::SPRRegClass)) { + SReg = MI->getOperand(1).getReg(); + } + + if (TargetRegisterInfo::isVirtualRegister(SReg)) { + if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1; + return ARM::ssub_0; + } + return getDPRLaneFromSPR(SReg); +} + +// MI is known to be dead. Figure out what instructions +// are also made dead by this and mark them for removal. +void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { + SmallVector<MachineInstr *, 8> Front; + DeadInstr.insert(MI); + + DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n"); + Front.push_back(MI); + + while (Front.size() != 0) { + MI = Front.back(); + Front.pop_back(); + + // MI is already known to be dead. We need to see + // if other instructions can also be removed. + for (unsigned int i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + if ((!MO.isReg()) || (!MO.isUse())) + continue; + unsigned Reg = MO.getReg(); + if (!TRI->isVirtualRegister(Reg)) + continue; + MachineOperand *Op = MI->findRegisterDefOperand(Reg); + + if (!Op) + continue; + + MachineInstr *Def = Op->getParent(); + + // We don't need to do anything if we have already marked + // this instruction as being dead. + if (DeadInstr.find(Def) != DeadInstr.end()) + continue; + + // Check if all the uses of this instruction are marked as + // dead. If so, we can also mark this instruction as being + // dead. + bool IsDead = true; + for (unsigned int j = 0; j < Def->getNumOperands(); ++j) { + MachineOperand &MODef = Def->getOperand(j); + if ((!MODef.isReg()) || (!MODef.isDef())) + continue; + unsigned DefReg = MODef.getReg(); + if (!TRI->isVirtualRegister(DefReg)) { + IsDead = false; + break; + } + for (MachineRegisterInfo::use_iterator II = MRI->use_begin(Reg), + EE = MRI->use_end(); + II != EE; ++II) { + // We don't care about self references. + if (&*II == Def) + continue; + if (DeadInstr.find(&*II) == DeadInstr.end()) { + IsDead = false; + break; + } + } + } + + if (!IsDead) continue; + + DEBUG(dbgs() << "Deleting instruction " << *Def << "\n"); + DeadInstr.insert(Def); + } + } +} + +// Creates the more optimized patterns and generally does all the code +// transformations in this pass. +unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { + if (MI->isCopy()) { + return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg()); + } + + if (MI->isInsertSubreg()) { + unsigned DPRReg = MI->getOperand(1).getReg(); + unsigned SPRReg = MI->getOperand(2).getReg(); + + if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) { + MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg()); + + if (DPRMI && SPRMI) { + // See if the first operand of this insert_subreg is IMPLICIT_DEF + MachineInstr *ECDef = elideCopies(DPRMI); + if (ECDef != 0 && ECDef->isImplicitDef()) { + // Another corner case - if we're inserting something that is purely + // a subreg copy of a DPR, just use that DPR. + + MachineInstr *EC = elideCopies(SPRMI); + // Is it a subreg copy of ssub_0? + if (EC && EC->isCopy() && + EC->getOperand(1).getSubReg() == ARM::ssub_0) { + DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI); + + // Find the thing we're subreg copying out of - is it of the same + // regclass as DPRMI? (i.e. a DPR or QPR). + unsigned FullReg = SPRMI->getOperand(1).getReg(); + const TargetRegisterClass *TRC = + MRI->getRegClass(MI->getOperand(1).getReg()); + if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) { + DEBUG(dbgs() << "Subreg copy is compatible - returning "); + DEBUG(dbgs() << PrintReg(FullReg) << "\n"); + eraseInstrWithNoUses(MI); + return FullReg; + } + } + + return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg()); + } + } + } + return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); + } + + if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), + &ARM::SPRRegClass)) { + // See if all bar one of the operands are IMPLICIT_DEF and insert the + // optimizer pattern accordingly. + unsigned NumImplicit = 0, NumTotal = 0; + unsigned NonImplicitReg = ~0U; + + for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) { + if (!MI->getOperand(I).isReg()) + continue; + ++NumTotal; + unsigned OpReg = MI->getOperand(I).getReg(); + + if (!TRI->isVirtualRegister(OpReg)) + break; + + MachineInstr *Def = MRI->getVRegDef(OpReg); + if (!Def) + break; + if (Def->isImplicitDef()) + ++NumImplicit; + else + NonImplicitReg = MI->getOperand(I).getReg(); + } + + if (NumImplicit == NumTotal - 1) + return optimizeAllLanesPattern(MI, NonImplicitReg); + else + return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); + } + + assert(0 && "Unhandled update pattern!"); + return 0; +} + +// Return true if this MachineInstr inserts a scalar (SPR) value into +// a D or Q register. +bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) { + // The only way we can do a partial register update is through a COPY, + // INSERT_SUBREG or REG_SEQUENCE. + if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) + return true; + + if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2), + &ARM::SPRRegClass)) + return true; + + if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) + return true; + + return false; +} + +// Looks through full copies to get the instruction that defines the input +// operand for MI. +MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { + if (!MI->isFullCopy()) + return MI; + if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + return NULL; + MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg()); + if (!Def) + return NULL; + return elideCopies(Def); +} + +// Look through full copies and PHIs to get the set of non-copy MachineInstrs +// that can produce MI. +void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, + SmallVectorImpl<MachineInstr*> &Outs) { + // Looking through PHIs may create loops so we need to track what + // instructions we have visited before. + std::set<MachineInstr *> Reached; + SmallVector<MachineInstr *, 8> Front; + Front.push_back(MI); + while (Front.size() != 0) { + MI = Front.back(); + Front.pop_back(); + + // If we have already explored this MachineInstr, ignore it. + if (Reached.find(MI) != Reached.end()) + continue; + Reached.insert(MI); + if (MI->isPHI()) { + for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { + unsigned Reg = MI->getOperand(I).getReg(); + if (!TRI->isVirtualRegister(Reg)) { + continue; + } + MachineInstr *NewMI = MRI->getVRegDef(Reg); + if (!NewMI) + continue; + Front.push_back(NewMI); + } + } else if (MI->isFullCopy()) { + if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + continue; + MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + if (!NewMI) + continue; + Front.push_back(NewMI); + } else { + DEBUG(dbgs() << "Found partial copy" << *MI <<"\n"); + Outs.push_back(MI); + } + } +} + +// Return the DPR virtual registers that are read by this machine instruction +// (if any). +SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) { + if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() || + MI->isKill()) + return SmallVector<unsigned, 8>(); + + SmallVector<unsigned, 8> Defs; + for (unsigned i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + + if (!MO.isReg() || !MO.isUse()) + continue; + if (!usesRegClass(MO, &ARM::DPRRegClass) && + !usesRegClass(MO, &ARM::QPRRegClass)) + continue; + + Defs.push_back(MO.getReg()); + } + return Defs; +} + +// Creates a DPR register from an SPR one by using a VDUP. +unsigned +A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg, unsigned Lane, bool QPR) { + unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : + &ARM::DPRRegClass); + AddDefaultPred(BuildMI(MBB, + InsertBefore, + DL, + TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), + Out) + .addReg(Reg) + .addImm(Lane)); + + return Out; +} + +// Creates a SPR register from a DPR by copying the value in lane 0. +unsigned +A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned DReg, unsigned Lane, + const TargetRegisterClass *TRC) { + unsigned Out = MRI->createVirtualRegister(TRC); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::COPY), Out) + .addReg(DReg, 0, Lane); + + return Out; +} + +// Takes two SPR registers and creates a DPR by using a REG_SEQUENCE. +unsigned +A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg1, unsigned Reg2) { + unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::REG_SEQUENCE), Out) + .addReg(Reg1) + .addImm(ARM::dsub_0) + .addReg(Reg2) + .addImm(ARM::dsub_1); + return Out; +} + +// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1) +// and merges them into one DPR register. +unsigned +A15SDOptimizer::createVExt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Ssub0, unsigned Ssub1) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + AddDefaultPred(BuildMI(MBB, + InsertBefore, + DL, + TII->get(ARM::VEXTd32), Out) + .addReg(Ssub0) + .addReg(Ssub1) + .addImm(1)); + return Out; +} + +unsigned +A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, unsigned DReg, unsigned Lane, + unsigned ToInsert) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::INSERT_SUBREG), Out) + .addReg(DReg) + .addReg(ToInsert) + .addImm(Lane); + + return Out; +} + +unsigned +A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::IMPLICIT_DEF), Out); + return Out; +} + +// This function inserts instructions in order to optimize interactions between +// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all +// lanes, and the using VEXT instructions to recompose the result. +unsigned +A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) { + MachineBasicBlock::iterator InsertPt(MI); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock &MBB = *MI->getParent(); + InsertPt++; + unsigned Out; + + if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) { + unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg, + ARM::dsub_0, &ARM::DPRRegClass); + unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg, + ARM::dsub_1, &ARM::DPRRegClass); + + unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0); + unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1); + Out = createVExt(MBB, InsertPt, DL, Out1, Out2); + + unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0); + unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1); + Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4); + + Out = createRegSequence(MBB, InsertPt, DL, Out, Out2); + + } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) { + unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0); + unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1); + Out = createVExt(MBB, InsertPt, DL, Out1, Out2); + + } else { + assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) && + "Found unexpected regclass!"); + + unsigned PrefLane = getPrefSPRLane(Reg); + unsigned Lane; + switch (PrefLane) { + case ARM::ssub_0: Lane = 0; break; + case ARM::ssub_1: Lane = 1; break; + default: llvm_unreachable("Unknown preferred lane!"); + } + + bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass); + + Out = createImplicitDef(MBB, InsertPt, DL); + Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg); + Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR); + eraseInstrWithNoUses(MI); + } + return Out; +} + +bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { + // We look for instructions that write S registers that are then read as + // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and + // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or + // merge two SPR values to form a DPR register. In order avoid false + // positives we make sure that there is an SPR producer so we look past + // COPY and PHI nodes to find it. + // + // The best code pattern for when an SPR producer is going to be used by a + // DPR or QPR consumer depends on whether the other lanes of the + // corresponding DPR/QPR are currently defined. + // + // We can handle these efficiently, depending on the type of + // pseudo-instruction that is producing the pattern + // + // * COPY: * VDUP all lanes and merge the results together + // using VEXTs. + // + // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR + // lane, and the other lane(s) of the DPR/QPR register + // that we are inserting in are undefined, use the + // original DPR/QPR value. + // * Otherwise, fall back on the same stategy as COPY. + // + // * REG_SEQUENCE: * If all except one of the input operands are + // IMPLICIT_DEFs, insert the VDUP pattern for just the + // defined input operand + // * Otherwise, fall back on the same stategy as COPY. + // + + // First, get all the reads of D-registers done by this instruction. + SmallVector<unsigned, 8> Defs = getReadDPRs(MI); + bool Modified = false; + + for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + // Follow the def-use chain for this DPR through COPYs, and also through + // PHIs (which are essentially multi-way COPYs). It is because of PHIs that + // we can end up with multiple defs of this DPR. + + SmallVector<MachineInstr *, 8> DefSrcs; + if (!TRI->isVirtualRegister(*I)) + continue; + MachineInstr *Def = MRI->getVRegDef(*I); + if (!Def) + continue; + + elideCopiesAndPHIs(Def, DefSrcs); + + for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(), + EE = DefSrcs.end(); II != EE; ++II) { + MachineInstr *MI = *II; + + // If we've already analyzed and replaced this operand, don't do + // anything. + if (Replacements.find(MI) != Replacements.end()) + continue; + + // Now, work out if the instruction causes a SPR->DPR dependency. + if (!hasPartialWrite(MI)) + continue; + + // Collect all the uses of this MI's DPR def for updating later. + SmallVector<MachineOperand*, 8> Uses; + unsigned DPRDefReg = MI->getOperand(0).getReg(); + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg), + E = MRI->use_end(); I != E; ++I) + Uses.push_back(&I.getOperand()); + + // We can optimize this. + unsigned NewReg = optimizeSDPattern(MI); + + if (NewReg != 0) { + Modified = true; + for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + DEBUG(dbgs() << "Replacing operand " + << **I << " with " + << PrintReg(NewReg) << "\n"); + (*I)->substVirtReg(NewReg, 0, *TRI); + } + } + Replacements[MI] = NewReg; + } + } + return Modified; +} + +bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { + TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); + TRI = Fn.getTarget().getRegisterInfo(); + MRI = &Fn.getRegInfo(); + bool Modified = false; + + DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n"); + + DeadInstr.clear(); + Replacements.clear(); + + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + + for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end(); + MI != ME;) { + Modified |= runOnInstruction(MI++); + } + + } + + for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(), + E = DeadInstr.end(); + I != E; ++I) { + (*I)->eraseFromParent(); + } + + return Modified; +} + +FunctionPass *llvm::createA15SDOptimizerPass() { + return new A15SDOptimizer(); +} diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 5faf8c320c..80e5f37eb0 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -35,6 +35,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE); +FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); FunctionPass *createARMGlobalBaseRegPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 46915eecf6..68380847a0 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -143,14 +143,12 @@ include "ARMSchedule.td" // ARM processor families. def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", "Cortex-A5 ARM processors", - [FeatureSlowFPBrcc, FeatureNEONForFP, - FeatureHasSlowFPVMLx, FeatureVMLxForwarding, - FeatureT2XtPk]>; + [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, FeatureT2XtPk]>; def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", - [FeatureSlowFPBrcc, FeatureNEONForFP, - FeatureHasSlowFPVMLx, FeatureVMLxForwarding, - FeatureT2XtPk]>; + [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, FeatureT2XtPk]>; def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", "Cortex-A9 ARM processors", [FeatureVMLxForwarding, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 58c779830e..13ec208793 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1357,7 +1357,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) .addReg(ARM::PC) - .addImm(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(0).getReg()) // Add predicate operands. .addImm(ARMCC::AL) .addReg(0) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index ed001ea24a..ed8b9cd9a1 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1125,7 +1125,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ // copyPhysReg() calls. Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be // changed into a VORR that can go down the NEON pipeline. - if (!WidenVMOVS || !MI->isCopy()) + if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15()) return false; // Look for a copy between even S-registers. That is where we keep floats diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 0ca6450e2b..3b12408768 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1038,58 +1038,6 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, return FnSize; } -/// estimateStackSize - Estimate and return the size of the frame. -/// FIXME: Make generic? -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - unsigned MaxAlign = MFI->getMaxAlignment(); - int Offset = 0; - - // This code is very, very similar to PEI::calculateFrameObjectOffsets(). - // It really should be refactored to share code. Until then, changes - // should keep in mind that there's tight coupling between the two. - - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -MFI->getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) - continue; - Offset += MFI->getObjectSize(i); - unsigned Align = MFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - - MaxAlign = std::max(Align, MaxAlign); - } - - if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF)) - Offset += MFI->getMaxCallFrameSize(); - - // Round up the size to a multiple of the alignment. If the function has - // any calls or alloca's, align to the target's StackAlignment value to - // ensure that the callee's frame or the alloca data is suitably aligned; - // otherwise, for leaf functions, align to the TransientStackAlignment - // value. - unsigned StackAlign; - if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); - else - StackAlign = TFI->getTransientStackAlignment(); - - // If the frame pointer is eliminated, all frame offsets will be relative to - // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (unsigned)Offset; -} - /// estimateRSStackSizeLimit - Look at each instruction that references stack /// frames and return the stack size limit beyond which some of these /// instructions will require a scratch register during their expansion later. @@ -1235,7 +1183,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // we've used all the registers and so R4 is already used, so not marking // it here will be OK. // FIXME: It will be better just to find spare register here. - unsigned StackSize = estimateStackSize(MF); + unsigned StackSize = MFI->estimateStackSize(MF); if (MFI->hasVarSizedObjects() || StackSize > 508) MRI.setPhysRegUsed(ARM::R4); } @@ -1330,7 +1278,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // worth the effort and added fragility? bool BigStack = (RS && - (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= + (MFI->estimateStackSize(MF) + + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= estimateRSStackSizeLimit(MF, this))) || MFI->hasVarSizedObjects() || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 514971f01e..40d2e8d265 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -564,6 +564,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); + // Custom expand long extensions to vectors. + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); + // NEON does not have single instruction CTPOP for vectors with element // types wider than 8-bits. However, custom lowering can leverage the // v8i8/v16i8 vcnt instruction. @@ -3433,6 +3443,47 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec), +/// and size(DestVec) > 128-bits. +/// This is achieved by doing the one extension from the SrcVec, splitting the +/// result, extending these parts, and then concatenating these into the +/// destination. +static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { + SDValue Op = N->getOperand(0); + EVT SrcVT = Op.getValueType(); + EVT DestVT = N->getValueType(0); + + assert(DestVT.getSizeInBits() > 128 && + "Custom sext/zext expansion needs >128-bit vector."); + // If this is a normal length extension, use the default expansion. + if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() && + SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits()) + return SDValue(); + + DebugLoc dl = N->getDebugLoc(); + unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); + unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits(); + unsigned NumElts = SrcVT.getVectorNumElements(); + LLVMContext &Ctx = *DAG.getContext(); + SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi; + + EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), + NumElts); + EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), + NumElts/2); + EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize), + NumElts/2); + + Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op); + SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, + DAG.getIntPtrConstant(0)); + SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, + DAG.getIntPtrConstant(NumElts/2)); + ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo); + ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -5621,6 +5672,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + Res = ExpandVectorExtension(N, DAG); + break; case ISD::SRL: case ISD::SRA: Res = Expand64BitShift(N, DAG, Subtarget); diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index e11314d4fc..739300e4ef 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -42,12 +43,13 @@ StrictAlign("arm-strict-align", cl::Hidden, cl::desc("Disallow all unaligned memory accesses")); ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS) + const std::string &FS, const TargetOptions &Options) : ARMGenSubtargetInfo(TT, CPU, FS) , ARMProcFamily(Others) , stackAlignment(4) , CPUString(CPU) , TargetTriple(TT) + , Options(Options) , TargetABI(ARM_ABI_APCS) { initializeEnvironment(); resetSubtargetFeatures(CPU, FS); @@ -92,6 +94,7 @@ void ARMSubtarget::initializeEnvironment() { AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; + UnsafeFPMath = false; } void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { @@ -162,6 +165,12 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // configuration. if (!StrictAlign && hasV6Ops() && isTargetDarwin()) AllowsUnalignedMem = true; + + // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. + uint64_t Bits = getFeatureBits(); + if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters + (Options.UnsafeFPMath || isTargetDarwin())) + UseNEONForSinglePrecisionFP = true; } /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 8ce22e1de2..5b5ee6aeb8 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -26,6 +26,7 @@ namespace llvm { class GlobalValue; class StringRef; +class TargetOptions; class ARMSubtarget : public ARMGenSubtargetInfo { protected: @@ -159,6 +160,9 @@ protected: /// NaCl TRAP instruction is generated instead of the regular TRAP. bool UseNaClTrap; + /// Target machine allowed unsafe FP math (such as use of NEON fp) + bool UnsafeFPMath; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -175,6 +179,9 @@ protected: /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; + /// Options passed via command line that could influence the target + const TargetOptions &Options; + public: enum { isELF, isDarwin @@ -189,7 +196,7 @@ protected: /// of the specified triple. /// ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); + const std::string &FS, const TargetOptions &Options); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 774521852a..b0f9e56db7 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -28,6 +28,11 @@ EnableGlobalMerge("global-merge", cl::Hidden, cl::desc("Enable global merge pass"), cl::init(true)); +static cl::opt<bool> +DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden, + cl::desc("Inhibit optimization of S->D register accesses on A15"), + cl::init(false)); + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); @@ -43,7 +48,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS), + Subtarget(TT, CPU, FS, Options), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { // Default to soft float ABI @@ -164,6 +169,12 @@ bool ARMPassConfig::addPreRegAlloc() { addPass(createARMLoadStoreOptimizationPass(true)); if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9()) addPass(createMLxExpansionPass()); + // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be + // enabled when NEON is available. + if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() && + getARMSubtarget().hasNEON() && !DisableA15SDOptimization) { + addPass(createA15SDOptimizerPass()); + } return true; } @@ -174,7 +185,8 @@ bool ARMPassConfig::addPreSched2() { addPass(createARMLoadStoreOptimizationPass()); printAndVerify("After ARM load / store optimizer"); } - if (getARMSubtarget().hasNEON()) + if ((DisableA15SDOptimization || !getARMSubtarget().isCortexA15()) && + getARMSubtarget().hasNEON()) addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 01c04b48cf..1019b972e9 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -177,6 +177,23 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + // Single to/from double precision conversions. + static const CostTblEntry<MVT> NEONFltDblTbl[] = { + // Vector fptrunc/fpext conversions. + { ISD::FP_ROUND, MVT::v2f64, 2 }, + { ISD::FP_EXTEND, MVT::v2f32, 2 }, + { ISD::FP_EXTEND, MVT::v4f32, 4 } + }; + + if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || + ISD == ISD::FP_EXTEND)) { + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl), + ISD, LT.second); + if (Idx != -1) + return LT.first * NEONFltDblTbl[Idx].Cost; + } + EVT SrcTy = TLI->getValueType(Src); EVT DstTy = TLI->getValueType(Dst); @@ -194,17 +211,71 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + // The number of vmovl instructions for the extension. + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + + // Operations that we legalize using load/stores to the stack. + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 }, + // Vector float <-> i32 conversions. { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, + { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 }, + { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 }, + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, // Vector double <-> i32 conversions. { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, - { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 } + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 }, + { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 }, + { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 }, + { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 } }; if (SrcTy.isVector() && ST->hasNEON()) { @@ -247,7 +318,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return NEONFloatConversionTbl[Idx].Cost; } - // Scalar integer to float conversions. static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = { { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, @@ -303,7 +373,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return ARMIntegerConversionTbl[Idx].Cost; } - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); } @@ -326,6 +395,25 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a a vector select gets lowered to vbsl. if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { + // Lowering of some vector selects is currently far from perfect. + static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = { + { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 }, + { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } + }; + + EVT SelCondTy = TLI->getValueType(CondTy); + EVT SelValTy = TLI->getValueType(ValTy); + int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl, + array_lengthof(NEONVectorSelectTbl), + ISD, SelCondTy.getSimpleVT(), + SelValTy.getSimpleVT()); + if (Idx != -1) + return NEONVectorSelectTbl[Idx].Cost; + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); return LT.first; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 6c678fdbd7..ed7b7ec9d2 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -316,103 +316,127 @@ class ARMOperand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; SmallVector<unsigned, 8> Registers; + struct CCOp { + ARMCC::CondCodes Val; + }; + + struct CopOp { + unsigned Val; + }; + + struct CoprocOptionOp { + unsigned Val; + }; + + struct ITMaskOp { + unsigned Mask:4; + }; + + struct MBOptOp { + ARM_MB::MemBOpt Val; + }; + + struct IFlagsOp { + ARM_PROC::IFlags Val; + }; + + struct MMaskOp { + unsigned Val; + }; + + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + }; + + // A vector register list is a sequential list of 1 to 4 registers. + struct VectorListOp { + unsigned RegNum; + unsigned Count; + unsigned LaneIndex; + bool isDoubleSpaced; + }; + + struct VectorIndexOp { + unsigned Val; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + /// Combined record for all forms of ARM address expressions. + struct MemoryOp { + unsigned BaseRegNum; + // Offset is in OffsetReg or OffsetImm. If both are zero, no offset + // was specified. + const MCConstantExpr *OffsetImm; // Offset immediate value + unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL + ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg + unsigned ShiftImm; // shift for OffsetReg. + unsigned Alignment; // 0 = no alignment specified + // n = alignment in bytes (2, 4, 8, 16, or 32) + unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) + }; + + struct PostIdxRegOp { + unsigned RegNum; + bool isAdd; + ARM_AM::ShiftOpc ShiftTy; + unsigned ShiftImm; + }; + + struct ShifterImmOp { + bool isASR; + unsigned Imm; + }; + + struct RegShiftedRegOp { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftReg; + unsigned ShiftImm; + }; + + struct RegShiftedImmOp { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftImm; + }; + + struct RotImmOp { + unsigned Imm; + }; + + struct BitfieldOp { + unsigned LSB; + unsigned Width; + }; + union { - struct { - ARMCC::CondCodes Val; - } CC; - - struct { - unsigned Val; - } Cop; - - struct { - unsigned Val; - } CoprocOption; - - struct { - unsigned Mask:4; - } ITMask; - - struct { - ARM_MB::MemBOpt Val; - } MBOpt; - - struct { - ARM_PROC::IFlags Val; - } IFlags; - - struct { - unsigned Val; - } MMask; - - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - } Reg; - - // A vector register list is a sequential list of 1 to 4 registers. - struct { - unsigned RegNum; - unsigned Count; - unsigned LaneIndex; - bool isDoubleSpaced; - } VectorList; - - struct { - unsigned Val; - } VectorIndex; - - struct { - const MCExpr *Val; - } Imm; - - /// Combined record for all forms of ARM address expressions. - struct { - unsigned BaseRegNum; - // Offset is in OffsetReg or OffsetImm. If both are zero, no offset - // was specified. - const MCConstantExpr *OffsetImm; // Offset immediate value - unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL - ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg - unsigned ShiftImm; // shift for OffsetReg. - unsigned Alignment; // 0 = no alignment specified - // n = alignment in bytes (2, 4, 8, 16, or 32) - unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) - } Memory; - - struct { - unsigned RegNum; - bool isAdd; - ARM_AM::ShiftOpc ShiftTy; - unsigned ShiftImm; - } PostIdxReg; - - struct { - bool isASR; - unsigned Imm; - } ShifterImm; - struct { - ARM_AM::ShiftOpc ShiftTy; - unsigned SrcReg; - unsigned ShiftReg; - unsigned ShiftImm; - } RegShiftedReg; - struct { - ARM_AM::ShiftOpc ShiftTy; - unsigned SrcReg; - unsigned ShiftImm; - } RegShiftedImm; - struct { - unsigned Imm; - } RotImm; - struct { - unsigned LSB; - unsigned Width; - } Bitfield; + struct CCOp CC; + struct CopOp Cop; + struct CoprocOptionOp CoprocOption; + struct MBOptOp MBOpt; + struct ITMaskOp ITMask; + struct IFlagsOp IFlags; + struct MMaskOp MMask; + struct TokOp Tok; + struct RegOp Reg; + struct VectorListOp VectorList; + struct VectorIndexOp VectorIndex; + struct ImmOp Imm; + struct MemoryOp Memory; + struct PostIdxRegOp PostIdxReg; + struct ShifterImmOp ShifterImm; + struct RegShiftedRegOp RegShiftedReg; + struct RegShiftedImmOp RegShiftedImm; + struct RotImmOp RotImm; + struct BitfieldOp Bitfield; }; ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} @@ -4569,20 +4593,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Error(Parser.getTok().getLoc(), "unexpected token in operand"); return true; case AsmToken::Identifier: { - if (!tryParseRegisterWithWriteBack(Operands)) - return false; - int Res = tryParseShiftRegister(Operands); - if (Res == 0) // success - return false; - else if (Res == -1) // irrecoverable error - return true; - // If this is VMRS, check for the apsr_nzcv operand. - if (Mnemonic == "vmrs" && - Parser.getTok().getString().equals_lower("apsr_nzcv")) { - S = Parser.getTok().getLoc(); - Parser.Lex(); - Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S)); - return false; + // If we've seen a branch mnemonic, the next operand must be a label. This + // is true even if the label is a register name. So "br r1" means branch to + // label "r1". + bool ExpectLabel = Mnemonic == "b" || Mnemonic == "bl"; + if (!ExpectLabel) { + if (!tryParseRegisterWithWriteBack(Operands)) + return false; + int Res = tryParseShiftRegister(Operands); + if (Res == 0) // success + return false; + else if (Res == -1) // irrecoverable error + return true; + // If this is VMRS, check for the apsr_nzcv operand. + if (Mnemonic == "vmrs" && + Parser.getTok().getString().equals_lower("apsr_nzcv")) { + S = Parser.getTok().getLoc(); + Parser.Lex(); + Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S)); + return false; + } } // Fall though for the Identifier case that is not a register or a diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 586834cf73..b832508a08 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler) add_public_tablegen_target(ARMCommonTableGen) add_llvm_target(ARMCodeGen + A15SDOptimizer.cpp ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 4c0f93c6cd..96a252e1b8 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1949,6 +1949,10 @@ isValidOffset(const int Opcode, const int Offset) const { // the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is // inserted to calculate the final address. Due to this reason, the function // assumes that the "Offset" has correct alignment. + // We used to assert if the offset was not properly aligned, however, + // there are cases where a misaligned pointer recast can cause this + // problem, and we need to allow for it. The front end warns of such + // misaligns with respect to load size. switch(Opcode) { @@ -1958,7 +1962,6 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::STriw_indexed: case Hexagon::STriw: case Hexagon::STriw_f: - assert((Offset % 4 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMW_OFFSET_MIN) && (Offset <= Hexagon_MEMW_OFFSET_MAX); @@ -1968,14 +1971,12 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::STrid: case Hexagon::STrid_indexed: case Hexagon::STrid_f: - assert((Offset % 8 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMD_OFFSET_MIN) && (Offset <= Hexagon_MEMD_OFFSET_MAX); case Hexagon::LDrih: case Hexagon::LDriuh: case Hexagon::STrih: - assert((Offset % 2 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMH_OFFSET_MIN) && (Offset <= Hexagon_MEMH_OFFSET_MAX); @@ -2002,7 +2003,6 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::MEMw_SUBr_MEM_V4 : case Hexagon::MEMw_ANDr_MEM_V4 : case Hexagon::MEMw_ORr_MEM_V4 : - assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." ); return (0 <= Offset && Offset <= 255); case Hexagon::MEMh_ADDi_indexed_MEM_V4 : @@ -2017,7 +2017,6 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::MEMh_SUBr_MEM_V4 : case Hexagon::MEMh_ANDr_MEM_V4 : case Hexagon::MEMh_ORr_MEM_V4 : - assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." ); return (0 <= Offset && Offset <= 127); case Hexagon::MEMb_ADDi_indexed_MEM_V4 : diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index ad495ff306..dda6e247ac 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -82,29 +82,35 @@ struct MBlazeOperand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned Base; + unsigned OffReg; + const MCExpr *Off; + }; + + struct FslImmOp { + const MCExpr *Val; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - } Reg; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned Base; - unsigned OffReg; - const MCExpr *Off; - } Mem; - - struct { - const MCExpr *Val; - } FslImm; + struct TokOp Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; + struct FslImmOp FslImm; }; MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index d0fd7dcec1..bd83afc1cc 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -122,7 +122,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, } void MBlazeRegisterInfo:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const { +processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *) const { // Set the stack offset where GP must be saved/loaded from. MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index 99a2fac95c..497f3866c9 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -55,7 +55,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; /// Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp index ae2e55617d..e504011dfd 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -285,8 +285,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, } void -MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { +MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *) const { // Create a frame entry for the FPW register that must be saved. if (hasFP(MF)) { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true); diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h index a077dd7351..c673f59b5e 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.h +++ b/lib/Target/MSP430/MSP430FrameLowering.h @@ -50,7 +50,8 @@ public: bool hasFP(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; }; } // End llvm namespace diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index ade6084752..c403f216b0 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -101,6 +101,9 @@ class MipsAsmParser : public MCTargetAsmParser { MipsAsmParser::OperandMatchResultTy parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned RegisterClass); + bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); @@ -119,6 +122,9 @@ class MipsAsmParser : public MCTargetAsmParser { SmallVectorImpl<MCInst> &Instructions); void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions); + void expandMemInst(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions, + bool isLoad,bool isImmOpnd); bool reportParseError(StringRef ErrorMsg); bool parseMemOffset(const MCExpr *&Res); @@ -133,6 +139,8 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetReorderDirective(); bool parseSetNoReorderDirective(); + bool parseSetAssignment(); + bool parseDirectiveWord(unsigned Size, SMLoc L); MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol); @@ -166,6 +174,9 @@ class MipsAsmParser : public MCTargetAsmParser { unsigned getReg(int RC,int RegNo); int getATReg(); + + bool processInstruction(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions); public: MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) : MCTargetAsmParser(), STI(sti), Parser(parser) { @@ -211,25 +222,30 @@ private: MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + struct Token { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + RegisterKind Kind; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned Base; + const MCExpr *Off; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - RegisterKind Kind; - } Reg; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned Base; - const MCExpr *Off; - } Mem; + struct Token Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; }; SMLoc StartLoc, EndLoc; @@ -385,6 +401,56 @@ public: }; } +namespace llvm { +extern const MCInstrDesc MipsInsts[]; +} +static const MCInstrDesc &getInstDesc(unsigned Opcode) { + return MipsInsts[Opcode]; +} + +bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions) { + const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); + Inst.setLoc(IDLoc); + if (MCID.mayLoad() || MCID.mayStore()) { + // Check the offset of memory operand, if it is a symbol + // reference or immediate we may have to expand instructions + for (unsigned i=0;i<MCID.getNumOperands();i++) { + const MCOperandInfo &OpInfo = MCID.OpInfo[i]; + if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) || + (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) { + MCOperand &Op = Inst.getOperand(i); + if (Op.isImm()) { + int MemOffset = Op.getImm(); + if (MemOffset < -32768 || MemOffset > 32767) { + // Offset can't exceed 16bit value + expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),true); + return false; + } + } else if (Op.isExpr()) { + const MCExpr *Expr = Op.getExpr(); + if (Expr->getKind() == MCExpr::SymbolRef){ + const MCSymbolRefExpr *SR = + static_cast<const MCSymbolRefExpr*>(Expr); + if (SR->getKind() == MCSymbolRefExpr::VK_None) { + // Expand symbol + expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),false); + return false; + } + } + } + } + } + } + + if (needsExpansion(Inst)) + expandInstruction(Inst, IDLoc, Instructions); + else + Instructions.push_back(Inst); + + return false; +} + bool MipsAsmParser::needsExpansion(MCInst &Inst) { switch(Inst.getOpcode()) { @@ -531,28 +597,103 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, } } +void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions, + bool isLoad,bool isImmOpnd) { + const MCSymbolRefExpr *SR; + MCInst TempInst; + unsigned ImmOffset,HiOffset,LoOffset; + const MCExpr *ExprOffset; + unsigned TmpRegNum; + unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID: + Mips::CPURegsRegClassID, + getATReg()); + // 1st operand is either source or dst register + assert(Inst.getOperand(0).isReg() && "expected register operand kind"); + unsigned RegOpNum = Inst.getOperand(0).getReg(); + // 2nd operand is base register + assert(Inst.getOperand(1).isReg() && "expected register operand kind"); + unsigned BaseRegNum = Inst.getOperand(1).getReg(); + // 3rd operand is either immediate or expression + if (isImmOpnd) { + assert(Inst.getOperand(2).isImm() && "expected immediate operand kind"); + ImmOffset = Inst.getOperand(2).getImm(); + LoOffset = ImmOffset & 0x0000ffff; + HiOffset = (ImmOffset & 0xffff0000) >> 16; + // If msb of LoOffset is 1(negative number) we must increment HiOffset + if (LoOffset & 0x8000) + HiOffset++; + } + else + ExprOffset = Inst.getOperand(2).getExpr(); + // All instructions will have the same location + TempInst.setLoc(IDLoc); + // 1st instruction in expansion is LUi. For load instruction we can use + // the dst register as a temporary if base and dst are different, + // but for stores we must use $at + TmpRegNum = (isLoad && (BaseRegNum != RegOpNum))?RegOpNum:AtRegNum; + TempInst.setOpcode(Mips::LUi); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + if (isImmOpnd) + TempInst.addOperand(MCOperand::CreateImm(HiOffset)); + else { + if (ExprOffset->getKind() == MCExpr::SymbolRef) { + SR = static_cast<const MCSymbolRefExpr*>(ExprOffset); + const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr:: + Create(SR->getSymbol().getName(), + MCSymbolRefExpr::VK_Mips_ABS_HI, + getContext()); + TempInst.addOperand(MCOperand::CreateExpr(HiExpr)); + } + } + // Add the instruction to the list + Instructions.push_back(TempInst); + // and prepare TempInst for next instruction + TempInst.clear(); + // which is add temp register to base + TempInst.setOpcode(Mips::ADDu); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + TempInst.addOperand(MCOperand::CreateReg(BaseRegNum)); + Instructions.push_back(TempInst); + TempInst.clear(); + // and finaly, create original instruction with low part + // of offset and new base + TempInst.setOpcode(Inst.getOpcode()); + TempInst.addOperand(MCOperand::CreateReg(RegOpNum)); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + if (isImmOpnd) + TempInst.addOperand(MCOperand::CreateImm(LoOffset)); + else { + if (ExprOffset->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr:: + Create(SR->getSymbol().getName(), + MCSymbolRefExpr::VK_Mips_ABS_LO, + getContext()); + TempInst.addOperand(MCOperand::CreateExpr(LoExpr)); + } + } + Instructions.push_back(TempInst); + TempInst.clear(); +} + bool MipsAsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out, unsigned &ErrorInfo, bool MatchingInlineAsm) { MCInst Inst; + SmallVector<MCInst, 8> Instructions; unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); switch (MatchResult) { default: break; case Match_Success: { - if (needsExpansion(Inst)) { - SmallVector<MCInst, 4> Instructions; - expandInstruction(Inst, IDLoc, Instructions); - for(unsigned i =0; i < Instructions.size(); i++){ - Out.EmitInstruction(Instructions[i]); - } - } else { - Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); - } + if (processInstruction(Inst,IDLoc,Instructions)) + return true; + for(unsigned i =0; i < Instructions.size(); i++) + Out.EmitInstruction(Instructions[i]); return false; } case Match_MissingFeature: @@ -812,6 +953,11 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, return false; } case AsmToken::Identifier: + // Look for the existing symbol, we should check if + // we need to assigne the propper RegisterKind + if (searchSymbolAlias(Operands,MipsOperand::Kind_None)) + return false; + //else drop to expression parsing case AsmToken::LParen: case AsmToken::Minus: case AsmToken::Plus: @@ -883,24 +1029,25 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { // Check the type of the expression if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) { - // it's a constant, evaluate lo or hi value - int Val = MCE->getValue(); + // It's a constant, evaluate lo or hi value if (Str == "lo") { - Val = Val & 0xffff; + short Val = MCE->getValue(); + Res = MCConstantExpr::Create(Val, getContext()); } else if (Str == "hi") { + int Val = MCE->getValue(); int LoSign = Val & 0x8000; Val = (Val & 0xffff0000) >> 16; - //lower part is treated as signed int, so if it is negative - //we must add 1 to hi part to compensate + // Lower part is treated as a signed int, so if it is negative + // we must add 1 to the hi part to compensate if (LoSign) Val++; + Res = MCConstantExpr::Create(Val, getContext()); } - Res = MCConstantExpr::Create(Val, getContext()); return false; } if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) { - // it's a symbol, create symbolic expression from symbol + // It's a symbol, create symbolic expression from symbol StringRef Symbol = MSRE->getSymbol().getName(); MCSymbolRefExpr::VariantKind VK = getVariantKind(Str); Res = MCSymbolRefExpr::Create(Symbol,VK,getContext()); @@ -925,6 +1072,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) { switch(getLexer().getKind()) { default: return true; + case AsmToken::Identifier: case AsmToken::Integer: case AsmToken::Minus: case AsmToken::Plus: @@ -1004,6 +1152,11 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!isMips64()) return MatchOperand_NoMatch; + if (getLexer().getKind() == AsmToken::Identifier) { + if (searchSymbolAlias(Operands,MipsOperand::Kind_CPU64Regs)) + return MatchOperand_Success; + return MatchOperand_NoMatch; + } // if the first token is not '$' we have an error if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; @@ -1018,9 +1171,52 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_NoMatch; } +bool MipsAsmParser:: +searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned RegisterKind) { + + MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier()); + if (Sym) { + SMLoc S = Parser.getTok().getLoc(); + const MCExpr *Expr; + if (Sym->isVariable()) + Expr = Sym->getVariableValue(); + else + return false; + if (Expr->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); + const StringRef DefSymbol = Ref->getSymbol().getName(); + if (DefSymbol.startswith("$")) { + // Lookup for the register with corresponding name + int RegNum = matchRegisterName(DefSymbol.substr(1),isMips64()); + if (RegNum > -1) { + Parser.Lex(); + MipsOperand *op = MipsOperand::CreateReg(RegNum,S, + Parser.getTok().getLoc()); + op->setRegKind((MipsOperand::RegisterKind)RegisterKind); + Operands.push_back(op); + return true; + } + } + } else if (Expr->getKind() == MCExpr::Constant) { + Parser.Lex(); + const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr); + MipsOperand *op = MipsOperand::CreateImm(Const,S, + Parser.getTok().getLoc()); + Operands.push_back(op); + return true; + } + } + return false; +} MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + if (getLexer().getKind() == AsmToken::Identifier) { + if (searchSymbolAlias(Operands,MipsOperand::Kind_CPURegs)) + return MatchOperand_Success; + return MatchOperand_NoMatch; + } // if the first token is not '$' we have an error if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; @@ -1316,13 +1512,13 @@ bool MipsAsmParser::reportParseError(StringRef ErrorMsg) { } bool MipsAsmParser::parseSetNoAtDirective() { - // line should look like: + // Line should look like: // .set noat // set at reg to 0 Options.setATReg(0); // eat noat Parser.Lex(); - // if this is not the end of the statement, report error + // If this is not the end of the statement, report error if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; @@ -1341,12 +1537,12 @@ bool MipsAsmParser::parseSetAtDirective() { Parser.Lex(); // Consume the EndOfStatement return false; } else if (getLexer().is(AsmToken::Equal)) { - getParser().Lex(); //eat '=' + getParser().Lex(); // eat '=' if (getLexer().isNot(AsmToken::Dollar)) { reportParseError("unexpected token in statement"); return false; } - Parser.Lex(); // eat '$' + Parser.Lex(); // Eat '$' const AsmToken &Reg = Parser.getTok(); if (Reg.is(AsmToken::Identifier)) { AtRegNo = matchCPURegisterName(Reg.getIdentifier()); @@ -1366,7 +1562,7 @@ bool MipsAsmParser::parseSetAtDirective() { reportParseError("unexpected token in statement"); return false; } - getParser().Lex(); //eat reg + getParser().Lex(); // Eat reg if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); @@ -1382,7 +1578,7 @@ bool MipsAsmParser::parseSetAtDirective() { bool MipsAsmParser::parseSetReorderDirective() { Parser.Lex(); - // if this is not the end of the statement, report error + // If this is not the end of the statement, report error if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; @@ -1431,6 +1627,31 @@ bool MipsAsmParser::parseSetNoMacroDirective() { Parser.Lex(); // Consume the EndOfStatement return false; } + +bool MipsAsmParser::parseSetAssignment() { + StringRef Name; + const MCExpr *Value; + + if (Parser.parseIdentifier(Name)) + reportParseError("expected identifier after .set"); + + if (getLexer().isNot(AsmToken::Comma)) + return reportParseError("unexpected token in .set directive"); + Lex(); //eat comma + + if (Parser.parseExpression(Value)) + reportParseError("expected valid expression after comma"); + + // check if the Name already exists as a symbol + MCSymbol *Sym = getContext().LookupSymbol(Name); + if (Sym) { + return reportParseError("symbol already defined"); + } + Sym = getContext().GetOrCreateSymbol(Name); + Sym->setVariableValue(Value); + + return false; +} bool MipsAsmParser::parseDirectiveSet() { // get next token @@ -1456,6 +1677,10 @@ bool MipsAsmParser::parseDirectiveSet() { // ignore this directive for now Parser.eatToEndOfStatement(); return false; + } else { + // it is just an identifier, look for assignment + parseSetAssignment(); + return false; } return true; diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 58aa1be34d..cf8bb189e4 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -16,6 +16,8 @@ add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen Mips16FrameLowering.cpp Mips16InstrInfo.cpp + Mips16ISelDAGToDAG.cpp + Mips16ISelLowering.cpp Mips16RegisterInfo.cpp MipsAnalyzeImmediate.cpp MipsAsmPrinter.cpp @@ -33,6 +35,8 @@ add_llvm_target(MipsCodeGen MipsRegisterInfo.cpp MipsSEFrameLowering.cpp MipsSEInstrInfo.cpp + MipsSEISelDAGToDAG.cpp + MipsSEISelLowering.cpp MipsSERegisterInfo.cpp MipsSubtarget.cpp MipsTargetMachine.cpp diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 96f93a0789..e198a7c983 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -160,8 +160,9 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, const MCOperand &MO = MI.getOperand(OpNo); - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) return MO.getImm(); + // If the destination is an immediate, divide by 4. + if (MO.isImm()) return MO.getImm() >> 2; + assert(MO.isExpr() && "getBranchTargetOpValue expects only expressions or immediates"); @@ -179,8 +180,9 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) return MO.getImm(); + // If the destination is an immediate, divide by 4. + if (MO.isImm()) return MO.getImm()>>2; + assert(MO.isExpr() && "getJumpTargetOpValue expects only expressions or an immediate"); diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp new file mode 100644 index 0000000000..00b3449300 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -0,0 +1,308 @@ +//===-- Mips16ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips16 ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips16. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-isel" +#include "Mips16ISelDAGToDAG.h" +#include "Mips.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsMachineFunction.h" +#include "MipsRegisterInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +/// Select multiply instructions. +std::pair<SDNode*, SDNode*> +Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty, + bool HasLo, bool HasHi) { + SDNode *Lo = 0, *Hi = 0; + SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0), + N->getOperand(1)); + SDValue InFlag = SDValue(Mul, 0); + + if (HasLo) { + unsigned Opcode = Mips::Mflo16; + Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag); + InFlag = SDValue(Lo, 1); + } + if (HasHi) { + unsigned Opcode = Mips::Mfhi16; + Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag); + } + return std::make_pair(Lo, Hi); +} + +void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->globalBaseRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); + const TargetRegisterClass *RC = + (const TargetRegisterClass*)&Mips::CPU16RegsRegClass; + + V0 = RegInfo.createVirtualRegister(RC); + V1 = RegInfo.createVirtualRegister(RC); + V2 = RegInfo.createVirtualRegister(RC); + + BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); + BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16); + BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg) + .addReg(V1).addReg(V2); +} + +// Insert instructions to initialize the Mips16 SP Alias register in the +// first MBB of the function. +// +void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->mips16SPAliasRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg(); + + BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg) + .addReg(Mips::SP); +} + +void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { + initGlobalBaseReg(MF); + initMips16SPAliasReg(MF); +} + +/// getMips16SPAliasReg - Output the instructions required to put the +/// SP into a Mips16 accessible aliased register. +SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() { + unsigned Mips16SPAliasReg = + MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg(); + return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy()); +} + +void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { + SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy()); + if (Parent) { + switch (Parent->getOpcode()) { + case ISD::LOAD: { + LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent); + switch (SD->getMemoryVT().getSizeInBits()) { + case 8: + case 16: + AliasReg = TM.getFrameLowering()->hasFP(*MF)? + AliasFPReg: getMips16SPAliasReg(); + return; + } + break; + } + case ISD::STORE: { + StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent); + switch (SD->getMemoryVT().getSizeInBits()) { + case 8: + case 16: + AliasReg = TM.getFrameLowering()->hasFP(*MF)? + AliasFPReg: getMips16SPAliasReg(); + return; + } + break; + } + } + } + AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy()); + return; + +} + +bool Mips16DAGToDAGISel::selectAddr16( + SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset, + SDValue &Alias) { + EVT ValTy = Addr.getValueType(); + + Alias = CurDAG->getTargetConstant(0, ValTy); + + // if Address is FI, get the TargetFrameIndex. + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, ValTy); + getMips16SPRefReg(Parent, Alias); + return true; + } + // on PIC code Load GA + if (Addr.getOpcode() == MipsISD::Wrapper) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + if (TM.getRelocationModel() != Reloc::PIC_) { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + // Addresses of the form FI+const or FI|const + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + if (isInt<16>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> + (Addr.getOperand(0))) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + getMips16SPRefReg(Parent, Alias); + } + else + Base = Addr.getOperand(0); + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); + return true; + } + } + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + // When loading from constant pools, load the lower address part in + // the instruction itself. Example, instead of: + // lui $2, %hi($CPI1_0) + // addiu $2, $2, %lo($CPI1_0) + // lwc1 $f0, 0($2) + // Generate: + // lui $2, %hi($CPI1_0) + // lwc1 $f0, %lo($CPI1_0)($2) + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || + Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { + SDValue Opnd0 = Addr.getOperand(1).getOperand(0); + if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || + isa<JumpTableSDNode>(Opnd0)) { + Base = Addr.getOperand(0); + Offset = Opnd0; + return true; + } + } + + // If an indexed floating point load/store can be emitted, return false. + const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); + + if (LS && + (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && + Subtarget.hasFPIdx()) + return false; + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, ValTy); + return true; +} + +/// Select instructions not customized! Used for +/// expanded, promoted and normal instructions +std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + DebugLoc DL = Node->getDebugLoc(); + + /// + // Instruction Selection not handled by the auto-generated + // tablegen selection should be handled here. + /// + EVT NodeTy = Node->getValueType(0); + unsigned MultOpc; + + switch(Opcode) { + default: break; + + case ISD::SUBE: + case ISD::ADDE: { + SDValue InFlag = Node->getOperand(2), CmpLHS; + unsigned Opc = InFlag.getOpcode(); (void)Opc; + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); + + unsigned MOp; + if (Opcode == ISD::ADDE) { + CmpLHS = InFlag.getValue(0); + MOp = Mips::AdduRxRyRz16; + } else { + CmpLHS = InFlag.getOperand(0); + MOp = Mips::SubuRxRyRz16; + } + + SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; + + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + + EVT VT = LHS.getValueType(); + + unsigned Sltu_op = Mips::SltuRxRyRz16; + SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2); + unsigned Addu_op = Mips::AdduRxRyRz16; + SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT, + SDValue(Carry,0), RHS); + + SDNode *Result = CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, + SDValue(AddCarry,0)); + return std::make_pair(true, Result); + } + + /// Mul with two results + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: { + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : Mips::MultRxRy16); + std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy, + true, true); + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); + + if (!SDValue(Node, 1).use_empty()) + ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); + + return std::make_pair(true, (SDNode*)NULL); + } + + case ISD::MULHS: + case ISD::MULHU: { + MultOpc = (Opcode == ISD::MULHU ? Mips::MultuRxRy16 : Mips::MultRxRy16); + SDNode *Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second; + return std::make_pair(true, Result); + } + } + + return std::make_pair(false, (SDNode*)NULL); +} + +FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM) { + return new Mips16DAGToDAGISel(TM); +} diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h new file mode 100644 index 0000000000..baa85877d9 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h @@ -0,0 +1,51 @@ +//===---- Mips16ISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips16. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16ISELDAGTODAG_H +#define MIPS16ISELDAGTODAG_H + +#include "MipsISelDAGToDAG.h" + +namespace llvm { + +class Mips16DAGToDAGISel : public MipsDAGToDAGISel { +public: + explicit Mips16DAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {} + +private: + std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, + EVT Ty, bool HasLo, bool HasHi); + + SDValue getMips16SPAliasReg(); + + void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg); + + virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias); + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node); + + virtual void processFunctionAfterISel(MachineFunction &MF); + + // Insert instructions to initialize the global base register in the + // first MBB of the function. + void initGlobalBaseReg(MachineFunction &MF); + + void initMips16SPAliasReg(MachineFunction &MF); +}; + +FunctionPass *createMips16ISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp new file mode 100644 index 0000000000..23eb5375ac --- /dev/null +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -0,0 +1,689 @@ +//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips16. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "mips-lower" +#include "Mips16ISelLowering.h" +#include "MipsRegisterInfo.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <set> + +using namespace llvm; + +static cl::opt<bool> +Mips16HardFloat("mips16-hard-float", cl::NotHidden, + cl::desc("MIPS: mips16 hard float enable."), + cl::init(false)); + +static cl::opt<bool> DontExpandCondPseudos16( + "mips16-dont-expand-cond-pseudo", + cl::init(false), + cl::desc("Dont expand conditional move related " + "pseudos for Mips 16"), + cl::Hidden); + +namespace { + std::set<const char*, MipsTargetLowering::LTStr> NoHelperNeeded; +} + +Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM) + : MipsTargetLowering(TM) { + // + // set up as if mips32 and then revert so we can test the mechanism + // for switching + addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); + addRegisterClass(MVT::f32, &Mips::FGR32RegClass); + computeRegisterProperties(); + clearRegisterClasses(); + + // Set up the register classes + addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass); + + if (Mips16HardFloat) + setMips16HardFloatLibCalls(); + + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); + + computeRegisterProperties(); +} + +const MipsTargetLowering * +llvm::createMips16TargetLowering(MipsTargetMachine &TM) { + return new Mips16TargetLowering(TM); +} + +bool +Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { + return false; +} + +MachineBasicBlock * +Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + switch (MI->getOpcode()) { + default: + return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case Mips::SelBeqZ: + return emitSel16(Mips::BeqzRxImm16, MI, BB); + case Mips::SelBneZ: + return emitSel16(Mips::BnezRxImm16, MI, BB); + case Mips::SelTBteqZCmpi: + return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB); + case Mips::SelTBteqZSlti: + return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB); + case Mips::SelTBteqZSltiu: + return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SelTBtneZCmpi: + return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB); + case Mips::SelTBtneZSlti: + return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); + case Mips::SelTBtneZSltiu: + return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SelTBteqZCmp: + return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); + case Mips::SelTBteqZSlt: + return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); + case Mips::SelTBteqZSltu: + return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); + case Mips::SelTBtneZCmp: + return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); + case Mips::SelTBtneZSlt: + return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); + case Mips::SelTBtneZSltu: + return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); + case Mips::BteqzT8CmpX16: + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); + case Mips::BteqzT8SltX16: + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); + case Mips::BteqzT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); + case Mips::BtnezT8CmpX16: + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); + case Mips::BtnezT8SltX16: + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); + case Mips::BtnezT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); + case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); + case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); + case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + break; + case Mips::SltCCRxRy16: + return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB); + break; + case Mips::SltiCCRxImmX16: + return emitFEXT_CCRXI16_ins + (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::SltiuCCRxImmX16: + return emitFEXT_CCRXI16_ins + (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SltuCCRxRy16: + return emitFEXT_CCRX16_ins + (Mips::SltuRxRy16, MI, BB); + } +} + +bool Mips16TargetLowering:: +isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const { + // No tail call optimization for mips16. + return false; +} + +void Mips16TargetLowering::setMips16LibcallName + (RTLIB::Libcall L, const char *Name) { + setLibcallName(L, Name); + NoHelperNeeded.insert(Name); +} + +void Mips16TargetLowering::setMips16HardFloatLibCalls() { + setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); + setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); + setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); + setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); + setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); + setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); + setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); + setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); + setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); + setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); + setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); + setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); + setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); + setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); + setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); + setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); + setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); + setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); + setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); + setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); + setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); + setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); + setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); + setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); + setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); + setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); + setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); + setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); + setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); + setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); + setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2"); + setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2"); +} + + +// +// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much +// cleaner way to do all of this but it will have to wait until the traditional +// gcc mechanism is completed. +// +// For Pic, in order for Mips16 code to call Mips32 code which according the abi +// have either arguments or returned values placed in floating point registers, +// we use a set of helper functions. (This includes functions which return type +// complex which on Mips are returned in a pair of floating point registers). +// +// This is an encoding that we inherited from gcc. +// In Mips traditional O32, N32 ABI, floating point numbers are passed in +// floating point argument registers 1,2 only when the first and optionally +// the second arguments are float (sf) or double (df). +// For Mips16 we are only concerned with the situations where floating point +// arguments are being passed in floating point registers by the ABI, because +// Mips16 mode code cannot execute floating point instructions to load those +// values and hence helper functions are needed. +// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df) +// the helper function suffixs for these are: +// 0, 1, 5, 9, 2, 6, 10 +// this suffix can then be calculated as follows: +// for a given argument Arg: +// Arg1x, Arg2x = 1 : Arg is sf +// 2 : Arg is df +// 0: Arg is neither sf or df +// So this stub is the string for number Arg1x + Arg2x*4. +// However not all numbers between 0 and 10 are possible, we check anyway and +// assert if the impossible exists. +// + +unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber + (ArgListTy &Args) const { + unsigned int resultNum = 0; + if (Args.size() >= 1) { + Type *t = Args[0].Ty; + if (t->isFloatTy()) { + resultNum = 1; + } + else if (t->isDoubleTy()) { + resultNum = 2; + } + } + if (resultNum) { + if (Args.size() >=2) { + Type *t = Args[1].Ty; + if (t->isFloatTy()) { + resultNum += 4; + } + else if (t->isDoubleTy()) { + resultNum += 8; + } + } + } + return resultNum; +} + +// +// prefixs are attached to stub numbers depending on the return type . +// return type: float sf_ +// double df_ +// single complex sc_ +// double complext dc_ +// others NO PREFIX +// +// +// The full name of a helper function is__mips16_call_stub + +// return type dependent prefix + stub number +// +// +// This is something that probably should be in a different source file and +// perhaps done differently but my main purpose is to not waste runtime +// on something that we can enumerate in the source. Another possibility is +// to have a python script to generate these mapping tables. This will do +// for now. There are a whole series of helper function mapping arrays, one +// for each return type class as outlined above. There there are 11 possible +// entries. Ones with 0 are ones which should never be selected +// +// All the arrays are similar except for ones which return neither +// sf, df, sc, dc, in which only care about ones which have sf or df as a +// first parameter. +// +#define P_ "__mips16_call_stub_" +#define MAX_STUB_NUMBER 10 +#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10" +#define T P "0" , T1 +#define P P_ +static char const * vMips16Helper[MAX_STUB_NUMBER+1] = + {0, T1 }; +#undef P +#define P P_ "sf_" +static char const * sfMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "df_" +static char const * dfMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "sc_" +static char const * scMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "dc_" +static char const * dcMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#undef P_ + + +const char* Mips16TargetLowering:: + getMips16HelperFunction + (Type* RetTy, ArgListTy &Args, bool &needHelper) const { + const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args); +#ifndef NDEBUG + const unsigned int maxStubNum = 10; + assert(stubNum <= maxStubNum); + const bool validStubNum[maxStubNum+1] = + {true, true, true, false, false, true, true, false, false, true, true}; + assert(validStubNum[stubNum]); +#endif + const char *result; + if (RetTy->isFloatTy()) { + result = sfMips16Helper[stubNum]; + } + else if (RetTy ->isDoubleTy()) { + result = dfMips16Helper[stubNum]; + } + else if (RetTy->isStructTy()) { + // check if it's complex + if (RetTy->getNumContainedTypes() == 2) { + if ((RetTy->getContainedType(0)->isFloatTy()) && + (RetTy->getContainedType(1)->isFloatTy())) { + result = scMips16Helper[stubNum]; + } + else if ((RetTy->getContainedType(0)->isDoubleTy()) && + (RetTy->getContainedType(1)->isDoubleTy())) { + result = dcMips16Helper[stubNum]; + } + else { + llvm_unreachable("Uncovered condition"); + } + } + else { + llvm_unreachable("Uncovered condition"); + } + } + else { + if (stubNum == 0) { + needHelper = false; + return ""; + } + result = vMips16Helper[stubNum]; + } + needHelper = true; + return result; +} + +void Mips16TargetLowering:: +getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + SelectionDAG &DAG = CLI.DAG; + const char* Mips16HelperFunction = 0; + bool NeedMips16Helper = false; + + if (getTargetMachine().Options.UseSoftFloat && Mips16HardFloat) { + // + // currently we don't have symbols tagged with the mips16 or mips32 + // qualifier so we will assume that we don't know what kind it is. + // and generate the helper + // + bool LookupHelper = true; + if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) { + if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) { + LookupHelper = false; + } + } + if (LookupHelper) Mips16HelperFunction = + getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper); + + } + + SDValue JumpTarget = Callee; + + // T9 should contain the address of the callee function if + // -reloction-model=pic or it is an indirect call. + if (IsPICCall || !GlobalOrExternal) { + unsigned V0Reg = Mips::V0; + if (NeedMips16Helper) { + RegsToPass.push_front(std::make_pair(V0Reg, Callee)); + JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy()); + JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT); + } else + RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee)); + } + + Ops.push_back(JumpTarget); + + MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, + InternalLinkage, CLI, Callee, Chain); +} + +MachineBasicBlock *Mips16TargetLowering:: +emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg()) + .addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitSelT16 + (unsigned Opc1, unsigned Opc2, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) + .addReg(MI->getOperand(4).getReg()); + BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} + +MachineBasicBlock *Mips16TargetLowering::emitSeliT16 + (unsigned Opc1, unsigned Opc2, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) + .addImm(MI->getOperand(4).getImm()); + BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} + +MachineBasicBlock + *Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, + MachineInstr *MI, + MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned regX = MI->getOperand(0).getReg(); + unsigned regY = MI->getOperand(1).getReg(); + MachineBasicBlock *target = MI->getOperand(2).getMBB(); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins( + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned regX = MI->getOperand(0).getReg(); + int64_t imm = MI->getOperand(1).getImm(); + MachineBasicBlock *target = MI->getOperand(2).getMBB(); + unsigned CmpOpc; + if (isUInt<8>(imm)) + CmpOpc = CmpiOpc; + else if (isUInt<16>(imm)) + CmpOpc = CmpiXOpc; + else + llvm_unreachable("immediate field not usable"); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +static unsigned Mips16WhichOp8uOr16simm + (unsigned shortOp, unsigned longOp, int64_t Imm) { + if (isUInt<8>(Imm)) + return shortOp; + else if (isInt<16>(Imm)) + return longOp; + else + llvm_unreachable("immediate field not usable"); +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins( + unsigned SltOpc, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned CC = MI->getOperand(0).getReg(); + unsigned regX = MI->getOperand(1).getReg(); + unsigned regY = MI->getOperand(2).getReg(); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(SltOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins( + unsigned SltiOpc, unsigned SltiXOpc, + MachineInstr *MI, MachineBasicBlock *BB )const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned CC = MI->getOperand(0).getReg(); + unsigned regX = MI->getOperand(1).getReg(); + int64_t Imm = MI->getOperand(2).getImm(); + unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(SltOpc)).addReg(regX).addImm(Imm); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h new file mode 100644 index 0000000000..b23e2a1f37 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelLowering.h @@ -0,0 +1,80 @@ +//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips16. +// +//===----------------------------------------------------------------------===// + +#ifndef Mips16ISELLOWERING_H +#define Mips16ISELLOWERING_H + +#include "MipsISelLowering.h" + +namespace llvm { + class Mips16TargetLowering : public MipsTargetLowering { + public: + explicit Mips16TargetLowering(MipsTargetMachine &TM); + + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + private: + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const; + + void setMips16LibcallName(RTLIB::Libcall, const char *Name); + + void setMips16HardFloatLibCalls(); + + unsigned int + getMips16HelperFunctionStubNumber(ArgListTy &Args) const; + + const char *getMips16HelperFunction + (Type* RetTy, ArgListTy &Args, bool &needHelper) const; + + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; + + MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_T8I8I16_ins( + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, + MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_CCRX16_ins( + unsigned SltOpc, + MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_CCRXI16_ins( + unsigned SltiOpc, unsigned SltiXOpc, + MachineInstr *MI, MachineBasicBlock *BB )const; + }; +} + +#endif // Mips16ISELLOWERING_H diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index a9e9c52716..629382965b 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -15,7 +15,7 @@ // Mips Address // def addr16 : - ComplexPattern<iPTR, 3, "SelectAddr16", [frameindex], [SDNPWantParent]>; + ComplexPattern<iPTR, 3, "selectAddr16", [frameindex], [SDNPWantParent]>; // // Address operand diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 6dff548505..77b08cb11e 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -12,19 +12,19 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-isel" +#include "MipsISelDAGToDAG.h" +#include "Mips16ISelDAGToDAG.h" +#include "MipsSEISelDAGToDAG.h" #include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsAnalyzeImmediate.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" -#include "MipsSubtarget.h" -#include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" @@ -45,270 +45,11 @@ using namespace llvm; // MipsDAGToDAGISel - MIPS specific code to select MIPS machine // instructions for SelectionDAG operations. //===----------------------------------------------------------------------===// -namespace { - -class MipsDAGToDAGISel : public SelectionDAGISel { - - /// TM - Keep a reference to MipsTargetMachine. - MipsTargetMachine &TM; - - /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can - /// make the right decision when generating code for different targets. - const MipsSubtarget &Subtarget; - -public: - explicit MipsDAGToDAGISel(MipsTargetMachine &tm) : - SelectionDAGISel(tm), - TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {} - - // Pass Name - virtual const char *getPassName() const { - return "MIPS DAG->DAG Pattern Instruction Selection"; - } - - virtual bool runOnMachineFunction(MachineFunction &MF); - -private: - // Include the pieces autogenerated from the target description. - #include "MipsGenDAGISel.inc" - - /// getTargetMachine - Return a reference to the TargetMachine, casted - /// to the target-specific type. - const MipsTargetMachine &getTargetMachine() { - return static_cast<const MipsTargetMachine &>(TM); - } - - /// getInstrInfo - Return a reference to the TargetInstrInfo, casted - /// to the target-specific type. - const MipsInstrInfo *getInstrInfo() { - return getTargetMachine().getInstrInfo(); - } - - SDNode *getGlobalBaseReg(); - - SDValue getMips16SPAliasReg(); - - void getMips16SPRefReg(SDNode *parent, SDValue &AliasReg); - - std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, - EVT Ty, bool HasLo, bool HasHi); - - SDNode *Select(SDNode *N); - - // Complex Pattern. - /// (reg + imm). - bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - /// Fall back on this function if all else fails. - bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - /// Match integer address pattern. - bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Alias); - - // getImm - Return a target constant with the specified value. - inline SDValue getImm(const SDNode *Node, uint64_t Imm) { - return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); - } - - void ProcessFunctionAfterISel(MachineFunction &MF); - bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); - void InitGlobalBaseReg(MachineFunction &MF); - void InitMips16SPAliasReg(MachineFunction &MF); - - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps); -}; - -} - -// Insert instructions to initialize the global base register in the -// first MBB of the function. When the ABI is O32 and the relocation model is -// PIC, the necessary instructions are emitted later to prevent optimization -// passes from moving them. -void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - - if (!MipsFI->globalBaseRegSet()) - return; - - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator I = MBB.begin(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); - const TargetRegisterClass *RC; - - if (Subtarget.isABI_N64()) - RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; - else if (Subtarget.inMips16Mode()) - RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass; - else - RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass; - - V0 = RegInfo.createVirtualRegister(RC); - V1 = RegInfo.createVirtualRegister(RC); - V2 = RegInfo.createVirtualRegister(RC); - - if (Subtarget.isABI_N64()) { - MF.getRegInfo().addLiveIn(Mips::T9_64); - MBB.addLiveIn(Mips::T9_64); - - // lui $v0, %hi(%neg(%gp_rel(fname))) - // daddu $v1, $v0, $t9 - // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) - const GlobalValue *FName = MF.getFunction(); - BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); - BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0) - .addReg(Mips::T9_64); - BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - return; - } - - if (Subtarget.inMips16Mode()) { - BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); - BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); - BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16); - BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg) - .addReg(V1).addReg(V2); - return; - } - - if (MF.getTarget().getRelocationModel() == Reloc::Static) { - // Set global register to __gnu_local_gp. - // - // lui $v0, %hi(__gnu_local_gp) - // addiu $globalbasereg, $v0, %lo(__gnu_local_gp) - BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) - .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); - BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) - .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); - return; - } - - MF.getRegInfo().addLiveIn(Mips::T9); - MBB.addLiveIn(Mips::T9); - - if (Subtarget.isABI_N32()) { - // lui $v0, %hi(%neg(%gp_rel(fname))) - // addu $v1, $v0, $t9 - // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) - const GlobalValue *FName = MF.getFunction(); - BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); - BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); - BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - return; - } - - assert(Subtarget.isABI_O32()); - - // For O32 ABI, the following instruction sequence is emitted to initialize - // the global base register: - // - // 0. lui $2, %hi(_gp_disp) - // 1. addiu $2, $2, %lo(_gp_disp) - // 2. addu $globalbasereg, $2, $t9 - // - // We emit only the last instruction here. - // - // GNU linker requires that the first two instructions appear at the beginning - // of a function and no instructions be inserted before or between them. - // The two instructions are emitted during lowering to MC layer in order to - // avoid any reordering. - // - // Register $2 (Mips::V0) is added to the list of live-in registers to ensure - // the value instruction 1 (addiu) defines is valid when instruction 2 (addu) - // reads it. - MF.getRegInfo().addLiveIn(Mips::V0); - MBB.addLiveIn(Mips::V0); - BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) - .addReg(Mips::V0).addReg(Mips::T9); -} - -// Insert instructions to initialize the Mips16 SP Alias register in the -// first MBB of the function. -// -void MipsDAGToDAGISel::InitMips16SPAliasReg(MachineFunction &MF) { - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - - if (!MipsFI->mips16SPAliasRegSet()) - return; - - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator I = MBB.begin(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg(); - - BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg) - .addReg(Mips::SP); -} - - -bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, - const MachineInstr& MI) { - unsigned DstReg = 0, ZeroReg = 0; - - // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0". - if ((MI.getOpcode() == Mips::ADDiu) && - (MI.getOperand(1).getReg() == Mips::ZERO) && - (MI.getOperand(2).getImm() == 0)) { - DstReg = MI.getOperand(0).getReg(); - ZeroReg = Mips::ZERO; - } else if ((MI.getOpcode() == Mips::DADDiu) && - (MI.getOperand(1).getReg() == Mips::ZERO_64) && - (MI.getOperand(2).getImm() == 0)) { - DstReg = MI.getOperand(0).getReg(); - ZeroReg = Mips::ZERO_64; - } - - if (!DstReg) - return false; - - // Replace uses with ZeroReg. - for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), - E = MRI->use_end(); U != E;) { - MachineOperand &MO = U.getOperand(); - unsigned OpNo = U.getOperandNo(); - MachineInstr *MI = MO.getParent(); - ++U; - - // Do not replace if it is a phi's operand or is tied to def operand. - if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) - continue; - - MO.setReg(ZeroReg); - } - - return true; -} - -void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) { - InitGlobalBaseReg(MF); - InitMips16SPAliasReg(MF); - - MachineRegisterInfo *MRI = &MF.getRegInfo(); - - for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE; - ++MFI) - for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) - ReplaceUsesWithZeroReg(MRI, *I); -} bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { bool Ret = SelectionDAGISel::runOnMachineFunction(MF); - ProcessFunctionAfterISel(MF); + processFunctionAfterISel(MF); return Ret; } @@ -320,233 +61,36 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } -/// getMips16SPAliasReg - Output the instructions required to put the -/// SP into a Mips16 accessible aliased register. -SDValue MipsDAGToDAGISel::getMips16SPAliasReg() { - unsigned Mips16SPAliasReg = - MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg(); - return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy()); -} - /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const { - EVT ValTy = Addr.getValueType(); - - // if Address is FI, get the TargetFrameIndex. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - Offset = CurDAG->getTargetConstant(0, ValTy); - return true; - } - - // on PIC code Load GA - if (Addr.getOpcode() == MipsISD::Wrapper) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; - } - - if (TM.getRelocationModel() != Reloc::PIC_) { - if ((Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress)) - return false; - } - - // Addresses of the form FI+const or FI|const - if (CurDAG->isBaseWithConstantOffset(Addr)) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); - if (isInt<16>(CN->getSExtValue())) { - - // If the first operand is a FI, get the TargetFI Node - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> - (Addr.getOperand(0))) - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - else - Base = Addr.getOperand(0); - - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); - return true; - } - } - - // Operand is a result from an ADD. - if (Addr.getOpcode() == ISD::ADD) { - // When loading from constant pools, load the lower address part in - // the instruction itself. Example, instead of: - // lui $2, %hi($CPI1_0) - // addiu $2, $2, %lo($CPI1_0) - // lwc1 $f0, 0($2) - // Generate: - // lui $2, %hi($CPI1_0) - // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || - Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { - SDValue Opnd0 = Addr.getOperand(1).getOperand(0); - if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || - isa<JumpTableSDNode>(Opnd0)) { - Base = Addr.getOperand(0); - Offset = Opnd0; - return true; - } - } - } - + llvm_unreachable("Unimplemented function."); return false; } bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const { - Base = Addr; - Offset = CurDAG->getTargetConstant(0, Addr.getValueType()); - return true; + llvm_unreachable("Unimplemented function."); + return false; } bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const { - return selectAddrRegImm(Addr, Base, Offset) || - selectAddrDefault(Addr, Base, Offset); -} - -void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { - SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy()); - if (Parent) { - switch (Parent->getOpcode()) { - case ISD::LOAD: { - LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent); - switch (SD->getMemoryVT().getSizeInBits()) { - case 8: - case 16: - AliasReg = TM.getFrameLowering()->hasFP(*MF)? - AliasFPReg: getMips16SPAliasReg(); - return; - } - break; - } - case ISD::STORE: { - StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent); - switch (SD->getMemoryVT().getSizeInBits()) { - case 8: - case 16: - AliasReg = TM.getFrameLowering()->hasFP(*MF)? - AliasFPReg: getMips16SPAliasReg(); - return; - } - break; - } - } - } - AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy()); - return; - -} -bool MipsDAGToDAGISel::SelectAddr16( - SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset, - SDValue &Alias) { - EVT ValTy = Addr.getValueType(); - - Alias = CurDAG->getTargetConstant(0, ValTy); - - // if Address is FI, get the TargetFrameIndex. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - Offset = CurDAG->getTargetConstant(0, ValTy); - getMips16SPRefReg(Parent, Alias); - return true; - } - // on PIC code Load GA - if (Addr.getOpcode() == MipsISD::Wrapper) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; - } - if (TM.getRelocationModel() != Reloc::PIC_) { - if ((Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress)) - return false; - } - // Addresses of the form FI+const or FI|const - if (CurDAG->isBaseWithConstantOffset(Addr)) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); - if (isInt<16>(CN->getSExtValue())) { - - // If the first operand is a FI, get the TargetFI Node - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> - (Addr.getOperand(0))) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - getMips16SPRefReg(Parent, Alias); - } - else - Base = Addr.getOperand(0); - - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); - return true; - } - } - // Operand is a result from an ADD. - if (Addr.getOpcode() == ISD::ADD) { - // When loading from constant pools, load the lower address part in - // the instruction itself. Example, instead of: - // lui $2, %hi($CPI1_0) - // addiu $2, $2, %lo($CPI1_0) - // lwc1 $f0, 0($2) - // Generate: - // lui $2, %hi($CPI1_0) - // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || - Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { - SDValue Opnd0 = Addr.getOperand(1).getOperand(0); - if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || - isa<JumpTableSDNode>(Opnd0)) { - Base = Addr.getOperand(0); - Offset = Opnd0; - return true; - } - } - - // If an indexed floating point load/store can be emitted, return false. - const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); - - if (LS && - (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && - Subtarget.hasFPIdx()) - return false; - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, ValTy); - return true; + llvm_unreachable("Unimplemented function."); + return false; } -/// Select multiply instructions. -std::pair<SDNode*, SDNode*> -MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty, - bool HasLo, bool HasHi) { - SDNode *Lo = 0, *Hi = 0; - SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N->getOperand(0), - N->getOperand(1)); - SDValue InFlag = SDValue(Mul, 0); - - if (HasLo) { - unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mflo16 : - (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); - Lo = CurDAG->getMachineNode(Opcode, dl, Ty, MVT::Glue, InFlag); - InFlag = SDValue(Lo, 1); - } - if (HasHi) { - unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mfhi16 : - (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64); - Hi = CurDAG->getMachineNode(Opcode, dl, Ty, InFlag); - } - return std::make_pair(Lo, Hi); +bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias) { + llvm_unreachable("Unimplemented function."); + return false; } - /// Select instructions not customized! Used for /// expanded, promoted and normal instructions SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); - DebugLoc dl = Node->getDebugLoc(); // Dump information about the Node being selected DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); @@ -557,167 +101,19 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { return NULL; } - /// - // Instruction Selection not handled by the auto-generated - // tablegen selection should be handled here. - /// - EVT NodeTy = Node->getValueType(0); - unsigned MultOpc; + // See if subclasses can handle this node. + std::pair<bool, SDNode*> Ret = selectNode(Node); + + if (Ret.first) + return Ret.second; switch(Opcode) { default: break; - case ISD::SUBE: - case ISD::ADDE: { - bool inMips16Mode = Subtarget.inMips16Mode(); - SDValue InFlag = Node->getOperand(2), CmpLHS; - unsigned Opc = InFlag.getOpcode(); (void)Opc; - assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || - (Opc == ISD::SUBC || Opc == ISD::SUBE)) && - "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); - - unsigned MOp; - if (Opcode == ISD::ADDE) { - CmpLHS = InFlag.getValue(0); - if (inMips16Mode) - MOp = Mips::AdduRxRyRz16; - else - MOp = Mips::ADDu; - } else { - CmpLHS = InFlag.getOperand(0); - if (inMips16Mode) - MOp = Mips::SubuRxRyRz16; - else - MOp = Mips::SUBu; - } - - SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; - - SDValue LHS = Node->getOperand(0); - SDValue RHS = Node->getOperand(1); - - EVT VT = LHS.getValueType(); - - unsigned Sltu_op = inMips16Mode? Mips::SltuRxRyRz16: Mips::SLTu; - SDNode *Carry = CurDAG->getMachineNode(Sltu_op, dl, VT, Ops, 2); - unsigned Addu_op = inMips16Mode? Mips::AdduRxRyRz16 : Mips::ADDu; - SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, dl, VT, - SDValue(Carry,0), RHS); - - return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, - LHS, SDValue(AddCarry,0)); - } - - /// Mul with two results - case ISD::SMUL_LOHI: - case ISD::UMUL_LOHI: { - if (NodeTy == MVT::i32) { - if (Subtarget.inMips16Mode()) - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : - Mips::MultRxRy16); - else - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); - } - else - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT); - - std::pair<SDNode*, SDNode*> LoHi = SelectMULT(Node, MultOpc, dl, NodeTy, - true, true); - - if (!SDValue(Node, 0).use_empty()) - ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); - - if (!SDValue(Node, 1).use_empty()) - ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); - - return NULL; - } - - /// Special Muls - case ISD::MUL: { - // Mips32 has a 32-bit three operand mul instruction. - if (Subtarget.hasMips32() && NodeTy == MVT::i32) - break; - return SelectMULT(Node, NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT, - dl, NodeTy, true, false).first; - } - case ISD::MULHS: - case ISD::MULHU: { - if (NodeTy == MVT::i32) { - if (Subtarget.inMips16Mode()) - MultOpc = (Opcode == ISD::MULHU ? - Mips::MultuRxRy16 : Mips::MultRxRy16); - else - MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); - } - else - MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT); - - return SelectMULT(Node, MultOpc, dl, NodeTy, false, true).second; - } - // Get target GOT address. case ISD::GLOBAL_OFFSET_TABLE: return getGlobalBaseReg(); - case ISD::ConstantFP: { - ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); - if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { - if (Subtarget.hasMips64()) { - SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - Mips::ZERO_64, MVT::i64); - return CurDAG->getMachineNode(Mips::DMTC1, dl, MVT::f64, Zero); - } - - SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - Mips::ZERO, MVT::i32); - return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero, - Zero); - } - break; - } - - case ISD::Constant: { - const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node); - unsigned Size = CN->getValueSizeInBits(0); - - if (Size == 32) - break; - - MipsAnalyzeImmediate AnalyzeImm; - int64_t Imm = CN->getSExtValue(); - - const MipsAnalyzeImmediate::InstSeq &Seq = - AnalyzeImm.Analyze(Imm, Size, false); - - MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); - DebugLoc DL = CN->getDebugLoc(); - SDNode *RegOpnd; - SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), - MVT::i64); - - // The first instruction can be a LUi which is different from other - // instructions (ADDiu, ORI and SLL) in that it does not have a register - // operand. - if (Inst->Opc == Mips::LUi64) - RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd); - else - RegOpnd = - CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, - CurDAG->getRegister(Mips::ZERO_64, MVT::i64), - ImmOpnd); - - // The remaining instructions in the sequence are handled here. - for (++Inst; Inst != Seq.end(); ++Inst) { - ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), - MVT::i64); - RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, - SDValue(RegOpnd, 0), ImmOpnd); - } - - return RegOpnd; - } - #ifndef NDEBUG case ISD::LOAD: case ISD::STORE: @@ -726,31 +122,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { "Unexpected unaligned loads/stores."); break; #endif - - case MipsISD::ThreadPointer: { - EVT PtrVT = TLI.getPointerTy(); - unsigned RdhwrOpc, SrcReg, DestReg; - - if (PtrVT == MVT::i32) { - RdhwrOpc = Mips::RDHWR; - SrcReg = Mips::HWR29; - DestReg = Mips::V1; - } else { - RdhwrOpc = Mips::RDHWR64; - SrcReg = Mips::HWR29_64; - DestReg = Mips::V1_64; - } - - SDNode *Rdhwr = - CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(), - Node->getValueType(0), - CurDAG->getRegister(SrcReg, PtrVT)); - SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg, - SDValue(Rdhwr, 0)); - SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT); - ReplaceUses(SDValue(Node, 0), ResNode); - return ResNode.getNode(); - } } // Select the default instruction @@ -776,5 +147,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, /// createMipsISelDag - This pass converts a legalized DAG into a /// MIPS-specific DAG, ready for instruction scheduling. FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) { - return new MipsDAGToDAGISel(TM); + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16ISelDag(TM); + + return llvm::createMipsSEISelDag(TM); } diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h new file mode 100644 index 0000000000..cf0f9c58aa --- /dev/null +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -0,0 +1,93 @@ +//===---- MipsISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the MIPS target. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSISELDAGTODAG_H +#define MIPSISELDAGTODAG_H + +#include "Mips.h" +#include "MipsSubtarget.h" +#include "MipsTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MipsDAGToDAGISel - MIPS specific code to select MIPS machine +// instructions for SelectionDAG operations. +//===----------------------------------------------------------------------===// +namespace llvm { + +class MipsDAGToDAGISel : public SelectionDAGISel { +public: + explicit MipsDAGToDAGISel(MipsTargetMachine &TM) + : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {} + + // Pass Name + virtual const char *getPassName() const { + return "MIPS DAG->DAG Pattern Instruction Selection"; + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + +protected: + SDNode *getGlobalBaseReg(); + + /// Keep a pointer to the MipsSubtarget around so that we can make the right + /// decision when generating code for different targets. + const MipsSubtarget &Subtarget; + +private: + // Include the pieces autogenerated from the target description. + #include "MipsGenDAGISel.inc" + + // Complex Pattern. + /// (reg + imm). + virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Fall back on this function if all else fails. + virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Match integer address pattern. + virtual bool selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias); + + virtual SDNode *Select(SDNode *N); + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0; + + // getImm - Return a target constant with the specified value. + inline SDValue getImm(const SDNode *Node, uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); + } + + virtual void processFunctionAfterISel(MachineFunction &MF) = 0; + + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); +}; + +/// createMipsISelDag - This pass converts a legalized DAG into a +/// MIPS-specific DAG, ready for instruction scheduling. +FunctionPass *createMipsISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index c452dee8d0..4bf43f42e3 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-lower" -#include <set> #include "MipsISelLowering.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" @@ -42,26 +41,9 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt<bool> -EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, - cl::desc("MIPS: Enable tail calls."), cl::init(false)); - -static cl::opt<bool> LargeGOT("mxgot", cl::Hidden, cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false)); -static cl::opt<bool> -Mips16HardFloat("mips16-hard-float", cl::NotHidden, - cl::desc("MIPS: mips16 hard float enable."), - cl::init(false)); - -static cl::opt<bool> DontExpandCondPseudos16( - "mips16-dont-expand-cond-pseudo", - cl::init(false), - cl::desc("Dont expand conditional move related " - "pseudos for Mips 16"), - cl::Hidden); - - static const uint16_t O32IntRegs[4] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; @@ -88,7 +70,7 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { return true; } -static SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) { +SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const { MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>(); return DAG.getRegister(FI->getGlobalBaseReg(), Ty); } @@ -123,7 +105,8 @@ static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) { DAG.getNode(MipsISD::Lo, DL, Ty, Lo)); } -static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) { +SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG, + bool HasMips64) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; @@ -137,7 +120,8 @@ static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) { return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo); } -static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) { +SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG, + unsigned Flag) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), @@ -146,8 +130,9 @@ static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) { MachinePointerInfo::getGOT(), false, false, false, 0); } -static SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, - unsigned HiFlag, unsigned LoFlag) { +SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, + unsigned HiFlag, + unsigned LoFlag) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag)); @@ -211,110 +196,17 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { } } -namespace { - struct LTStr { - bool operator()(const char *S1, const char *S2) const - { - return strcmp(S1, S2) < 0; - } - }; - - std::set<const char*, LTStr> NoHelperNeeded; -} - -void MipsTargetLowering::setMips16LibcallName - (RTLIB::Libcall L, const char *Name) { - setLibcallName(L, Name); - NoHelperNeeded.insert(Name); -} - -void MipsTargetLowering::setMips16HardFloatLibCalls() { - setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); - setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); - setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); - setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); - setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); - setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); - setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); - setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); - setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); - setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); - setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); - setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); - setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); - setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); - setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); - setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); - setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); - setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); - setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); - setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); - setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); - setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); - setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); - setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); - setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); - setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); - setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); - setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); - setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); - setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); - setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2"); - setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2"); -} - MipsTargetLowering:: MipsTargetLowering(MipsTargetMachine &TM) : TargetLowering(TM, new MipsTargetObjectFile()), Subtarget(&TM.getSubtarget<MipsSubtarget>()), HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()), IsO32(Subtarget->isABI_O32()) { - // Mips does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? - // Set up the register classes - addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); - - if (HasMips64) - addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass); - - if (Subtarget->inMips16Mode()) { - addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass); - if (Mips16HardFloat) - setMips16HardFloatLibCalls(); - } - - if (Subtarget->hasDSP()) { - MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; - - for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { - addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass); - - // Expand all builtin opcodes. - for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) - setOperationAction(Opc, VecTys[i], Expand); - - setOperationAction(ISD::LOAD, VecTys[i], Legal); - setOperationAction(ISD::STORE, VecTys[i], Legal); - setOperationAction(ISD::BITCAST, VecTys[i], Legal); - } - } - - if (!TM.Options.UseSoftFloat) { - addRegisterClass(MVT::f32, &Mips::FGR32RegClass); - - // When dealing with single precision only, use libcalls - if (!Subtarget->isSingleFloat()) { - if (HasMips64) - addRegisterClass(MVT::f64, &Mips::FGR64RegClass); - else - addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); - } - } - // Load extented operations for i1 types must be promoted setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); @@ -348,18 +240,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); - if (Subtarget->inMips16Mode()) { - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); - } - else { - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - } - if (!Subtarget->inMips16Mode()) { - setOperationAction(ISD::LOAD, MVT::i32, Custom); - setOperationAction(ISD::STORE, MVT::i32, Custom); - } if (!TM.Options.NoNaNsFPMath) { setOperationAction(ISD::FABS, MVT::f32, Custom); @@ -472,21 +352,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); - if (Subtarget->inMips16Mode()) { - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); - } - setInsertFencesForAtomic(true); if (!Subtarget->hasSEInReg()) { @@ -523,7 +388,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setMinFunctionAlignment(HasMips64 ? 3 : 2); setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP); - computeRegisterProperties(); setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0); setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1); @@ -531,22 +395,11 @@ MipsTargetLowering(MipsTargetMachine &TM) MaxStoresPerMemcpy = 16; } -bool -MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { - MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; - - if (Subtarget->inMips16Mode()) - return false; +const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) { + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16TargetLowering(TM); - switch (SVT) { - case MVT::i64: - case MVT::i32: - if (Fast) - *Fast = true; - return true; - default: - return false; - } + return llvm::createMipsSETargetLowering(TM); } EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { @@ -1099,347 +952,6 @@ static Mips::FPBranchCode getFPBranchCodeFromCond(Mips::CondCode CC) { } MachineBasicBlock * -MipsTargetLowering::emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ - // $bb: - // bposge32_pseudo $vr0 - // => - // $bb: - // bposge32 $tbb - // $fbb: - // li $vr2, 0 - // b $sink - // $tbb: - // li $vr1, 1 - // $sink: - // $vr0 = phi($vr2, $fbb, $vr1, $tbb) - - MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const TargetRegisterClass *RC = &Mips::CPURegsRegClass; - DebugLoc DL = MI->getDebugLoc(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); - MachineFunction *F = BB->getParent(); - MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, FBB); - F->insert(It, TBB); - F->insert(It, Sink); - - // Transfer the remainder of BB and its successor edges to Sink. - Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - Sink->transferSuccessorsAndUpdatePHIs(BB); - - // Add successors. - BB->addSuccessor(FBB); - BB->addSuccessor(TBB); - FBB->addSuccessor(Sink); - TBB->addSuccessor(Sink); - - // Insert the real bposge32 instruction to $BB. - BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); - - // Fill $FBB. - unsigned VR2 = RegInfo.createVirtualRegister(RC); - BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) - .addReg(Mips::ZERO).addImm(0); - BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); - - // Fill $TBB. - unsigned VR1 = RegInfo.createVirtualRegister(RC); - BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) - .addReg(Mips::ZERO).addImm(1); - - // Insert phi function to $Sink. - BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), - MI->getOperand(0).getReg()) - .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return Sink; -} - -MachineBasicBlock *MipsTargetLowering::emitSel16(unsigned Opc, MachineInstr *MI, - MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg()) - .addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), DL, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} - -MachineBasicBlock *MipsTargetLowering::emitSelT16 - (unsigned Opc1, unsigned Opc2, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) - .addReg(MI->getOperand(4).getReg()); - BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), DL, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - -} - - -MachineBasicBlock *MipsTargetLowering::emitSeliT16 - (unsigned Opc1, unsigned Opc2, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) - .addImm(MI->getOperand(4).getImm()); - BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), DL, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - -} - - -MachineBasicBlock - *MipsTargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, - MachineInstr *MI, - MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned regX = MI->getOperand(0).getReg(); - unsigned regY = MI->getOperand(1).getReg(); - MachineBasicBlock *target = MI->getOperand(2).getMBB(); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} - - -MachineBasicBlock *MipsTargetLowering::emitFEXT_T8I8I16_ins( - unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned regX = MI->getOperand(0).getReg(); - int64_t imm = MI->getOperand(1).getImm(); - MachineBasicBlock *target = MI->getOperand(2).getMBB(); - unsigned CmpOpc; - if (isUInt<8>(imm)) - CmpOpc = CmpiOpc; - else if (isUInt<16>(imm)) - CmpOpc = CmpiXOpc; - else - llvm_unreachable("immediate field not usable"); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} - - -static unsigned Mips16WhichOp8uOr16simm - (unsigned shortOp, unsigned longOp, int64_t Imm) { - if (isUInt<8>(Imm)) - return shortOp; - else if (isInt<16>(Imm)) - return longOp; - else - llvm_unreachable("immediate field not usable"); -} - -MachineBasicBlock *MipsTargetLowering::emitFEXT_CCRX16_ins( - unsigned SltOpc, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned CC = MI->getOperand(0).getReg(); - unsigned regX = MI->getOperand(1).getReg(); - unsigned regY = MI->getOperand(2).getReg(); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(SltOpc)).addReg(regX).addReg(regY); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} -MachineBasicBlock *MipsTargetLowering::emitFEXT_CCRXI16_ins( - unsigned SltiOpc, unsigned SltiXOpc, - MachineInstr *MI, MachineBasicBlock *BB )const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned CC = MI->getOperand(0).getReg(); - unsigned regX = MI->getOperand(1).getReg(); - int64_t Imm = MI->getOperand(2).getImm(); - unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(SltOpc)).addReg(regX).addImm(Imm); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - -} -MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { @@ -1548,77 +1060,6 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case Mips::ATOMIC_CMP_SWAP_I64: case Mips::ATOMIC_CMP_SWAP_I64_P8: return emitAtomicCmpSwap(MI, BB, 8); - case Mips::BPOSGE32_PSEUDO: - return emitBPOSGE32(MI, BB); - case Mips::SelBeqZ: - return emitSel16(Mips::BeqzRxImm16, MI, BB); - case Mips::SelBneZ: - return emitSel16(Mips::BnezRxImm16, MI, BB); - case Mips::SelTBteqZCmpi: - return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB); - case Mips::SelTBteqZSlti: - return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB); - case Mips::SelTBteqZSltiu: - return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SelTBtneZCmpi: - return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB); - case Mips::SelTBtneZSlti: - return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); - case Mips::SelTBtneZSltiu: - return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SelTBteqZCmp: - return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); - case Mips::SelTBteqZSlt: - return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); - case Mips::SelTBteqZSltu: - return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); - case Mips::SelTBtneZCmp: - return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); - case Mips::SelTBtneZSlt: - return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); - case Mips::SelTBtneZSltu: - return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); - case Mips::BteqzT8CmpX16: - return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); - case Mips::BteqzT8SltX16: - return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); - case Mips::BteqzT8SltuX16: - // TBD: figure out a way to get this or remove the instruction - // altogether. - return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); - case Mips::BtnezT8CmpX16: - return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); - case Mips::BtnezT8SltX16: - return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); - case Mips::BtnezT8SltuX16: - // TBD: figure out a way to get this or remove the instruction - // altogether. - return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); - case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); - case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); - case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - break; - case Mips::SltCCRxRy16: - return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB); - break; - case Mips::SltiCCRxImmX16: - return emitFEXT_CCRXI16_ins - (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::SltiuCCRxImmX16: - return emitFEXT_CCRXI16_ins - (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SltuCCRxRy16: - return emitFEXT_CCRX16_ins - (Mips::SltuRxRy16, MI, BB); } } @@ -3074,28 +2515,6 @@ static unsigned getNextIntArgReg(unsigned Reg) { return (Reg == Mips::A0) ? Mips::A1 : Mips::A3; } -/// isEligibleForTailCallOptimization - Check whether the call is eligible -/// for tail call optimization. -bool MipsTargetLowering:: -isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, - unsigned NextStackOffset, - const MipsFunctionInfo& FI) const { - if (!EnableMipsTailCalls) - return false; - - // No tail call optimization for mips16. - if (Subtarget->inMips16Mode()) - return false; - - // Return false if either the callee or caller has a byval argument. - if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) - return false; - - // Return true if the callee's argument area is no larger than the - // caller's. - return NextStackOffset <= FI.getIncomingArgSize(); -} - SDValue MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, SDValue Arg, DebugLoc DL, @@ -3114,161 +2533,48 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, /*isVolatile=*/ true, false, 0); } -// -// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much -// cleaner way to do all of this but it will have to wait until the traditional -// gcc mechanism is completed. -// -// For Pic, in order for Mips16 code to call Mips32 code which according the abi -// have either arguments or returned values placed in floating point registers, -// we use a set of helper functions. (This includes functions which return type -// complex which on Mips are returned in a pair of floating point registers). -// -// This is an encoding that we inherited from gcc. -// In Mips traditional O32, N32 ABI, floating point numbers are passed in -// floating point argument registers 1,2 only when the first and optionally -// the second arguments are float (sf) or double (df). -// For Mips16 we are only concerned with the situations where floating point -// arguments are being passed in floating point registers by the ABI, because -// Mips16 mode code cannot execute floating point instructions to load those -// values and hence helper functions are needed. -// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df) -// the helper function suffixs for these are: -// 0, 1, 5, 9, 2, 6, 10 -// this suffix can then be calculated as follows: -// for a given argument Arg: -// Arg1x, Arg2x = 1 : Arg is sf -// 2 : Arg is df -// 0: Arg is neither sf or df -// So this stub is the string for number Arg1x + Arg2x*4. -// However not all numbers between 0 and 10 are possible, we check anyway and -// assert if the impossible exists. -// - -unsigned int MipsTargetLowering::getMips16HelperFunctionStubNumber - (ArgListTy &Args) const { - unsigned int resultNum = 0; - if (Args.size() >= 1) { - Type *t = Args[0].Ty; - if (t->isFloatTy()) { - resultNum = 1; - } - else if (t->isDoubleTy()) { - resultNum = 2; - } - } - if (resultNum) { - if (Args.size() >=2) { - Type *t = Args[1].Ty; - if (t->isFloatTy()) { - resultNum += 4; - } - else if (t->isDoubleTy()) { - resultNum += 8; - } - } +void MipsTargetLowering:: +getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + // Insert node "GP copy globalreg" before call to function. + // + // R_MIPS_CALL* operators (emitted when non-internal functions are called + // in PIC mode) allow symbols to be resolved via lazy binding. + // The lazy binding stub requires GP to point to the GOT. + if (IsPICCall && !InternalLinkage) { + unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP; + EVT Ty = IsN64 ? MVT::i64 : MVT::i32; + RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty))); } - return resultNum; -} -// -// prefixs are attached to stub numbers depending on the return type . -// return type: float sf_ -// double df_ -// single complex sc_ -// double complext dc_ -// others NO PREFIX -// -// -// The full name of a helper function is__mips16_call_stub + -// return type dependent prefix + stub number -// -// -// This is something that probably should be in a different source file and -// perhaps done differently but my main purpose is to not waste runtime -// on something that we can enumerate in the source. Another possibility is -// to have a python script to generate these mapping tables. This will do -// for now. There are a whole series of helper function mapping arrays, one -// for each return type class as outlined above. There there are 11 possible -// entries. Ones with 0 are ones which should never be selected -// -// All the arrays are similar except for ones which return neither -// sf, df, sc, dc, in which only care about ones which have sf or df as a -// first parameter. -// -#define P_ "__mips16_call_stub_" -#define MAX_STUB_NUMBER 10 -#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10" -#define T P "0" , T1 -#define P P_ -static char const * vMips16Helper[MAX_STUB_NUMBER+1] = - {0, T1 }; -#undef P -#define P P_ "sf_" -static char const * sfMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#define P P_ "df_" -static char const * dfMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#define P P_ "sc_" -static char const * scMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#define P P_ "dc_" -static char const * dcMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#undef P_ - - -const char* MipsTargetLowering:: - getMips16HelperFunction - (Type* RetTy, ArgListTy &Args, bool &needHelper) const { - const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args); -#ifndef NDEBUG - const unsigned int maxStubNum = 10; - assert(stubNum <= maxStubNum); - const bool validStubNum[maxStubNum+1] = - {true, true, true, false, false, true, true, false, false, true, true}; - assert(validStubNum[stubNum]); -#endif - const char *result; - if (RetTy->isFloatTy()) { - result = sfMips16Helper[stubNum]; - } - else if (RetTy ->isDoubleTy()) { - result = dfMips16Helper[stubNum]; - } - else if (RetTy->isStructTy()) { - // check if it's complex - if (RetTy->getNumContainedTypes() == 2) { - if ((RetTy->getContainedType(0)->isFloatTy()) && - (RetTy->getContainedType(1)->isFloatTy())) { - result = scMips16Helper[stubNum]; - } - else if ((RetTy->getContainedType(0)->isDoubleTy()) && - (RetTy->getContainedType(1)->isDoubleTy())) { - result = dcMips16Helper[stubNum]; - } - else { - llvm_unreachable("Uncovered condition"); - } - } - else { - llvm_unreachable("Uncovered condition"); - } - } - else { - if (stubNum == 0) { - needHelper = false; - return ""; - } - result = vMips16Helper[stubNum]; + // Build a sequence of copy-to-reg nodes chained together with token + // chain and flag operands which copy the outgoing args into registers. + // The InFlag in necessary since all emitted instructions must be + // stuck together. + SDValue InFlag; + + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); } - needHelper = true; - return result; + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(CLI.DAG.getRegisterMask(Mask)); + + if (InFlag.getNode()) + Ops.push_back(InFlag); } /// LowerCall - functions arguments are copied from virtual regs to @@ -3287,26 +2593,6 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; - const char* Mips16HelperFunction = 0; - bool NeedMips16Helper = false; - - if (Subtarget->inMips16Mode() && getTargetMachine().Options.UseSoftFloat && - Mips16HardFloat) { - // - // currently we don't have symbols tagged with the mips16 or mips32 - // qualifier so we will assume that we don't know what kind it is. - // and generate the helper - // - bool LookupHelper = true; - if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) { - LookupHelper = false; - } - } - if (LookupHelper) Mips16HelperFunction = - getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper); - - } MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering(); @@ -3466,80 +2752,17 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, GlobalOrExternal = true; } - SDValue JumpTarget = Callee; - - // T9 should contain the address of the callee function if - // -reloction-model=pic or it is an indirect call. - if (IsPICCall || !GlobalOrExternal) { - unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; - unsigned V0Reg = Mips::V0; - if (NeedMips16Helper) { - RegsToPass.push_front(std::make_pair(V0Reg, Callee)); - JumpTarget = DAG.getExternalSymbol( - Mips16HelperFunction, getPointerTy()); - JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT); - } - else { - RegsToPass.push_front(std::make_pair(T9Reg, Callee)); - - if (!Subtarget->inMips16Mode()) - JumpTarget = SDValue(); - } - } - - // Insert node "GP copy globalreg" before call to function. - // - // R_MIPS_CALL* operators (emitted when non-internal functions are called - // in PIC mode) allow symbols to be resolved via lazy binding. - // The lazy binding stub requires GP to point to the GOT. - if (IsPICCall && !InternalLinkage) { - unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP; - EVT Ty = IsN64 ? MVT::i64 : MVT::i32; - RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(DAG, Ty))); - } - - // Build a sequence of copy-to-reg nodes chained together with token - // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emitted instructions must be - // stuck together. - SDValue InFlag; - - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // MipsJmpLink = #chain, #target_address, #opt_in_flags... - // = Chain, Callee, Reg#1, Reg#2, ... - // - // Returns a chain & a flag for retval copy to use. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops(1, Chain); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - if (JumpTarget.getNode()) - Ops.push_back(JumpTarget); - - // Add argument registers to the end of the list so that they are - // known live into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - // Add a register mask operand representing the call-preserved registers. - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); - - if (InFlag.getNode()) - Ops.push_back(InFlag); + getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage, + CLI, Callee, Chain); if (IsTailCall) return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, &Ops[0], Ops.size()); Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); + SDValue InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, @@ -4124,14 +3347,14 @@ static bool isF128SoftLibCall(const char *CallSym) { const char * const *End = LibCalls + array_lengthof(LibCalls); // Check that LibCalls is sorted alphabetically. -#ifndef NDEBUG - LTStr Comp; + MipsTargetLowering::LTStr Comp; +#ifndef NDEBUG for (const char * const *I = LibCalls; I < End - 1; ++I) assert(Comp(*I, *(I + 1))); #endif - return std::binary_search(LibCalls, End, CallSym, LTStr()); + return std::binary_search(LibCalls, End, CallSym, Comp); } /// This function returns true if Ty is fp128 or i128 which was originally a diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index de925e16ab..71977d7f1c 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -152,9 +152,9 @@ namespace llvm { public: explicit MipsTargetLowering(MipsTargetMachine &TM); - virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + static const MipsTargetLowering *create(MipsTargetMachine &TM); - virtual bool allowsUnalignedMemoryAccesses (EVT VT, bool *Fast) const; + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } virtual void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, @@ -177,17 +177,34 @@ namespace llvm { EVT getSetCCResultType(EVT VT) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - private: - void setMips16LibcallName(RTLIB::Libcall, const char *Name); + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + struct LTStr { + bool operator()(const char *S1, const char *S2) const { + return strcmp(S1, S2) < 0; + } + }; + + protected: + SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const; - void setMips16HardFloatLibCalls(); + SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const; - unsigned int - getMips16HelperFunctionStubNumber(ArgListTy &Args) const; + SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const; - const char *getMips16HelperFunction - (Type* RetTy, ArgListTy &Args, bool &needHelper) const; + SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, + unsigned HiFlag, unsigned LoFlag) const; + + /// This function fills Ops, which is the list of operands that will later + /// be used when a function call node is created. It also generates + /// copyToReg nodes to set up argument registers. + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; /// ByValArgInfo - Byval argument information. struct ByValArgInfo { @@ -283,6 +300,7 @@ namespace llvm { bool HasMips64, IsN64, IsO32; + private: // Lower Operand helpers SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, @@ -321,9 +339,10 @@ namespace llvm { /// isEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. - bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, - unsigned NextStackOffset, - const MipsFunctionInfo& FI) const; + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const = 0; /// copyByValArg - Copy argument registers which were used to pass a byval /// argument to the stack. Create a stack frame object for the byval @@ -377,10 +396,6 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - virtual MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; - // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const; @@ -419,8 +434,6 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; - MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, - MachineBasicBlock *BB) const; MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand = false) const; MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI, @@ -430,29 +443,11 @@ namespace llvm { MachineBasicBlock *BB, unsigned Size) const; MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size) const; - MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2, - MachineInstr *MI, - MachineBasicBlock *BB) const; - - MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2, - MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, - MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_T8I8I16_ins( - unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, - MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_CCRX16_ins( - unsigned SltOpc, - MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_CCRXI16_ins( - unsigned SltiOpc, unsigned SltiXOpc, - MachineInstr *MI, MachineBasicBlock *BB )const; - }; + + /// Create MipsTargetLowering objects. + const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM); + const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM); } #endif // MipsISELLOWERING_H diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 25b5d240be..022987f12e 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -256,6 +256,7 @@ def mem : Operand<i32> { let MIOperandInfo = (ops CPURegs, simm16); let EncoderMethod = "getMemEncoding"; let ParserMatchClass = MipsMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } def mem64 : Operand<i64> { @@ -263,18 +264,21 @@ def mem64 : Operand<i64> { let MIOperandInfo = (ops CPU64Regs, simm16_64); let EncoderMethod = "getMemEncoding"; let ParserMatchClass = MipsMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } def mem_ea : Operand<i32> { let PrintMethod = "printMemOperandEA"; let MIOperandInfo = (ops CPURegs, simm16); let EncoderMethod = "getMemEncoding"; + let OperandType = "OPERAND_MEMORY"; } def mem_ea_64 : Operand<i64> { let PrintMethod = "printMemOperandEA"; let MIOperandInfo = (ops CPU64Regs, simm16_64); let EncoderMethod = "getMemEncoding"; + let OperandType = "OPERAND_MEMORY"; } // size operand of ext instruction diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 13b2a6ac17..3c210e71bb 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -58,7 +58,8 @@ public: int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; /// Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index f93dd86c17..6d76e8a54a 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -18,6 +18,10 @@ def sub_lo : SubRegIndex; def sub_hi : SubRegIndex; } +class Unallocatable { + bit isAllocatable = 0; +} + // We have banks of 32 registers each. class MipsReg<bits<16> Enc, string n> : Register<n> { let HWEncoding = Enc; @@ -291,9 +295,9 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add // Callee save S0, S1)>; -def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>; +def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>, Unallocatable; -def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>; +def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable; // 64bit fp: // * FGR64 - 32 64-bit registers @@ -319,18 +323,19 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>; // Condition Register for floating point operations -def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>; +def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable; // Hi/Lo Registers -def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>; -def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>; +def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable; +def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable; // Hardware registers -def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>; -def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>; +def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable; +def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable; // Accumulator Registers -def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>; +def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>, + Unallocatable; def CPURegsAsmOperand : AsmOperandClass { let Name = "CPURegsAsm"; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp new file mode 100644 index 0000000000..e22c3c8b3f --- /dev/null +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -0,0 +1,460 @@ +//===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips32/64. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-isel" +#include "MipsSEISelDAGToDAG.h" +#include "Mips.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsMachineFunction.h" +#include "MipsRegisterInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + + +bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, + const MachineInstr& MI) { + unsigned DstReg = 0, ZeroReg = 0; + + // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0". + if ((MI.getOpcode() == Mips::ADDiu) && + (MI.getOperand(1).getReg() == Mips::ZERO) && + (MI.getOperand(2).getImm() == 0)) { + DstReg = MI.getOperand(0).getReg(); + ZeroReg = Mips::ZERO; + } else if ((MI.getOpcode() == Mips::DADDiu) && + (MI.getOperand(1).getReg() == Mips::ZERO_64) && + (MI.getOperand(2).getImm() == 0)) { + DstReg = MI.getOperand(0).getReg(); + ZeroReg = Mips::ZERO_64; + } + + if (!DstReg) + return false; + + // Replace uses with ZeroReg. + for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), + E = MRI->use_end(); U != E;) { + MachineOperand &MO = U.getOperand(); + unsigned OpNo = U.getOperandNo(); + MachineInstr *MI = MO.getParent(); + ++U; + + // Do not replace if it is a phi's operand or is tied to def operand. + if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) + continue; + + MO.setReg(ZeroReg); + } + + return true; +} + +void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->globalBaseRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg(); + const TargetRegisterClass *RC; + + if (Subtarget.isABI_N64()) + RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; + else + RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass; + + V0 = RegInfo.createVirtualRegister(RC); + V1 = RegInfo.createVirtualRegister(RC); + + if (Subtarget.isABI_N64()) { + MF.getRegInfo().addLiveIn(Mips::T9_64); + MBB.addLiveIn(Mips::T9_64); + + // lui $v0, %hi(%neg(%gp_rel(fname))) + // daddu $v1, $v0, $t9 + // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0) + .addReg(Mips::T9_64); + BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + return; + } + + if (MF.getTarget().getRelocationModel() == Reloc::Static) { + // Set global register to __gnu_local_gp. + // + // lui $v0, %hi(__gnu_local_gp) + // addiu $globalbasereg, $v0, %lo(__gnu_local_gp) + BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) + .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) + .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); + return; + } + + MF.getRegInfo().addLiveIn(Mips::T9); + MBB.addLiveIn(Mips::T9); + + if (Subtarget.isABI_N32()) { + // lui $v0, %hi(%neg(%gp_rel(fname))) + // addu $v1, $v0, $t9 + // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); + BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + return; + } + + assert(Subtarget.isABI_O32()); + + // For O32 ABI, the following instruction sequence is emitted to initialize + // the global base register: + // + // 0. lui $2, %hi(_gp_disp) + // 1. addiu $2, $2, %lo(_gp_disp) + // 2. addu $globalbasereg, $2, $t9 + // + // We emit only the last instruction here. + // + // GNU linker requires that the first two instructions appear at the beginning + // of a function and no instructions be inserted before or between them. + // The two instructions are emitted during lowering to MC layer in order to + // avoid any reordering. + // + // Register $2 (Mips::V0) is added to the list of live-in registers to ensure + // the value instruction 1 (addiu) defines is valid when instruction 2 (addu) + // reads it. + MF.getRegInfo().addLiveIn(Mips::V0); + MBB.addLiveIn(Mips::V0); + BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) + .addReg(Mips::V0).addReg(Mips::T9); +} + +void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { + initGlobalBaseReg(MF); + + MachineRegisterInfo *MRI = &MF.getRegInfo(); + + for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE; + ++MFI) + for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) + replaceUsesWithZeroReg(MRI, *I); +} + +/// Select multiply instructions. +std::pair<SDNode*, SDNode*> +MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty, + bool HasLo, bool HasHi) { + SDNode *Lo = 0, *Hi = 0; + SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0), + N->getOperand(1)); + SDValue InFlag = SDValue(Mul, 0); + + if (HasLo) { + unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); + Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag); + InFlag = SDValue(Lo, 1); + } + if (HasHi) { + unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64); + Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag); + } + return std::make_pair(Lo, Hi); +} + +SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, + SDValue CmpLHS, DebugLoc DL, + SDNode *Node) const { + unsigned Opc = InFlag.getOpcode(); (void)Opc; + + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); + + SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; + SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); + EVT VT = LHS.getValueType(); + + SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2); + SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT, + SDValue(Carry, 0), RHS); + return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, + SDValue(AddCarry, 0)); +} + +/// ComplexPattern used on MipsInstrInfo +/// Used on Mips Load/Store instructions +bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + EVT ValTy = Addr.getValueType(); + + // if Address is FI, get the TargetFrameIndex. + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, ValTy); + return true; + } + + // on PIC code Load GA + if (Addr.getOpcode() == MipsISD::Wrapper) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + + if (TM.getRelocationModel() != Reloc::PIC_) { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + + // Addresses of the form FI+const or FI|const + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + if (isInt<16>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> + (Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + else + Base = Addr.getOperand(0); + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); + return true; + } + } + + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + // When loading from constant pools, load the lower address part in + // the instruction itself. Example, instead of: + // lui $2, %hi($CPI1_0) + // addiu $2, $2, %lo($CPI1_0) + // lwc1 $f0, 0($2) + // Generate: + // lui $2, %hi($CPI1_0) + // lwc1 $f0, %lo($CPI1_0)($2) + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || + Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { + SDValue Opnd0 = Addr.getOperand(1).getOperand(0); + if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || + isa<JumpTableSDNode>(Opnd0)) { + Base = Addr.getOperand(0); + Offset = Opnd0; + return true; + } + } + } + + return false; +} + +bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType()); + return true; +} + +bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + return selectAddrRegImm(Addr, Base, Offset) || + selectAddrDefault(Addr, Base, Offset); +} + +std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + DebugLoc DL = Node->getDebugLoc(); + + /// + // Instruction Selection not handled by the auto-generated + // tablegen selection should be handled here. + /// + EVT NodeTy = Node->getValueType(0); + SDNode *Result; + unsigned MultOpc; + + switch(Opcode) { + default: break; + + case ISD::SUBE: { + SDValue InFlag = Node->getOperand(2); + Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node); + return std::make_pair(true, Result); + } + + case ISD::ADDE: { + SDValue InFlag = Node->getOperand(2); + Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node); + return std::make_pair(true, Result); + } + + /// Mul with two results + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: { + if (NodeTy == MVT::i32) + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); + else + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT); + + std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy, + true, true); + + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); + + if (!SDValue(Node, 1).use_empty()) + ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); + + return std::make_pair(true, (SDNode*)NULL); + } + + /// Special Muls + case ISD::MUL: { + // Mips32 has a 32-bit three operand mul instruction. + if (Subtarget.hasMips32() && NodeTy == MVT::i32) + break; + MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT; + Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first; + return std::make_pair(true, Result); + } + case ISD::MULHS: + case ISD::MULHU: { + if (NodeTy == MVT::i32) + MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); + else + MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT); + + Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second; + return std::make_pair(true, Result); + } + + case ISD::ConstantFP: { + ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); + if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { + if (Subtarget.hasMips64()) { + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, + Mips::ZERO_64, MVT::i64); + Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero); + } else { + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, + Mips::ZERO, MVT::i32); + Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero, + Zero); + } + + return std::make_pair(true, Result); + } + break; + } + + case ISD::Constant: { + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node); + unsigned Size = CN->getValueSizeInBits(0); + + if (Size == 32) + break; + + MipsAnalyzeImmediate AnalyzeImm; + int64_t Imm = CN->getSExtValue(); + + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Imm, Size, false); + + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); + DebugLoc DL = CN->getDebugLoc(); + SDNode *RegOpnd; + SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), + MVT::i64); + + // The first instruction can be a LUi which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. + if (Inst->Opc == Mips::LUi64) + RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd); + else + RegOpnd = + CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, + CurDAG->getRegister(Mips::ZERO_64, MVT::i64), + ImmOpnd); + + // The remaining instructions in the sequence are handled here. + for (++Inst; Inst != Seq.end(); ++Inst) { + ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), + MVT::i64); + RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, + SDValue(RegOpnd, 0), ImmOpnd); + } + + return std::make_pair(true, RegOpnd); + } + + case MipsISD::ThreadPointer: { + EVT PtrVT = TLI.getPointerTy(); + unsigned RdhwrOpc, SrcReg, DestReg; + + if (PtrVT == MVT::i32) { + RdhwrOpc = Mips::RDHWR; + SrcReg = Mips::HWR29; + DestReg = Mips::V1; + } else { + RdhwrOpc = Mips::RDHWR64; + SrcReg = Mips::HWR29_64; + DestReg = Mips::V1_64; + } + + SDNode *Rdhwr = + CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(), + Node->getValueType(0), + CurDAG->getRegister(SrcReg, PtrVT)); + SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg, + SDValue(Rdhwr, 0)); + SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT); + ReplaceUses(SDValue(Node, 0), ResNode); + return std::make_pair(true, ResNode.getNode()); + } + } + + return std::make_pair(false, (SDNode*)NULL); +} + +FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) { + return new MipsSEDAGToDAGISel(TM); +} diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h new file mode 100644 index 0000000000..6137ab040b --- /dev/null +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -0,0 +1,57 @@ +//===-- MipsSEISelDAGToDAG.h - A Dag to Dag Inst Selector for MipsSE -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips32/64. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSEISELDAGTODAG_H +#define MIPSSEISELDAGTODAG_H + +#include "MipsISelDAGToDAG.h" + +namespace llvm { + +class MipsSEDAGToDAGISel : public MipsDAGToDAGISel { + +public: + explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {} + +private: + bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); + + std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl, + EVT Ty, bool HasLo, bool HasHi); + + SDNode *selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS, + DebugLoc DL, SDNode *Node) const; + + virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node); + + virtual void processFunctionAfterISel(MachineFunction &MF); + + // Insert instructions to initialize the global base register in the + // first MBB of the function. + void initGlobalBaseReg(MachineFunction &MF); +}; + +FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp new file mode 100644 index 0000000000..287e2ede0e --- /dev/null +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -0,0 +1,197 @@ +//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips32/64. +// +//===----------------------------------------------------------------------===// +#include "MipsSEISelLowering.h" +#include "MipsRegisterInfo.h" +#include "MipsTargetMachine.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +static cl::opt<bool> +EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, + cl::desc("MIPS: Enable tail calls."), cl::init(false)); + +MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) + : MipsTargetLowering(TM) { + // Set up the register classes + addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); + + if (HasMips64) + addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass); + + if (Subtarget->hasDSP()) { + MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; + + for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { + addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass); + + // Expand all builtin opcodes. + for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, VecTys[i], Expand); + + setOperationAction(ISD::LOAD, VecTys[i], Legal); + setOperationAction(ISD::STORE, VecTys[i], Legal); + setOperationAction(ISD::BITCAST, VecTys[i], Legal); + } + } + + if (!TM.Options.UseSoftFloat) { + addRegisterClass(MVT::f32, &Mips::FGR32RegClass); + + // When dealing with single precision only, use libcalls + if (!Subtarget->isSingleFloat()) { + if (HasMips64) + addRegisterClass(MVT::f64, &Mips::FGR64RegClass); + else + addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); + } + } + + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i32, Custom); + + computeRegisterProperties(); +} + +const MipsTargetLowering * +llvm::createMipsSETargetLowering(MipsTargetMachine &TM) { + return new MipsSETargetLowering(TM); +} + + +bool +MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { + MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; + + switch (SVT) { + case MVT::i64: + case MVT::i32: + if (Fast) + *Fast = true; + return true; + default: + return false; + } +} + +MachineBasicBlock * +MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + switch (MI->getOpcode()) { + default: + return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case Mips::BPOSGE32_PSEUDO: + return emitBPOSGE32(MI, BB); + } +} + +bool MipsSETargetLowering:: +isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const { + if (!EnableMipsTailCalls) + return false; + + // Return false if either the callee or caller has a byval argument. + if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) + return false; + + // Return true if the callee's argument area is no larger than the + // caller's. + return NextStackOffset <= FI.getIncomingArgSize(); +} + +void MipsSETargetLowering:: +getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + // T9 should contain the address of the callee function if + // -reloction-model=pic or it is an indirect call. + if (IsPICCall || !GlobalOrExternal) { + unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; + RegsToPass.push_front(std::make_pair(T9Reg, Callee)); + } else + Ops.push_back(Callee); + + MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, + InternalLinkage, CLI, Callee, Chain); +} + +MachineBasicBlock * MipsSETargetLowering:: +emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ + // $bb: + // bposge32_pseudo $vr0 + // => + // $bb: + // bposge32 $tbb + // $fbb: + // li $vr2, 0 + // b $sink + // $tbb: + // li $vr1, 1 + // $sink: + // $vr0 = phi($vr2, $fbb, $vr1, $tbb) + + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetRegisterClass *RC = &Mips::CPURegsRegClass; + DebugLoc DL = MI->getDebugLoc(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); + MachineFunction *F = BB->getParent(); + MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, FBB); + F->insert(It, TBB); + F->insert(It, Sink); + + // Transfer the remainder of BB and its successor edges to Sink. + Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + Sink->transferSuccessorsAndUpdatePHIs(BB); + + // Add successors. + BB->addSuccessor(FBB); + BB->addSuccessor(TBB); + FBB->addSuccessor(Sink); + TBB->addSuccessor(Sink); + + // Insert the real bposge32 instruction to $BB. + BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); + + // Fill $FBB. + unsigned VR2 = RegInfo.createVirtualRegister(RC); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) + .addReg(Mips::ZERO).addImm(0); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); + + // Fill $TBB. + unsigned VR1 = RegInfo.createVirtualRegister(RC); + BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) + .addReg(Mips::ZERO).addImm(1); + + // Insert phi function to $Sink. + BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), + MI->getOperand(0).getReg()) + .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return Sink; +} diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h new file mode 100644 index 0000000000..04a28ce54c --- /dev/null +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -0,0 +1,46 @@ +//===-- MipsSEISelLowering.h - MipsSE DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips32/64. +// +//===----------------------------------------------------------------------===// + +#ifndef MipsSEISELLOWERING_H +#define MipsSEISELLOWERING_H + +#include "MipsISelLowering.h" + +namespace llvm { + class MipsSETargetLowering : public MipsTargetLowering { + public: + explicit MipsSETargetLowering(MipsTargetMachine &TM); + + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + private: + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const; + + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; + + MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, + MachineBasicBlock *BB) const; + }; +} + +#endif // MipsSEISELLOWERING_H diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index fd930f0335..33363580ab 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -54,7 +54,7 @@ MipsTargetMachine(const Target &T, StringRef TT, "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")), InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), - TLInfo(*this), TSInfo(*this), JITInfo() { + TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() { } void MipsebTargetMachine::anchor() { } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index c4928c21eb..7e5f192264 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -34,7 +34,7 @@ class MipsTargetMachine : public LLVMTargetMachine { const DataLayout DL; // Calculates type size & alignment OwningPtr<const MipsInstrInfo> InstrInfo; OwningPtr<const MipsFrameLowering> FrameLowering; - MipsTargetLowering TLInfo; + OwningPtr<const MipsTargetLowering> TLInfo; MipsSelectionDAGInfo TSInfo; MipsJITInfo JITInfo; @@ -63,7 +63,7 @@ public: } virtual const MipsTargetLowering *getTargetLowering() const { - return &TLInfo; + return TLInfo.get(); } virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 7917f7736e..709daa4e4b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -12,6 +12,8 @@ #include "llvm/MC/MCFixup.h" +#undef PPC + namespace llvm { namespace PPC { enum Fixups { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 4a420929d0..38a7420d97 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -47,6 +47,10 @@ MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, uint8_t OSABI); } // End llvm namespace +// Generated files will use "namespace PPC". To avoid symbol clash, +// undefine PPC here. PPC may be predefined on some hosts. +#undef PPC + // Defines symbolic names for PowerPC registers. This defines a mapping from // register name to register number. // diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index 972e13852e..b0680fbb8c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -17,6 +17,10 @@ // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC +// Generated files will use "namespace PPC". To avoid symbol clash, +// undefine PPC here. PPC may be predefined on some hosts. +#undef PPC + namespace llvm { namespace PPC { /// Predicate - These are "(BI << 5) | BO" for various predicates. diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index b98cc489f6..81a54d7015 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -189,12 +189,23 @@ INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", /// isCompareEquals - Returns true if the instruction is a compare equals /// instruction with an immediate operand. -static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) { - if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) { +static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp, + bool &Int64Cmp) { + if (MI->getOpcode() == PPC::CMPWI) { SignedCmp = true; + Int64Cmp = false; return true; - } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) { + } else if (MI->getOpcode() == PPC::CMPDI) { + SignedCmp = true; + Int64Cmp = true; + return true; + } else if (MI->getOpcode() == PPC::CMPLWI) { + SignedCmp = false; + Int64Cmp = false; + return true; + } else if (MI->getOpcode() == PPC::CMPLDI) { SignedCmp = false; + Int64Cmp = true; return true; } @@ -353,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); RI != RE; ++RI) { IV_Opnd = &RI.getOperand(); - bool SignedCmp; + bool SignedCmp, Int64Cmp; MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) && + if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) && MI->getOperand(0).getReg() == PredReg) { OldInsts.push_back(MI); @@ -380,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, assert(InitialValue->isReg() && "Expecting register for init value"); unsigned InitialValueReg = InitialValue->getReg(); - const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); + MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); // Here we need to look for an immediate load (an li or lis/ori pair). if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 || DefInstr->getOpcode() == PPC::ORI)) { - int64_t start = (short) DefInstr->getOperand(2).getImm(); - const MachineInstr *DefInstr2 = - MRI->getVRegDef(DefInstr->getOperand(0).getReg()); + int64_t start = DefInstr->getOperand(2).getImm(); + MachineInstr *DefInstr2 = + MRI->getVRegDef(DefInstr->getOperand(1).getReg()); if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 || DefInstr2->getOpcode() == PPC::LIS)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); @@ -399,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, if ((count % iv_value) != 0) { return 0; } - return new CountValue(count/iv_value); + + OldInsts.push_back(DefInstr); + OldInsts.push_back(DefInstr2); + + // count/iv_value, the trip count, should be positive here. If it + // is negative, that indicates that the counter will wrap. + if (Int64Cmp) + return new CountValue(count/iv_value); + else + return new CountValue(uint32_t(count/iv_value)); } } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 || DefInstr->getOpcode() == PPC::LI)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); - int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm())); + int64_t count = ImmVal - + int64_t(short(DefInstr->getOperand(1).getImm())); if ((count % iv_value) != 0) { return 0; } - return new CountValue(count/iv_value); + + OldInsts.push_back(DefInstr); + + if (Int64Cmp) + return new CountValue(count/iv_value); + else + return new CountValue(uint32_t(count/iv_value)); } else if (iv_value == 1 || iv_value == -1) { // We can't determine a constant starting value. if (ImmVal == 0) { @@ -417,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, } // FIXME: handle non-zero end value. } - // FIXME: handle non-unit increments (we might not want to introduce division - // but we can handle some 2^n cases with shifts). + // FIXME: handle non-unit increments (we might not want to introduce + // division but we can handle some 2^n cases with shifts). } } @@ -489,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI, if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (!MRI->use_nodbg_empty(Reg)) { - // This instruction has users, but if the only user is the phi node for the - // parent block, and the only use of that phi node is this instruction, then - // this instruction is dead: both it (and the phi node) can be removed. + // This instruction has users, but if the only user is the phi node for + // the parent block, and the only use of that phi node is this + // instruction, then this instruction is dead: both it (and the phi + // node) can be removed. MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg); if (llvm::next(I) == MRI->use_end() && I.getOperand().getParent()->isPHI()) { @@ -594,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { DEBUG(dbgs() << "failed to get trip count!\n"); return false; } + + if (TripCount->isImm()) { + DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n"); + + // FIXME: We currently can't form 64-bit constants + // (including 32-bit unsigned constants) + if (!isInt<32>(TripCount->getImm())) + return false; + } + // Does the loop contain any invalid instructions? if (containsInvalidInstruction(L)) { return false; @@ -647,7 +685,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { const TargetRegisterClass *SrcRC = MF->getRegInfo().getRegClass(TripCount->getReg()); CountReg = MF->getRegInfo().createVirtualRegister(RC); - unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ? + unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ? (unsigned) PPC::EXTSW_32_64 : (unsigned) TargetOpcode::COPY; BuildMI(*Preheader, InsertPos, dl, @@ -664,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { // Put the trip count in a register for transfer into the count register. int64_t CountImm = TripCount->getImm(); - assert(!TripCount->isNeg() && "Constant trip count must be positive"); + if (TripCount->isNeg()) + CountImm = -CountImm; CountReg = MF->getRegInfo().createVirtualRegister(RC); - if (CountImm > 0xFFFF) { + if (abs64(CountImm) > 0x7FFF) { BuildMI(*Preheader, InsertPos, dl, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), - CountReg).addImm(CountImm >> 16); + CountReg).addImm((CountImm >> 16) & 0xFFFF); unsigned CountReg1 = CountReg; CountReg = MF->getRegInfo().createVirtualRegister(RC); BuildMI(*Preheader, InsertPos, dl, diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index caeb1796f7..2a680661d3 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -136,3 +136,8 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAV F27, F28, F29, F30, F31, CR2, CR3, CR4, V20, V21, V22, V23, V24, V25, V26, V27, V28, V29, V30, V31)>; + +def CSR_NoRegs : CalleeSavedRegs<(add)>; + +def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>; + diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 0a396e6693..0108f7256e 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -188,13 +188,26 @@ static bool spillsCR(const MachineFunction &MF) { return FuncInfo->isCRSpilled(); } +static bool hasSpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->hasSpills(); +} + +static bool hasNonRISpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->hasNonRISpills(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. -void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { +unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, + bool UpdateMF, + bool UseEstimate) const { MachineFrameInfo *MFI = MF.getFrameInfo(); // Get the number of bytes to allocate from the FrameInfo - unsigned FrameSize = MFI->getStackSize(); + unsigned FrameSize = + UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize(); // Get the alignments provided by the target, and the maximum alignment // (if any) of the fixed frame objects. @@ -223,8 +236,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { && spillsCR(MF)) && (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame - MFI->setStackSize(0); - return; + if (UpdateMF) + MFI->setStackSize(0); + return 0; } // Get the maximum call frame size of all the calls. @@ -241,7 +255,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; // Update maximum call frame size. - MFI->setMaxCallFrameSize(maxCallFrameSize); + if (UpdateMF) + MFI->setMaxCallFrameSize(maxCallFrameSize); // Include call frame size in total. FrameSize += maxCallFrameSize; @@ -250,7 +265,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { FrameSize = (FrameSize + AlignMask) & ~AlignMask; // Update frame info. - MFI->setStackSize(FrameSize); + if (UpdateMF) + MFI->setStackSize(FrameSize); + + return FrameSize; } // hasFP - Return true if the specified function actually has a dedicated frame @@ -281,6 +299,31 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } +void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { + bool is31 = needsFP(MF); + unsigned FPReg = is31 ? PPC::R31 : PPC::R1; + unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); + BI != BE; ++BI) + for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { + --MBBI; + for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { + MachineOperand &MO = MBBI->getOperand(I); + if (!MO.isReg()) + continue; + + switch (MO.getReg()) { + case PPC::FP: + MO.setReg(FPReg); + break; + case PPC::FP8: + MO.setReg(FP8Reg); + break; + } + } + } +} void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB @@ -311,13 +354,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MBBI = MBB.begin(); // Work out frame sizes. - // FIXME: determineFrameLayout() may change the frame size. This should be - // moved upper, to some hook. - determineFrameLayout(MF); - unsigned FrameSize = MFI->getStackSize(); - + unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; + if (MFI->isFrameAddressTaken()) + replaceFPWithRealFP(MF); + // Get processor type. bool isPPC64 = Subtarget.isPPC64(); // Get operating system @@ -780,7 +822,7 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { void PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { + RegScavenger *) const { const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); // Save and clear the LR state. @@ -822,30 +864,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); FI->setCRSpillFrameIndex(FrameIdx); } - - // Reserve a slot closest to SP or frame pointer if we have a dynalloc or - // a large stack, which will require scavenging a register to materialize a - // large offset. - // FIXME: this doesn't actually check stack size, so is a bit pessimistic - // FIXME: doesn't detect whether or not we need to spill vXX, which requires - // r0 for now. - - if (RegInfo->requiresRegisterScavenging(MF)) - if (needsFP(MF) || spillsCR(MF)) { - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } } -void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { +void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const { // Early exit if not using the SVR4 ABI. - if (!Subtarget.isSVR4ABI()) + if (!Subtarget.isSVR4ABI()) { + addScavengingSpillSlot(MF, RS); return; + } // Get callee saved register information. MachineFrameInfo *FFI = MF.getFrameInfo(); @@ -853,6 +880,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) // Early exit if no callee saved registers are modified! if (CSI.empty() && !needsFP(MF)) { + addScavengingSpillSlot(MF, RS); return; } @@ -1031,6 +1059,37 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } } + + addScavengingSpillSlot(MF, RS); +} + +void +PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, + RegScavenger *RS) const { + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or + // a large stack, which will require scavenging a register to materialize a + // large offset. + + // We need to have a scavenger spill slot for spills if the frame size is + // large. In case there is no free register for large-offset addressing, + // this slot is used for the necessary emergency spill. Also, we need the + // slot for dynamic stack allocations. + + // The scavenger might be invoked if the frame offset does not fit into + // the 16-bit immediate. We don't know the complete frame size here + // because we've not yet computed callee-saved register spills or the + // needed alignment padding. + unsigned StackSize = determineFrameLayout(MF, false, true); + MachineFrameInfo *MFI = MF.getFrameInfo(); + if (MFI->hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || + (hasSpills(MF) && !isInt<16>(StackSize))) { + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + } } bool diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index d09e47fafd..6f5f9368c6 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -32,7 +32,9 @@ public: Subtarget(sti) { } - void determineFrameLayout(MachineFunction &MF) const; + unsigned determineFrameLayout(MachineFunction &MF, + bool UpdateMF = true, + bool UseEstimate = false) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. @@ -41,10 +43,13 @@ public: bool hasFP(const MachineFunction &MF) const; bool needsFP(const MachineFunction &MF) const; + void replaceFPWithRealFP(MachineFunction &MF) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; + void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -144,6 +149,9 @@ public: return 0; } + // Note that the offsets here overlap, but this is fixed up in + // processFunctionBeforeFrameFinalized. + static const SpillSlot Offsets[] = { // Floating-point register save area offsets. {PPC::F31, -8}, diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 17bea8a6a6..90a3a5c6a4 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -164,6 +164,12 @@ namespace { return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG); } + // Select an address into a single register. + bool SelectAddr(SDValue N, SDValue &Base) { + Base = N; + return true; + } + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. It is always correct to compute the value into /// a register. The case of adding a (possibly relocatable) constant to a diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 13cb358fc0..c7d454692e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); +static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", +cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); + static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) return new TargetLoweringObjectFileMachO(); @@ -67,6 +70,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); + PPCRegInfo = TM.getRegisterInfo(); setPow2DivIsCheap(); @@ -208,6 +212,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intened to support + // SjLj exception handling but a light-weight setjmp/longjmp replacement to + // support continuation, user-level threading, and etc.. As a result, no + // other SjLj exception interfaces are implemented and please don't build + // your own exception handling based on them. + // LLVM/Clang supports zero-cost DWARF exception handling. + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. @@ -564,6 +576,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin"; case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; + case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; + case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFCR: return "PPCISD::MFCR"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; @@ -1028,7 +1042,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm)) { Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } @@ -1077,7 +1091,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, } // Otherwise, do it the hard way, using R0 as the base register. - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, N.getValueType()); Index = N; return true; @@ -1140,7 +1154,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm)) { Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy()); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } @@ -1180,13 +1194,15 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue Ptr; EVT VT; + unsigned Alignment; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); - + Alignment = LD->getAlignment(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); + Alignment = ST->getAlignment(); } else return false; @@ -1205,6 +1221,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (!SelectAddressRegImm(Ptr, Offset, Base, DAG)) return false; } else { + // LDU/STU need an address with at least 4-byte alignment. + if (Alignment < 4) + return false; + // reg + imm * 4. if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG)) return false; @@ -4555,6 +4575,21 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3); } +SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, + DAG.getVTList(MVT::i32, MVT::Other), + Op.getOperand(0), Op.getOperand(1)); +} + +SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, + Op.getOperand(0), Op.getOperand(1)); +} + /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { @@ -5551,6 +5586,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); + case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, @@ -5744,7 +5782,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. bool is64bit = PPCSubTarget.isPPC64(); - unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; + unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); @@ -5863,9 +5901,238 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, return BB; } +llvm::MachineBasicBlock* +PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = MBB; + ++I; + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + unsigned DstReg = MI->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + assert(RC->hasType(MVT::i32) && "Invalid destination!"); + unsigned mainDstReg = MRI.createVirtualRegister(RC); + unsigned restoreDstReg = MRI.createVirtualRegister(RC); + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + // For v = setjmp(buf), we generate + // + // thisMBB: + // SjLjSetup mainMBB + // bl mainMBB + // v_restore = 1 + // b sinkMBB + // + // mainMBB: + // buf[LabelOffset] = LR + // v_main = 0 + // + // sinkMBB: + // v = phi(main, restore) + // + + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, mainMBB); + MF->insert(I, sinkMBB); + + MachineInstrBuilder MIB; + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // Note that the structure of the jmp_buf used here is not compatible + // with that used by libc, and is not designed to be. Specifically, it + // stores only those 'reserved' registers that LLVM does not otherwise + // understand how to spill. Also, by convention, by the time this + // intrinsic is called, Clang has already stored the frame address in the + // first slot of the buffer and stack address in the third. Following the + // X86 target code, we'll store the jump address in the second slot. We also + // need to save the TOC pointer (R2) to handle jumps between shared + // libraries, and that will be stored in the fourth slot. The thread + // identifier (R13) is not affected. + + // thisMBB: + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + const int64_t TOCOffset = 3 * PVT.getStoreSize(); + + // Prepare IP either in reg. + const TargetRegisterClass *PtrRC = getRegClassFor(PVT); + unsigned LabelReg = MRI.createVirtualRegister(PtrRC); + unsigned BufReg = MI->getOperand(1).getReg(); + + if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) { + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) + .addReg(PPC::X2) + .addImm(TOCOffset / 4) + .addReg(BufReg); + + MIB.setMemRefs(MMOBegin, MMOEnd); + } + + // Setup + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCL)).addMBB(mainMBB); + MIB.addRegMask(PPCRegInfo->getNoPreservedMask()); + + BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); + + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) + .addMBB(mainMBB); + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); + + thisMBB->addSuccessor(mainMBB, /* weight */ 0); + thisMBB->addSuccessor(sinkMBB, /* weight */ 1); + + // mainMBB: + // mainDstReg = 0 + MIB = BuildMI(mainMBB, DL, + TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); + + // Store IP + if (PPCSubTarget.isPPC64()) { + MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) + .addReg(LabelReg) + .addImm(LabelOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) + .addReg(LabelReg) + .addImm(LabelOffset) + .addReg(BufReg); + } + + MIB.setMemRefs(MMOBegin, MMOEnd); + + BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); + mainMBB->addSuccessor(sinkMBB); + + // sinkMBB: + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(PPC::PHI), DstReg) + .addReg(mainDstReg).addMBB(mainMBB) + .addReg(restoreDstReg).addMBB(thisMBB); + + MI->eraseFromParent(); + return sinkMBB; +} + +MachineBasicBlock * +PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + + const TargetRegisterClass *RC = + (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned Tmp = MRI.createVirtualRegister(RC); + // Since FP is only updated here but NOT referenced, it's treated as GPR. + unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; + unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; + + MachineInstrBuilder MIB; + + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + const int64_t SPOffset = 2 * PVT.getStoreSize(); + const int64_t TOCOffset = 3 * PVT.getStoreSize(); + + unsigned BufReg = MI->getOperand(0).getReg(); + + // Reload FP (the jumped-to function may not have had a + // frame pointer, and if so, then its r31 will be restored + // as necessary). + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) + .addImm(0) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) + .addImm(0) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // Reload IP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) + .addImm(LabelOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) + .addImm(LabelOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // Reload SP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) + .addImm(SPOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) + .addImm(SPOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // FIXME: When we also support base pointers, that register must also be + // restored here. + + // Reload TOC + if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) + .addImm(TOCOffset / 4) + .addReg(BufReg); + + MIB.setMemRefs(MMOBegin, MMOEnd); + } + + // Jump + BuildMI(*MBB, MI, DL, + TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); + BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); + + MI->eraseFromParent(); + return MBB; +} + MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { + if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || + MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { + return emitEHSjLjSetJmp(MI, BB); + } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 || + MI->getOpcode() == PPC::EH_SjLj_LongJmp64) { + return emitEHSjLjLongJmp(MI, BB); + } + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); // To "insert" these instructions we actually have to insert their @@ -6133,7 +6400,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = RegInfo.createVirtualRegister(RC); - unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; + unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; // thisMBB: // ... // fallthrough --> loopMBB @@ -6622,6 +6889,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'b': // R1-R31 + if (VT == MVT::i64 && PPCSubTarget.isPPC64()) + return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); + return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); case 'r': // R0-R31 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) return std::make_pair(0U, &PPC::G8RCRegClass); @@ -6806,13 +7076,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); - bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || - MFI->hasVarSizedObjects()) && - MFI->getStackSize() && - !MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked); - unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : - (is31 ? PPC::R31 : PPC::R1); + + // Naked functions never have a frame pointer, and so we use r1. For all + // other functions, this decision must be delayed until during PEI. + unsigned FrameReg; + if (MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::Naked)) + FrameReg = isPPC64 ? PPC::X1 : PPC::R1; + else + FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); while (Depth--) @@ -6851,6 +7124,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, } } +bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, + bool *Fast) const { + if (DisablePPCUnaligned) + return false; + + // PowerPC supports unaligned memory access for simple non-vector types. + // Although accessing unaligned addresses is not as efficient as accessing + // aligned addresses, it is generally more efficient than manual expansion, + // and generally only traps for software emulation when crossing page + // boundaries. + + if (!VT.isSimple()) + return false; + + if (VT.getSimpleVT().isVector()) + return false; + + if (VT == MVT::ppcf128) + return false; + + if (Fast) + *Fast = true; + + return true; +} + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 3931384d89..1b0427e021 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -16,6 +16,7 @@ #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H #include "PPC.h" +#include "PPCRegisterInfo.h" #include "PPCSubtarget.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" @@ -119,6 +120,12 @@ namespace llvm { /// are undefined. MFCR, + // EH_SJLJ_SETJMP - SjLj exception handling setjmp. + EH_SJLJ_SETJMP, + + // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. + EH_SJLJ_LONGJMP, + /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* /// instructions. For lack of better number, we use the opcode number /// encoding for the OPC field to identify the compare. For example, 838 @@ -321,6 +328,7 @@ namespace llvm { class PPCTargetLowering : public TargetLowering { const PPCSubtarget &PPCSubTarget; + const PPCRegisterInfo *PPCRegInfo; public: explicit PPCTargetLowering(PPCTargetMachine &TM); @@ -395,6 +403,12 @@ namespace llvm { MachineBasicBlock *MBB, bool is8bit, unsigned Opcode) const; + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + ConstraintType getConstraintType(const std::string &Constraint) const; /// Examine constraint string and operand type and determine a weight value. @@ -449,6 +463,10 @@ namespace llvm { bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; + /// Is unaligned memory access allowed for the given type, and is it fast + /// relative to software emulation. + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd @@ -604,6 +622,9 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + + SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 01201304f7..d2b1d6d392 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -30,12 +30,12 @@ def symbolLo64 : Operand<i64> { let EncoderMethod = "getLO16Encoding"; } def tocentry : Operand<iPTR> { - let MIOperandInfo = (ops i32imm:$imm); + let MIOperandInfo = (ops i64imm:$imm); } def memrs : Operand<iPTR> { // memri where the immediate is a symbolLo64 let PrintMethod = "printMemRegImm"; let EncoderMethod = "getMemRIXEncoding"; - let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo64:$off, ptr_rc_nor0:$reg); } def tlsreg : Operand<i64> { let EncoderMethod = "getTLSRegEncoding"; @@ -273,6 +273,19 @@ def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins), "mfcr $rT", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; +let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in { + def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + "#EH_SJLJ_SETJMP64", + [(set GPRC:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, + Requires<[In64BitMode]>; + let isTerminator = 1 in + def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf), + "#EH_SJLJ_LONGJMP64", + [(PPCeh_sjlj_longjmp addr:$buf)]>, + Requires<[In64BitMode]>; +} + //===----------------------------------------------------------------------===// // 64-bit SPR manipulation instrs. @@ -393,15 +406,16 @@ def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), "addic $rD, $rA, $imm", IntGeneral, [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>; } -def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), +def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, s16imm64:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; -def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm), + [(set G8RC:$rD, (add G8RC_NOX0:$rA, immSExt16:$imm))]>; +def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; -def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm), + [(set G8RC:$rD, (add G8RC_NOX0:$rA, immSExt16:$imm))]>; +def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm), "addis $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>; + [(set G8RC:$rD, (add G8RC_NOX0:$rA, + imm16ShiftedSExt:$imm))]>; let Defs = [CARRY] in { def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), @@ -536,7 +550,7 @@ def RLWINM8 : MForm_2<21, []>; def ISEL8 : AForm_4<31, 15, - (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond), + (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, pred:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; } // End FXU Operations. @@ -555,7 +569,8 @@ def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src), PPC970_DGroup_Cracked; def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src), "lwa $rD, $src", LdStLWA, - [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64, + [(set G8RC:$rD, + (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, @@ -567,25 +582,26 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), PPC970_DGroup_Cracked; // Update forms. -let mayLoad = 1 in -def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, - ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStLHAU, - []>, RegConstraint<"$rA = $ea_result">, +let mayLoad = 1 in { +def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), + (ins memri:$addr), + "lhau $rD, $addr", LdStLHAU, + []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! -def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } +} // Zero extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { @@ -612,30 +628,30 @@ def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), // Update forms. let mayLoad = 1 in { -def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), +def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -648,7 +664,7 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, - [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64; + [(set G8RC:$rD, (aligned4load ixaddr:$src))]>, isPPC64; def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src), "ld $rD, $src", LdStLD, []>, isPPC64; @@ -684,12 +700,12 @@ def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; let mayLoad = 1 in -def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), +def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), "ldu $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; -def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result), +def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "ldux $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -798,82 +814,11 @@ def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), // Normal 8-byte stores. def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst), "std $rS, $dst", LdStSTD, - [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64; + [(aligned4store G8RC:$rS, ixaddr:$dst)]>, isPPC64; def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), "stdx $rS, $dst", LdStSTD, [(store G8RC:$rS, xaddr:$dst)]>, isPPC64, PPC970_DGroup_Cracked; -} - -let PPC970_Unit = 2 in { - -def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - -def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - -def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - s16immX4:$ptroff, ptr_rc:$ptrreg), - "stdu $rS, $ptroff($ptrreg)", LdStSTDU, - [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, - isPPC64; - - -def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti32 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stdux $rS, $ptroff, $ptrreg", LdStSTDU, - [(set ptr_rc:$ea_res, - (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked, isPPC64; - // STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register. def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst), "std $rT, $dst", LdStSTD, @@ -884,6 +829,60 @@ def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst), PPC970_DGroup_Cracked; } +// Stores with Update (pre-inc). +let PPC970_Unit = 2, mayStore = 1 in { +def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "stbu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "sthu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst), + "stdu $rS, $dst", LdStSTDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, + isPPC64; + +def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stbux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "sthux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stwux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stdux $rS, $dst", LdStSTDU, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked, isPPC64; +} + +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STBU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_truncsti16 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STHU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_truncsti32 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STWU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(aligned4pre_store G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STDU G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; + +def : Pat<(pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STBUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STHUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STWUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STDUX G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; //===----------------------------------------------------------------------===// @@ -979,3 +978,13 @@ def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)), (ADDIS8 G8RC:$in, tjumptable:$g)>; def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)), (ADDIS8 G8RC:$in, tblockaddress:$g)>; + +// Patterns to match r+r indexed loads and stores for +// addresses without at least 4-byte alignment. +def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)), + (LWAX xoaddr:$src)>; +def : Pat<(i64 (unaligned4load xoaddr:$src)), + (LDX xoaddr:$src)>; +def : Pat<(unaligned4store G8RC:$rS, xoaddr:$dst), + (STDX G8RC:$rS, xoaddr:$dst)>; + diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 0cf28ae4b5..0ed7ff2cc4 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -182,6 +182,9 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID> //===----------------------------------------------------------------------===// // Instruction Definitions. +def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">; +let Predicates = [HasAltivec] in { + def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), "dss $STRM", LdStLoad /*FIXME*/, []>; @@ -733,3 +736,6 @@ def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))), (VRFIZ VRRC:$vA)>; def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))), (VRFIN VRRC:$vA)>; + +} // end HasAltivec + diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index c3c171cd21..16dbee5d29 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -120,6 +120,18 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL, let CR = 0; } +class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, + dag OOL, dag IOL, string asmstr> + : I<opcode, OOL, IOL, asmstr, BrB> { + bits<14> BD; + + let Inst{6-10} = bo; + let Inst{11-15} = bi; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + // 1.7.4 D-Form class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index a0517a80a9..cf39386c67 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -33,11 +33,6 @@ #define GET_INSTRINFO_CTOR #include "PPCGenInstrInfo.inc" -namespace llvm { -extern cl::opt<bool> DisablePPC32RS; -extern cl::opt<bool> DisablePPC64RS; -} - using namespace llvm; static cl:: @@ -444,7 +439,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const{ + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const{ DebugLoc DL; if (PPC::GPRCRegClass.hasSubClassEq(RC)) { if (SrcReg != PPC::LR) { @@ -489,47 +485,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) - .addReg(SrcReg, - getKillRegState(isKill)), - FrameIdx)); - return true; - } else { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - - bool is64Bit = TM.getSubtargetImpl()->isPPC64(); - // We need to store the CR in the low 4-bits of the saved value. First, - // issue a MFCR to save all of the CRBits. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - (is64Bit ? PPC::X2 : PPC::R2) : - (is64Bit ? PPC::X0 : PPC::R0); - NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud : - PPC::MFCRpseud), ScratchReg) - .addReg(SrcReg, getKillRegState(isKill))); - - // If the saved register wasn't CR0, shift the bits left so that they are - // in CR0's slot. - if (SrcReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4; - // rlwinm scratch, scratch, ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 : - PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(ShiftBits) - .addImm(0).addImm(31)); - } - - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ? - PPC::STW8 : PPC::STW)) - .addReg(ScratchReg, - getKillRegState(isKill)), - FrameIdx)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { // FIXME: We use CRi here because there is no mtcrf on a bit. Since the // backend currently only uses CR1EQ as an individual bit, this should @@ -562,23 +522,20 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, Reg = PPC::CR7; return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, - &PPC::CRRCRegClass, NewMIs); + &PPC::CRRCRegClass, NewMIs, NonRI); } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - // We don't have indexed addressing for vector loads. Emit: - // R0 = ADDI FI# - // STVX VAL, 0, R0 - // - // FIXME: We use R0 here, because it isn't available for RA. - bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); - unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), - FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX)) - .addReg(SrcReg, getKillRegState(isKill)) - .addReg(GPR0) - .addReg(GPR0)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; + } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -595,10 +552,15 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr*, 4> NewMIs; - if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasSpills(); + + bool NonRI = false; + if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs, NonRI)) FuncInfo->setSpillsCR(); - } + + if (NonRI) + FuncInfo->setHasNonRISpills(); for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); @@ -616,7 +578,8 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs)const{ + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const{ if (PPC::GPRCRegClass.hasSubClassEq(RC)) { if (DestReg != PPC::LR) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), @@ -642,37 +605,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, - get(PPC::RESTORE_CR), DestReg) - , FrameIdx)); - return true; - } else { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - PPC::R2 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - ScratchReg), FrameIdx)); - - // If the reloaded register isn't CR0, shift the bits right so that they are - // in the right CR's slot. - if (DestReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; - // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) - .addImm(31)); - } - - NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ? - PPC::MTCRF8 : PPC::MTCRF), DestReg) - .addReg(ScratchReg)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_CR), DestReg), + FrameIdx)); + return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { unsigned Reg = 0; @@ -702,21 +638,18 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, Reg = PPC::CR7; return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, - &PPC::CRRCRegClass, NewMIs); + &PPC::CRRCRegClass, NewMIs, NonRI); } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - // We don't have indexed addressing for vector loads. Emit: - // R0 = ADDI FI# - // Dest = LVX 0, R0 - // - // FIXME: We use R0 here, because it isn't available for RA. - bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); - unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), - FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0) - .addReg(GPR0)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg), + FrameIdx)); + NonRI = true; + } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_VRSAVE), + DestReg), + FrameIdx)); + NonRI = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -734,10 +667,17 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, SmallVector<MachineInstr*, 4> NewMIs; DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); - if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasSpills(); + + bool NonRI = false; + if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs, NonRI)) FuncInfo->setSpillsCR(); - } + + if (NonRI) + FuncInfo->setHasNonRISpills(); + for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 374213ea43..5d4ae915b4 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -71,11 +71,13 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const; bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const; public: explicit PPCInstrInfo(PPCTargetMachine &TM); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 460e94342d..84cdb1f39d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -158,6 +158,14 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP", + SDTypeProfile<1, 1, [SDTCisInt<0>, + SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPSideEffect]>; +def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPSideEffect]>; + def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; @@ -278,6 +286,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue(); }], HI16>; +// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require +// restricted memrix (offset/4) constants are alignment sensitive. If these +// offsets are hidden behind TOC entries than the values of the lower-order +// bits cannot be checked directly. As a result, we need to also incorporate +// an alignment check into the relevant patterns. + +def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4pre_store : PatFrag< + (ops node:$val, node:$base, node:$offset), + (pre_store node:$val, node:$base, node:$offset), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; + +def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; //===----------------------------------------------------------------------===// // PowerPC Flag Definitions. @@ -314,9 +354,6 @@ def s16imm : Operand<i32> { def u16imm : Operand<i32> { let PrintMethod = "printU16ImmOperand"; } -def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing. - let PrintMethod = "printS16X4ImmOperand"; -} def directbrtarget : Operand<OtherVT> { let PrintMethod = "printBranchOperand"; let EncoderMethod = "getDirectBrEncoding"; @@ -344,21 +381,30 @@ def crbitm: Operand<i8> { let EncoderMethod = "get_crbitm_encoding"; } // Address operands +// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode). +def ptr_rc_nor0 : PointerLikeRegClass<1>; + def memri : Operand<iPTR> { let PrintMethod = "printMemRegImm"; - let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIEncoding"; } def memrr : Operand<iPTR> { let PrintMethod = "printMemRegReg"; - let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg); + let MIOperandInfo = (ops ptr_rc_nor0:$offreg, ptr_rc:$ptrreg); } def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; - let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIXEncoding"; } +// A single-register address. This is used with the SjLj +// pseudo-instructions. +def memr : Operand<iPTR> { + let MIOperandInfo = (ops ptr_rc:$ptrreg); +} + // PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg // that doesn't matter. def pred : PredicateOperand<OtherVT, (ops imm, CRRC), @@ -372,6 +418,10 @@ def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>; def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>; def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std" +// The address in a single register. This is used with the SjLj +// pseudo-instructions. +def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>; + /// This is just the offset part of iaddr, used for preinc. def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>; def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>; @@ -473,6 +523,14 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { } } +// The direct BCL used by the SjLj setjmp code. +let isCall = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { + let Defs = [LR], Uses = [RM] in { + def BCL : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst), + "bcl 20, 31, $dst">; + } +} + // Darwin ABI Calls. let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions @@ -551,6 +609,23 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst), "ba $dst", BrB, []>; +let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in { + def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + "#EH_SJLJ_SETJMP32", + [(set GPRC:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, + Requires<[In32BitMode]>; + let isTerminator = 1 in + def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf), + "#EH_SJLJ_LONGJMP32", + [(PPCeh_sjlj_longjmp addr:$buf)]>, + Requires<[In32BitMode]>; +} + +let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in { + def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst), + "#EH_SjLj_Setup\t$dst", []>; +} // DCB* instructions. def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), @@ -708,69 +783,69 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), // Unindexed (r+i) Loads with Update (preinc). let mayLoad = 1 in { -def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfsu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfdu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // Indexed (r+r) Loads with Update (preinc). -def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result), +def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result), +def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result), +def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfsux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), +def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfdux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -835,44 +910,39 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), } // Unindexed (r+i) Stores with Update (preinc). -let PPC970_Unit = 2 in { -def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU, - [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU, - [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +let PPC970_Unit = 2, mayStore = 1 in { +def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "stbu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "sthu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst), + "stfsu $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst), + "stfdu $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; } +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STBU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_truncsti16 GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STHU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_store GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STWU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_store F4RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STFSU F4RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_store F8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STFDU F8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; // Indexed (r+r) Stores. -// let PPC970_Unit = 2 in { def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst), "stbx $rS, $dst", LdStStore, @@ -887,48 +957,6 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), [(store GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 GPRC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 GPRC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), - (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU, - [(set ptr_rc:$ea_res, - (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res), - (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU, - [(set ptr_rc:$ea_res, - (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), "sthbrx $rS, $dst", LdStStore, [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>, @@ -950,6 +978,44 @@ def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), [(store F8RC:$frS, xaddr:$dst)]>; } +// Indexed (r+r) Stores with Update (preinc). +let PPC970_Unit = 2, mayStore = 1 in { +def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "stbux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "sthux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "stwux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst), + "stfsux $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst), + "stfdux $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +} + +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STBUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STHUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STWUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STFSUX F4RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STFDUX F8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; + def SYNC : XForm_24_sync<31, 598, (outs), (ins), "sync", LdStSync, [(int_ppc_sync)]>; @@ -959,12 +1025,12 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins), // let PPC970_Unit = 1 in { // FXU Operations. -def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), +def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, s16imm:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; -def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm), + [(set GPRC:$rD, (add GPRC_NOR0:$rA, immSExt16:$imm))]>; +def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; + [(set GPRC:$rD, (add GPRC_NOR0:$rA, immSExt16:$imm))]>; let Defs = [CARRY] in { def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addic $rD, $rA, $imm", IntGeneral, @@ -974,12 +1040,13 @@ def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addic. $rD, $rA, $imm", IntGeneral, []>; } -def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm), +def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm), "addis $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>; -def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym), + [(set GPRC:$rD, (add GPRC_NOR0:$rA, + imm16ShiftedSExt:$imm))]>; +def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym), "la $rD, $sym($rA)", IntGeneral, - [(set GPRC:$rD, (add GPRC:$rA, + [(set GPRC:$rD, (add GPRC_NOR0:$rA, (PPClo tglobaladdr:$sym, 0)))]>; def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "mulli $rD, $rA, $imm", IntMulLI, @@ -1213,6 +1280,29 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins), "mfspr $rT, 256", IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; +let isCodeGenOnly = 1 in { + def MTVRSAVEv : XFXForm_7_ext<31, 467, 256, + (outs VRSAVERC:$reg), (ins GPRC:$rS), + "mtspr 256, $rS", IntGeneral>, + PPC970_DGroup_Single, PPC970_Unit_FXU; + def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), + (ins VRSAVERC:$reg), + "mfspr $rT, 256", IntGeneral>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register, +// so we'll need to scavenge a register for it. +let mayStore = 1 in +def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F), + "#SPILL_VRSAVE", []>; + +// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously +// spilled), so we'll need to scavenge a register for it. +let mayLoad = 1 in +def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), + "#RESTORE_VRSAVE", []>; + def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -1435,7 +1525,7 @@ let Uses = [RM] in { let PPC970_Unit = 1 in { // FXU Operations. def ISEL : AForm_4<31, 15, - (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond), + (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, pred:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; } diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 045b375dd8..b1636a20b8 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -37,6 +37,13 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// PEI. bool MustSaveLR; + /// Does this function have any stack spills. + bool HasSpills; + + /// Does this function spill using instructions with only r+r (not r+i) + /// forms. + bool HasNonRISpills; + /// SpillsCR - Indicates whether CR is spilled in the current function. bool SpillsCR; @@ -78,6 +85,8 @@ public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), ReturnAddrSaveIndex(0), + HasSpills(false), + HasNonRISpills(false), SpillsCR(false), LRStoreRequired(false), MinReservedArea(0), @@ -109,6 +118,12 @@ public: void setMustSaveLR(bool U) { MustSaveLR = U; } bool mustSaveLR() const { return MustSaveLR; } + void setHasSpills() { HasSpills = true; } + bool hasSpills() const { return HasSpills; } + + void setHasNonRISpills() { HasNonRISpills = true; } + bool hasNonRISpills() const { return HasNonRISpills; } + void setSpillsCR() { SpillsCR = true; } bool isCRSpilled() const { return SpillsCR; } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index df245cc655..75e20e4a26 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -46,26 +46,8 @@ #define GET_REGINFO_TARGET_DESC #include "PPCGenRegisterInfo.inc" -namespace llvm { -cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger", - cl::init(false), - cl::desc("Disable PPC32 register scavenger"), - cl::Hidden); -cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger", - cl::init(false), - cl::desc("Disable PPC64 register scavenger"), - cl::Hidden); -} - using namespace llvm; -// FIXME (64-bit): Should be inlined. -bool -PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const { - return ((!DisablePPC32RS && !Subtarget.isPPC64()) || - (!DisablePPC64RS && Subtarget.isPPC64())); -} - PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, const TargetInstrInfo &tii) : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, @@ -89,17 +71,17 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32; } -bool -PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { - return requiresRegisterScavenging(MF); -} - - /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { + if (Kind == 1) { + if (Subtarget.isPPC64()) + return &PPC::G8RC_NOX0RegClass; + return &PPC::GPRC_NOR0RegClass; + } + if (Subtarget.isPPC64()) return &PPC::G8RCRegClass; return &PPC::GPRCRegClass; @@ -123,11 +105,33 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask; } +const uint32_t* +PPCRegisterInfo::getNoPreservedMask() const { + // The naming here is inverted: The CSR_NoRegs_Altivec has the + // Altivec registers masked so that they're not saved and restored around + // instructions with this preserved mask. + + if (!Subtarget.hasAltivec()) + return CSR_NoRegs_Altivec_RegMask; + + return CSR_NoRegs_RegMask; +} + BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering()); + // The ZERO register is not really a register, but the representation of r0 + // when used in instructions that treat r0 as the constant 0. + Reserved.set(PPC::ZERO); + Reserved.set(PPC::ZERO8); + + // The FP register is also not really a register, but is the representation + // of the frame pointer register used by ISD::FRAMEADDR. + Reserved.set(PPC::FP); + Reserved.set(PPC::FP8); + Reserved.set(PPC::R0); Reserved.set(PPC::R1); Reserved.set(PPC::LR); @@ -139,35 +143,22 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R2); // System-reserved register Reserved.set(PPC::R13); // Small Data Area pointer register } - // Reserve R2 on Darwin to hack around the problem of save/restore of CR - // when the stack frame is too big to address directly; we need two regs. - // This is a hack. - if (Subtarget.isDarwinABI()) { - Reserved.set(PPC::R2); - } // On PPC64, r13 is the thread pointer. Never allocate this register. - // Note that this is over conservative, as it also prevents allocation of R31 - // when the FP is not needed. if (Subtarget.isPPC64()) { Reserved.set(PPC::R13); - Reserved.set(PPC::R31); Reserved.set(PPC::X0); Reserved.set(PPC::X1); Reserved.set(PPC::X13); - Reserved.set(PPC::X31); + + if (PPCFI->needsFP(MF)) + Reserved.set(PPC::X31); // The 64-bit SVR4 ABI reserves r2 for the TOC pointer. if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } - // Reserve X2 on Darwin to hack around the problem of save/restore of CR - // when the stack frame is too big to address directly; we need two regs. - // This is a hack. - if (Subtarget.isDarwinABI()) { - Reserved.set(PPC::X2); - } } if (PPCFI->needsFP(MF)) @@ -185,6 +176,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, switch (RC->getID()) { default: return 0; + case PPC::G8RC_NOX0RegClassID: + case PPC::GPRC_NOR0RegClassID: case PPC::G8RCRegClassID: case PPC::GPRCRegClassID: { unsigned FP = TFI->hasFP(MF) ? 1 : 0; @@ -199,38 +192,10 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -bool -PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { - switch (RC->getID()) { - case PPC::G8RCRegClassID: - case PPC::GPRCRegClassID: - case PPC::F8RCRegClassID: - case PPC::F4RCRegClassID: - case PPC::VRRCRegClassID: - return true; - default: - return false; - } -} - //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// -/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered -/// register first and then a spilled callee-saved register if that fails. -static -unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS, - const TargetRegisterClass *RC, int SPAdj) { - assert(RS && "Register scavenging must be on"); - unsigned Reg = RS->FindUnusedReg(RC); - // FIXME: move ARM callee-saved reg scan to target independent code, then - // search for already spilled CS register here. - if (Reg == 0) - Reg = RS->scavengeRegister(RC, II, SPAdj); - return Reg; -} - /// lowerDynamicAlloc - Generate the code for allocating an object in the /// current frame. The sequence of code with be in the general form /// @@ -271,28 +236,16 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Fortunately, a frame greater than 32K is rare. const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *RC = LP64 ? G8RC : GPRC; - - // FIXME (64-bit): Use "findScratchRegister" - unsigned Reg; - if (requiresRegisterScavenging(MF)) - Reg = findScratchRegister(II, RS, RC, SPAdj); - else - Reg = PPC::R0; + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) .addReg(PPC::R31) .addImm(FrameSize); } else if (LP64) { - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) - .addImm(0) - .addReg(PPC::X1); - else - BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0) - .addImm(0) - .addReg(PPC::X1); + BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) + .addImm(0) + .addReg(PPC::X1); } else { BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg) .addImm(0) @@ -302,17 +255,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Grow the stack and update the stack pointer link, then determine the // address of new allocated space. if (LP64) { - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(Reg, RegState::Kill) - .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - else - BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(PPC::X0, RegState::Kill) - .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) + .addReg(Reg, RegState::Kill) + .addReg(PPC::X1) + .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) .addReg(PPC::X1) @@ -369,8 +315,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, (void) RS; bool LP64 = Subtarget.isPPC64(); - unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : - (LP64 ? PPC::X0 : PPC::R0); + unsigned Reg = LP64 ? PPC::X0 : PPC::R0; unsigned SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue @@ -412,8 +357,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, (void) RS; bool LP64 = Subtarget.isPPC64(); - unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : - (LP64 ? PPC::X0 : PPC::R0); + unsigned Reg = LP64 ? PPC::X0 : PPC::R0; unsigned DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_CR does not define its destination"); @@ -438,6 +382,65 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, MBB.erase(II); } +void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex, int SPAdj, + RegScavenger *RS) const { + // Get the instruction. + MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + // FIXME: Once LLVM supports creating virtual registers here, or the register + // scavenger can return multiple registers, stop using reserved registers + // here. + (void) SPAdj; + (void) RS; + + unsigned Reg = PPC::R0; + unsigned SrcReg = MI.getOperand(0).getReg(); + + BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg) + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW)) + .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())), + FrameIndex); + + // Discard the pseudo instruction. + MBB.erase(II); +} + +void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, + unsigned FrameIndex, int SPAdj, + RegScavenger *RS) const { + // Get the instruction. + MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + // FIXME: Once LLVM supports creating virtual registers here, or the register + // scavenger can return multiple registers, stop using reserved registers + // here. + (void) SPAdj; + (void) RS; + + unsigned Reg = PPC::R0; + unsigned DestReg = MI.getOperand(0).getReg(); + assert(MI.definesRegister(DestReg) && + "RESTORE_VRSAVE does not define its destination"); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ), + Reg), FrameIndex); + + BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg) + .addReg(Reg); + + // Discard the pseudo instruction. + MBB.erase(II); +} + bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const { @@ -498,15 +501,19 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, return; } - // Special case for pseudo-ops SPILL_CR and RESTORE_CR. - if (requiresRegisterScavenging(MF)) { - if (OpC == PPC::SPILL_CR) { - lowerCRSpilling(II, FrameIndex, SPAdj, RS); - return; - } else if (OpC == PPC::RESTORE_CR) { - lowerCRRestore(II, FrameIndex, SPAdj, RS); - return; - } + // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc. + if (OpC == PPC::SPILL_CR) { + lowerCRSpilling(II, FrameIndex, SPAdj, RS); + return; + } else if (OpC == PPC::RESTORE_CR) { + lowerCRRestore(II, FrameIndex, SPAdj, RS); + return; + } else if (OpC == PPC::SPILL_VRSAVE) { + lowerVRSAVESpilling(II, FrameIndex, SPAdj, RS); + return; + } else if (OpC == PPC::RESTORE_VRSAVE) { + lowerVRSAVERestore(II, FrameIndex, SPAdj, RS); + return; } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). @@ -529,7 +536,25 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, isIXAddr = true; break; } - + + bool noImmForm = false; + switch (OpC) { + case PPC::LVEBX: + case PPC::LVEHX: + case PPC::LVEWX: + case PPC::LVX: + case PPC::LVXL: + case PPC::LVSL: + case PPC::LVSR: + case PPC::STVEBX: + case PPC::STVEHX: + case PPC::STVEWX: + case PPC::STVX: + case PPC::STVXL: + noImmForm = true; + break; + } + // Now add the frame object offset to the offset from r1. int Offset = MFI->getObjectOffset(FrameIndex); if (!isIXAddr) @@ -553,7 +578,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm - (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { + (!noImmForm && + isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); @@ -563,13 +589,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // The offset doesn't fit into a single register, scavenge one to build the // offset in. - unsigned SReg; - if (requiresRegisterScavenging(MF)) { - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj); - } else - SReg = is64Bit ? PPC::X0 : PPC::R0; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + unsigned SReg = MF.getRegInfo().createVirtualRegister(is64Bit ? G8RC : GPRC); // Insert a set of rA with the full offset value before the ld, st, or add BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg) @@ -584,7 +606,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0 unsigned OperandBase; - if (OpC != TargetOpcode::INLINEASM) { + if (noImmForm) + OperandBase = 1; + else if (OpC != TargetOpcode::INLINEASM) { assert(ImmToIdxMap.count(OpC) && "No indexed form of load or store available!"); unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 9840666242..ba0fb48b85 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -44,16 +44,22 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID CC) const; + const uint32_t *getNoPreservedMask() const; BitVector getReservedRegs(const MachineFunction &MF) const; - virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; + /// We require the register scavenger. + bool requiresRegisterScavenging(const MachineFunction &MF) const { + return true; + } - /// requiresRegisterScavenging - We require a register scavenger. - /// FIXME (64-bit): Should be inlined. - bool requiresRegisterScavenging(const MachineFunction &MF) const; + bool requiresFrameIndexScavenging(const MachineFunction &MF) const { + return true; + } - bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + return true; + } void lowerDynamicAlloc(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const; @@ -61,6 +67,11 @@ public: int SPAdj, RegScavenger *RS) const; void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const; + void lowerVRSAVESpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, + int SPAdj, RegScavenger *RS) const; + void lowerVRSAVERestore(MachineBasicBlock::iterator II, unsigned FrameIndex, + int SPAdj, RegScavenger *RS) const; + bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 8ee9b1ec9f..55d36bbfc9 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -86,6 +86,14 @@ foreach Index = 0-31 in { DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; } +// The reprsentation of r0 when treated as the constant 0. +def ZERO : GPR<0, "r0">; +def ZERO8 : GP8<ZERO, "r0">; + +// Representations of the frame pointer used by ISD::FRAMEADDR. +def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">; +def FP8 : GP8<FP, "**FRAME POINTER**">; + // Condition register bits def CR0LT : CRBIT< 0, "0">; def CR0GT : CRBIT< 1, "1">; @@ -164,11 +172,17 @@ def RM: SPR<512, "**ROUNDING MODE**">; // then nonvolatiles in reverse order since stmw/lmw save from rN to r31 def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12), (sequence "R%u", 30, 13), - R31, R0, R1, LR)>; + R31, R0, R1, LR, FP)>; def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12), (sequence "X%u", 30, 14), - X31, X13, X0, X1, LR8)>; + X31, X13, X0, X1, LR8, FP8)>; + +// For some instructions r0 is special (representing the value 0 instead of +// the value in the r0 register), and we use these register subclasses to +// prevent r0 from being allocated for use by those instructions. +def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>; +def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>; // Allocate volatiles first, then non-volatiles in reverse order. With the SVR4 // ABI the size of the Floating-point register save area is determined by the diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index a59c775272..e740348717 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -202,8 +202,8 @@ class Vector2_Build <ValueType vecType, RegisterClass vectorClass, (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1) >; -class Vector_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < +class Vector4_Build <ValueType vecType, RegisterClass vectorClass, + ValueType elemType, RegisterClass elemClass> : Pat < (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y), (elemType elemClass:$z), (elemType elemClass:$w))), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 0c7880d232..fa8f62de9c 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -365,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, SDValue Operand = Ops[OperandIdx[i] - 1]; switch (Operand.getOpcode()) { case AMDGPUISD::CONST_ADDRESS: { - if (i == 2) - break; SDValue CstOffset; - if (!Operand.getValueType().isVector() && - SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) { - Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); - Ops[SelIdx[i] - 1] = CstOffset; - return true; + if (Operand.getValueType().isVector() || + !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) + break; + + // Gather others constants values + std::vector<unsigned> Consts; + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = OperandIdx[j]; + if (SrcIdx < 0) + break; + if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) { + if (Reg->getReg() == AMDGPU::ALU_CONST) { + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]); + Consts.push_back(Cst->getZExtValue()); + } + } } + + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset); + Consts.push_back(Cst->getZExtValue()); + if (!TII->fitsConstReadLimitations(Consts)) + break; + + Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); + Ops[SelIdx[i] - 1] = CstOffset; + return true; } - break; case ISD::FNEG: if (NegIdx[i] < 0) break; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index be3318a0b4..08650980fd 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const { (TargetFlags & R600_InstFlag::OP3)); } +bool +R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts) + const { + assert (Consts.size() <= 12 && "Too many operands in instructions group"); + unsigned Pair1 = 0, Pair2 = 0; + for (unsigned i = 0, n = Consts.size(); i < n; ++i) { + unsigned ReadConstHalf = Consts[i] & 2; + unsigned ReadConstIndex = Consts[i] & (~3); + unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf; + if (!Pair1) { + Pair1 = ReadHalfConst; + continue; + } + if (Pair1 == ReadHalfConst) + continue; + if (!Pair2) { + Pair2 = ReadHalfConst; + continue; + } + if (Pair2 != ReadHalfConst) + return false; + } + return true; +} + +bool +R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const { + std::vector<unsigned> Consts; + for (unsigned i = 0, n = MIs.size(); i < n; i++) { + const MachineInstr *MI = MIs[i]; + + const R600Operands::Ops OpTable[3][2] = { + {R600Operands::SRC0, R600Operands::SRC0_SEL}, + {R600Operands::SRC1, R600Operands::SRC1_SEL}, + {R600Operands::SRC2, R600Operands::SRC2_SEL}, + }; + + if (!isALUInstr(MI->getOpcode())) + continue; + + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); + if (SrcIdx < 0) + break; + if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) { + unsigned Const = MI->getOperand( + getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); + Consts.push_back(Const); + } + } + } + return fitsConstReadLimitations(Consts); +} + DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const { const InstrItineraryData *II = TM->getInstrItineraryData(); diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index efe721c00c..bf9569e659 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -53,6 +53,9 @@ namespace llvm { /// \returns true if this \p Opcode represents an ALU instruction. bool isALUInstr(unsigned Opcode) const; + bool fitsConstReadLimitations(const std::vector<unsigned>&) const; + bool canBundle(const std::vector<MachineInstr *> &) const; + /// \breif Vector instructions are instructions that must fill all /// instruction slots within an instruction group. bool isVector(const MachineInstr &MI) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index c5fa3347dc..8c50d54cf5 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1979,8 +1979,8 @@ def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>; def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>; def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>; -def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>; -def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>; +def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>; +def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>; // bitconvert patterns diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp index 40aec833ea..b07a585f08 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.cpp +++ b/lib/Target/R600/R600MachineFunctionInfo.cpp @@ -14,5 +14,4 @@ using namespace llvm; R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) : MachineFunctionInfo() { - memset(Outputs, 0, sizeof(Outputs)); } diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h index 4b901f4bbc..13a46b8eec 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.h +++ b/lib/Target/R600/R600MachineFunctionInfo.h @@ -26,7 +26,6 @@ public: R600MachineFunctionInfo(const MachineFunction &MF); SmallVector<unsigned, 4> LiveOuts; std::vector<unsigned> IndirectRegs; - SDNode *Outputs[16]; }; } // End llvm namespace diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index 19baef94c7..9074364bb3 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -37,7 +37,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { CurInstKind = IDOther; CurEmitted = 0; OccupedSlotsMask = 15; - memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate)); InstKindLimit[IDAlu] = 120; // 120 minus 8 for security @@ -288,79 +287,19 @@ int R600SchedStrategy::getInstKind(SUnit* SU) { } } -class ConstPairs { -private: - unsigned XYPair; - unsigned ZWPair; -public: - ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) { - for (unsigned i = 0; i < 3; i++) { - unsigned ReadConstChan = ReadConst[i] & 3; - unsigned ReadConstIndex = ReadConst[i] & (~3); - if (ReadConstChan < 2) { - if (!XYPair) { - XYPair = ReadConstIndex; - } - } else { - if (!ZWPair) { - ZWPair = ReadConstIndex; - } - } - } - } - - bool isCompatibleWith(const ConstPairs& CP) const { - return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) && - (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair); - } -}; - -static -const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) { - unsigned ReadConsts[3] = {0, 0, 0}; - R600Operands::Ops OpTable[3][2] = { - {R600Operands::SRC0, R600Operands::SRC0_SEL}, - {R600Operands::SRC1, R600Operands::SRC1_SEL}, - {R600Operands::SRC2, R600Operands::SRC2_SEL}, - }; - - if (!TII->isALUInstr(MI.getOpcode())) - return ConstPairs(ReadConsts); - - for (unsigned i = 0; i < 3; i++) { - int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]); - if (SrcIdx < 0) - break; - if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) - ReadConsts[i] =MI.getOperand( - TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm(); - } - return ConstPairs(ReadConsts); -} - -bool -R600SchedStrategy::isBundleable(const MachineInstr& MI) { - const ConstPairs &MIPair = getPairs(TII, MI); - for (unsigned i = 0; i < 4; i++) { - if (!InstructionsGroupCandidate[i]) - continue; - const ConstPairs &IGPair = getPairs(TII, - *InstructionsGroupCandidate[i]->getInstr()); - if (!IGPair.isCompatibleWith(MIPair)) - return false; - } - return true; -} - SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) { if (Q.empty()) return NULL; for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end(); It != E; ++It) { SUnit *SU = *It; - if (isBundleable(*SU->getInstr())) { + InstructionsGroupCandidate.push_back(SU->getInstr()); + if (TII->canBundle(InstructionsGroupCandidate)) { + InstructionsGroupCandidate.pop_back(); Q.erase(It); return SU; + } else { + InstructionsGroupCandidate.pop_back(); } } return NULL; @@ -381,7 +320,7 @@ void R600SchedStrategy::PrepareNextSlot() { DEBUG(dbgs() << "New Slot\n"); assert (OccupedSlotsMask && "Slot wasn't filled"); OccupedSlotsMask = 0; - memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate)); + InstructionsGroupCandidate.clear(); LoadAlu(); } @@ -462,7 +401,7 @@ SUnit* R600SchedStrategy::pickAlu() { SUnit *SU = AttemptFillSlot(Chan); if (SU) { OccupedSlotsMask |= (1 << Chan); - InstructionsGroupCandidate[Chan] = SU; + InstructionsGroupCandidate.push_back(SU->getInstr()); return SU; } } diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h index d74ff1e076..3d0367fd8e 100644 --- a/lib/Target/R600/R600MachineScheduler.h +++ b/lib/Target/R600/R600MachineScheduler.h @@ -98,7 +98,7 @@ public: virtual void releaseBottomNode(SUnit *SU); private: - SUnit *InstructionsGroupCandidate[4]; + std::vector<MachineInstr *> InstructionsGroupCandidate; int getInstKind(SUnit *SU); bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const; @@ -112,7 +112,6 @@ private: void AssignSlot(MachineInstr *MI, unsigned Slot); SUnit* pickAlu(); SUnit* pickOther(int QID); - bool isBundleable(const MachineInstr& MI); void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst); }; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 063f5faa63..93f8c38a3a 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -58,6 +58,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : computeRegisterProperties(); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand); + setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::ADD, MVT::i32, Legal); @@ -229,6 +234,10 @@ EVT SITargetLowering::getSetCCResultType(EVT VT) const { return MVT::i1; } +MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const { + return MVT::i32; +} + //===----------------------------------------------------------------------===// // Custom DAG Lowering Operations //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 0411565ee3..d65622526f 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -48,6 +48,7 @@ public: virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, MachineBasicBlock * BB) const; virtual EVT getSetCCResultType(EVT VT) const; + virtual MVT getScalarShiftAmountTy(EVT VT) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 67fbdf7be1..98bd3dbb66 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -302,21 +302,8 @@ static void increaseCounters(Counters &Dst, const Counters &Src) { Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]); } -bool SIInsertWaits::unorderedDefines(MachineInstr &MI) { - - uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags; - if (TSFlags & SIInstrFlags::LGKM_CNT) - return true; - - if (TSFlags & SIInstrFlags::EXP_CNT) - return ExpInstrTypesSeen == 3; - - return false; -} - Counters SIInsertWaits::handleOperands(MachineInstr &MI) { - bool UnorderedDefines = unorderedDefines(MI); Counters Result = ZeroCounts; // For each register affected by this @@ -329,8 +316,7 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { if (Op.isDef()) { increaseCounters(Result, UsedRegs[j]); - if (UnorderedDefines) - increaseCounters(Result, DefinedRegs[j]); + increaseCounters(Result, DefinedRegs[j]); } if (Op.isUse()) diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 260c651dd4..2f10c388ad 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -115,16 +115,17 @@ class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK < opName#" $dst, $src0", pattern >; -multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> { +multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass, + RegisterClass dstClass> { def _IMM : SMRD < op, 1, (outs dstClass:$dst), - (ins SReg_64:$sbase, i32imm:$offset), + (ins baseClass:$sbase, i32imm:$offset), asm#" $dst, $sbase, $offset", [] >; def _SGPR : SMRD < op, 0, (outs dstClass:$dst), - (ins SReg_64:$sbase, SReg_32:$soff), + (ins baseClass:$sbase, SReg_32:$soff), asm#" $dst, $sbase, $soff", [] >; } diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 0ab9e4ec0c..05b04a92b3 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -403,9 +403,9 @@ def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT //def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>; //def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>; //def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>; -//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>; -//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>; -//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>; +def BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>; +def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>; +def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>; //def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>; //def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>; //def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>; @@ -458,17 +458,31 @@ def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORM let mayLoad = 1 in { -defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32>; +defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>; +defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>; +defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>; +defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>; +defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>; -//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>; -defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128>; -defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>; -//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>; -//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>; -//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>; -//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>; -//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>; -//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>; +defm S_BUFFER_LOAD_DWORD : SMRD_Helper < + 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32 +>; + +defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper < + 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64 +>; + +defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper < + 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128 +>; + +defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper < + 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256 +>; + +defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < + 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512 +>; } // mayLoad = 1 @@ -840,7 +854,9 @@ defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>; defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>; defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>; defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; -defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; +defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", + [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))] +>; defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; let isCommutable = 1 in { @@ -1133,6 +1149,31 @@ def SI_KILL : InstSI < } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 // Uses = [EXEC], Defs = [EXEC] +let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { + +def SI_INDIRECT_SRC : InstSI < + (outs VReg_32:$dst, SReg_64:$temp), + (ins unknown:$src, VSrc_32:$idx, i32imm:$off), + "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off", + [] +>; + +class SI_INDIRECT_DST<RegisterClass rc> : InstSI < + (outs rc:$dst, SReg_64:$temp), + (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val), + "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val", + [] +> { + let Constraints = "$src = $dst"; +} + +def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>; +def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>; +def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>; +def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>; + +} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0] + } // end IsCodeGenOnly, isPseudo def : Pat< @@ -1241,22 +1282,83 @@ defm : SamplePatterns<VReg_128, v4i32>; defm : SamplePatterns<VReg_256, v8i32>; defm : SamplePatterns<VReg_512, v16i32>; -def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>; -def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>; -def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>; -def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>; +/********** ============================================ **********/ +/********** Extraction, Insertion, Building and Casting **********/ +/********** ============================================ **********/ + +foreach Index = 0-2 in { + def Extract_Element_v2i32_#Index : Extract_Element < + i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v2i32_#Index : Insert_Element < + i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v2f32_#Index : Extract_Element < + f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v2f32_#Index : Insert_Element < + f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-3 in { + def Extract_Element_v4i32_#Index : Extract_Element < + i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v4i32_#Index : Insert_Element < + i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v4f32_#Index : Extract_Element < + f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v4f32_#Index : Insert_Element < + f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-7 in { + def Extract_Element_v8i32_#Index : Extract_Element < + i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v8i32_#Index : Insert_Element < + i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>; + def Extract_Element_v8f32_#Index : Extract_Element < + f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v8f32_#Index : Insert_Element < + f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-15 in { + def Extract_Element_v16i32_#Index : Extract_Element < + i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v16i32_#Index : Insert_Element < + i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v16f32_#Index : Extract_Element < + f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v16f32_#Index : Insert_Element < + f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; +} def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>; def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>; -def : Vector_Build <v4f32, VReg_128, f32, VReg_32>; -def : Vector_Build <v4i32, VReg_128, i32, VReg_32>; +def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>; +def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>; +def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>; def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>; +def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>; def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>; +def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>; def : BitConvert <i32, f32, SReg_32>; def : BitConvert <i32, f32, VReg_32>; @@ -1389,6 +1491,24 @@ def : Pat < (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0) >; +// 1. Offset as 8bit DWORD immediate +def : Pat < + (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset), + (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset) +>; + +// 2. Offset loaded in an 32bit SGPR +def : Pat < + (int_SI_load_const SReg_128:$sbase, imm:$offset), + (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset)) +>; + +// 3. Offset in an 32Bit VGPR +def : Pat < + (int_SI_load_const SReg_128:$sbase, VReg_32:$voff), + (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0) +>; + /********** ================== **********/ /********** VOP3 Patterns **********/ /********** ================== **********/ @@ -1426,4 +1546,48 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>; defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>; defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>; +/********** ====================== **********/ +/********** Indirect adressing **********/ +/********** ====================== **********/ + +multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt, + SI_INDIRECT_DST IndDst> { + // 1. Extract with offset + def : Pat< + (vector_extract (vt rc:$vec), + (i64 (zext (i32 (add VReg_32:$idx, imm:$off)))) + ), + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off)) + >; + + // 2. Extract without offset + def : Pat< + (vector_extract (vt rc:$vec), + (i64 (zext (i32 VReg_32:$idx))) + ), + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0)) + >; + + // 3. Insert with offset + def : Pat< + (vector_insert (vt rc:$vec), (f32 VReg_32:$val), + (i64 (zext (i32 (add VReg_32:$idx, imm:$off)))) + ), + (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val)) + >; + + // 4. Insert without offset + def : Pat< + (vector_insert (vt rc:$vec), (f32 VReg_32:$val), + (i64 (zext (i32 VReg_32:$idx))) + ), + (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val)) + >; +} + +defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>; +defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>; +defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>; +defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>; + } // End isSI predicate diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 7c23d1706b..33bb8157b4 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -16,8 +16,7 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - /* XXX: We may need a seperate intrinsic here for loading integer values */ - def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>; + def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>; def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ; def int_SI_wqm : Intrinsic <[], [], []>; diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index b215aa22db..9a027e77eb 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -66,6 +66,7 @@ private: static const unsigned SkipThreshold = 12; static char ID; + const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); @@ -84,9 +85,14 @@ private: void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); + void LoadM0(MachineInstr &MI, MachineInstr *MovRel); + void IndirectSrc(MachineInstr &MI); + void IndirectDst(MachineInstr &MI); + public: SILowerControlFlowPass(TargetMachine &tm) : - MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } + MachineFunctionPass(ID), TRI(tm.getRegisterInfo()), + TII(tm.getInstrInfo()) { } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -302,6 +308,104 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { MI.eraseFromParent(); } +void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + MachineBasicBlock::iterator I = MI; + + unsigned Save = MI.getOperand(1).getReg(); + unsigned Idx = MI.getOperand(3).getReg(); + + if (AMDGPU::SReg_32RegClass.contains(Idx)) { + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(Idx); + MBB.insert(I, MovRel); + MI.eraseFromParent(); + return; + } + + assert(AMDGPU::SReg_64RegClass.contains(Save)); + assert(AMDGPU::VReg_32RegClass.contains(Idx)); + + // Save the EXEC mask + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) + .addReg(AMDGPU::EXEC); + + // Read the next variant into VCC (lower 32 bits) <- also loop target + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC) + .addReg(Idx); + + // Move index from VCC into M0 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(AMDGPU::VCC); + + // Compare the just read M0 value to all possible Idx values + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) + .addReg(AMDGPU::M0) + .addReg(Idx); + + // Update EXEC, save the original EXEC value to VCC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) + .addReg(AMDGPU::VCC); + + // Do the actual move + MBB.insert(I, MovRel); + + // Update EXEC, switch all done bits to 0 and all todo bits to 1 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) + .addReg(AMDGPU::VCC); + + // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addImm(-7) + .addReg(AMDGPU::EXEC); + + // Restore EXEC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) + .addReg(Save); + + MI.eraseFromParent(); +} + +void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Vec = MI.getOperand(2).getReg(); + unsigned Off = MI.getOperand(4).getImm(); + + MachineInstr *MovRel = + BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) + .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off) + .addReg(AMDGPU::M0, RegState::Implicit) + .addReg(Vec, RegState::Implicit); + + LoadM0(MI, MovRel); +} + +void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Off = MI.getOperand(4).getImm(); + unsigned Val = MI.getOperand(5).getReg(); + + MachineInstr *MovRel = + BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32)) + .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define) + .addReg(Val) + .addReg(AMDGPU::M0, RegState::Implicit) + .addReg(Dst, RegState::Implicit); + + LoadM0(MI, MovRel); +} + bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { bool HaveKill = false; @@ -363,6 +467,17 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::S_BRANCH: Branch(MI); break; + + case AMDGPU::SI_INDIRECT_SRC: + IndirectSrc(MI); + break; + + case AMDGPU::SI_INDIRECT_DST_V2: + case AMDGPU::SI_INDIRECT_DST_V4: + case AMDGPU::SI_INDIRECT_DST_V8: + case AMDGPU::SI_INDIRECT_DST_V16: + IndirectDst(MI); + break; } } } diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 3dcad506d2..4f14931a9c 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -158,15 +158,15 @@ def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>; def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>; // Register class for all vector registers (VGPRs + Interploation Registers) -def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>; +def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>; -def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>; +def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>; -def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>; +def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>; -def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>; +def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>; -def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; +def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>; //===----------------------------------------------------------------------===// // [SV]Src_* register classes, can have either an immediate or an register @@ -174,9 +174,9 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; -def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>; +def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>; def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>; -def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>; +def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>; diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 357879bf6c..b53a1ed095 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -40,7 +40,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index c6cafe59eb..ee88ce77c0 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -610,6 +610,9 @@ struct StringComparator { // Provided for compatibility with MSVC's debug mode. bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; } bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; } + bool operator()(const char *LHS, const char *RHS) const { + return std::strcmp(LHS, RHS) < 0; + } }; } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 7d8b49cdf2..e7282519d5 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -61,6 +63,30 @@ TargetMachine::~TargetMachine() { delete AsmInfo; } +/// \brief Reset the target options based on the function's attributes. +void TargetMachine::resetTargetOptions(const MachineFunction *MF) const { + const Function *F = MF->getFunction(); + TargetOptions &TO = MF->getTarget().Options; + +#define RESET_OPTION(X, Y) \ + do { \ + if (F->hasFnAttribute(Y)) \ + TO.X = \ + (F->getAttributes(). \ + getAttribute(AttributeSet::FunctionIndex, \ + Y).getValueAsString() == "true"); \ + } while (0) + + RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim"); + RESET_OPTION(NoFramePointerElimNonLeaf, "no-frame-pointer-elim-non-leaf"); + RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad"); + RESET_OPTION(UnsafeFPMath, "unsafe-fp-math"); + RESET_OPTION(NoInfsFPMath, "no-infs-fp-math"); + RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math"); + RESET_OPTION(UseSoftFloat, "use-soft-float"); + RESET_OPTION(DisableTailCalls, "disable-tail-calls"); +} + /// getRelocationModel - Returns the code generation relocation model. The /// choices are static, PIC, and dynamic-no-pic, and target default. Reloc::Model TargetMachine::getRelocationModel() const { diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index ee5c2b2bfd..d5568e08d3 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -62,6 +62,9 @@ private: X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); + X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End, + SMLoc SizeDirLoc, unsigned Size); + bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp, SmallString<64> &Err); @@ -170,30 +173,33 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc OffsetOfLoc; bool AddressOf; + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNo; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned SegReg; + const MCExpr *Disp; + unsigned BaseReg; + unsigned IndexReg; + unsigned Scale; + unsigned Size; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNo; - } Reg; - - struct { - const MCExpr *Val; - bool NeedAsmRewrite; - } Imm; - - struct { - unsigned SegReg; - const MCExpr *Disp; - unsigned BaseReg; - unsigned IndexReg; - unsigned Scale; - unsigned Size; - bool NeedSizeDir; - } Mem; + struct TokOp Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; }; X86Operand(KindTy K, SMLoc Start, SMLoc End) @@ -231,11 +237,6 @@ struct X86Operand : public MCParsedAsmOperand { return Imm.Val; } - bool needAsmRewrite() const { - assert(Kind == Immediate && "Invalid access!"); - return Imm.NeedAsmRewrite; - } - const MCExpr *getMemDisp() const { assert(Kind == Memory && "Invalid access!"); return Mem.Disp; @@ -332,11 +333,6 @@ struct X86Operand : public MCParsedAsmOperand { return isImmSExti64i32Value(CE->getValue()); } - unsigned getMemSize() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.Size; - } - bool isOffsetOf() const { return OffsetOfLoc.getPointer(); } @@ -345,11 +341,6 @@ struct X86Operand : public MCParsedAsmOperand { return AddressOf; } - bool needSizeDirective() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.NeedSizeDir; - } - bool isMem() const { return Kind == Memory; } bool isMem8() const { return Kind == Memory && (!Mem.Size || Mem.Size == 8); @@ -485,17 +476,15 @@ struct X86Operand : public MCParsedAsmOperand { return Res; } - static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc, - bool NeedRewrite = true){ + static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); Res->Imm.Val = Val; - Res->Imm.NeedAsmRewrite = NeedRewrite; return Res; } /// Create an absolute memory operand. static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, bool NeedSizeDir = false) { + unsigned Size = 0) { X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -503,7 +492,6 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = 0; Res->Mem.Scale = 1; Res->Mem.Size = Size; - Res->Mem.NeedSizeDir = NeedSizeDir; Res->AddressOf = false; return Res; } @@ -512,7 +500,7 @@ struct X86Operand : public MCParsedAsmOperand { static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, bool NeedSizeDir = false) { + unsigned Size = 0) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); @@ -527,7 +515,6 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = IndexReg; Res->Mem.Scale = Scale; Res->Mem.Size = Size; - Res->Mem.NeedSizeDir = NeedSizeDir; Res->AddressOf = false; return Res; } @@ -890,6 +877,45 @@ public: } }; +X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, + SMLoc End, SMLoc SizeDirLoc, + unsigned Size) { + bool NeedSizeDir = false; + bool IsVarDecl = false; + if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { + const MCSymbol &Sym = SymRef->getSymbol(); + // FIXME: The SemaLookup will fail if the name is anything other then an + // identifier. + // FIXME: Pass a valid SMLoc. + unsigned tLength, tSize, tType; + SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, + tSize, tType, IsVarDecl); + if (!Size) { + Size = tType * 8; // Size is in terms of bits in this context. + NeedSizeDir = Size > 0; + } + } + + // If this is not a VarDecl then assume it is a FuncDecl or some other label + // reference. We need an 'r' constraint here, so we need to create register + // operand to ensure proper matching. Just pick a GPR based on the size of + // a pointer. + if (!IsVarDecl) { + unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); + } + + if (NeedSizeDir) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc, + /*Len*/0, Size)); + + // When parsing inline assembly we set the base register to a non-zero value + // as we don't know the actual value at this time. This is necessary to + // get the matching correct in some cases. + return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, + /*Scale*/1, Start, End, Size); +} + X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, unsigned Size) { const AsmToken &Tok = Parser.getTok(); @@ -914,7 +940,12 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, // Adjust the EndLoc due to the ']'. End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1); Parser.Lex(); - return X86Operand::CreateMem(Disp, Start, End, Size); + if (!isParsingInlineAsm()) + return X86Operand::CreateMem(Disp, Start, End, Size); + + // We want the size directive before the '['. + SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1); + return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size); } } @@ -1036,40 +1067,9 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { if (getParser().parseExpression(Disp, End)) return 0; - bool NeedSizeDir = false; - bool IsVarDecl = false; - if (isParsingInlineAsm()) { - if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { - const MCSymbol &Sym = SymRef->getSymbol(); - // FIXME: The SemaLookup will fail if the name is anything other then an - // identifier. - // FIXME: Pass a valid SMLoc. - unsigned tLength, tSize, tType; - SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, - tSize, tType, IsVarDecl); - if (!Size) - Size = tType * 8; // Size is in terms of bits in this context. - NeedSizeDir = Size > 0; - } - } if (!isParsingInlineAsm()) return X86Operand::CreateMem(Disp, Start, End, Size); - else { - // If this is not a VarDecl then assume it is a FuncDecl or some other label - // reference. We need an 'r' constraint here, so we need to create register - // operand to ensure proper matching. Just pick a GPR based on the size of - // a pointer. - if (!IsVarDecl) { - unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; - return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); - } - - // When parsing inline assembly we set the base register to a non-zero value - // as we don't know the actual value at this time. This is necessary to - // get the matching correct in some cases. - return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, - /*Scale*/1, Start, End, Size, NeedSizeDir); - } + return CreateMemForInlineAsm(Disp, Start, End, Start, Size); } /// Parse the '.' operator. @@ -1197,7 +1197,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext()); - return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false); + return X86Operand::CreateImm(Imm, Start, End); } X86Operand *X86AsmParser::ParseIntelOperand() { @@ -1221,6 +1221,8 @@ X86Operand *X86AsmParser::ParseIntelOperand() { getLexer().is(AsmToken::Minus)) { const MCExpr *Val; if (!getParser().parseExpression(Val, End)) { + if (isParsingInlineAsm()) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start)); return X86Operand::CreateImm(Val, Start, End); } } @@ -1734,242 +1736,74 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, return false; } -bool X86AsmParser:: -processInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { - switch (Inst.getOpcode()) { - default: return false; - case X86::AND16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::AND32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::AND64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg, + bool isCmp) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + if (!isCmp) + TmpInst.addOperand(MCOperand::CreateReg(Reg)); + TmpInst.addOperand(MCOperand::CreateReg(Reg)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; +} - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti16i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::AX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti32i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::EAX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti64i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::RAX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } +bool X86AsmParser:: +processInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { + switch (Inst.getOpcode()) { + default: return false; + case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8); + case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8); + case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8); + case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8); + case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8); + case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8); + case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8); + case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8); + case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8); + case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true); + case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true); + case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true); + case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8); + case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8); + case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8); + case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8); + case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8); + case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8); + case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8); + case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8); + case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8); + case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8); + case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8); + case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8); } } diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 122204ae75..5fbefaec5e 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -446,6 +446,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS) const { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -650,12 +651,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // dst(ModR/M), src1(ModR/M) // dst(ModR/M), src1(ModR/M), imm8 // + // FMA4: + // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; CurOp++; if (HasVEX_4V) VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; CurOp++; @@ -666,9 +674,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // MRMDestReg instructions forms: // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; break; case X86II::MRM0r: case X86II::MRM1r: @@ -1038,9 +1052,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMDestReg: EmitByte(BaseOpcode, CurByte, OS); + SrcRegNum = CurOp + 1; + + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + ++SrcRegNum; + EmitRegModRMByte(MI.getOperand(CurOp), - GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); - CurOp += 2; + GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS); + CurOp = SrcRegNum + 1; break; case X86II::MRMDestMem: diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index ece38aa346..2518e02e2a 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -816,6 +816,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, const MCInstrDesc *Desc) const { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -1032,6 +1033,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, if (HasVEX_4V) VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; CurOp++; @@ -1042,9 +1047,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, // MRMDestReg instructions forms: // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; break; case X86II::MRM0r: case X86II::MRM1r: @@ -1279,9 +1290,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::MRMDestReg: { MCE.emitByte(BaseOpcode); + + unsigned SrcRegNum = CurOp+1; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + emitRegModRMByte(MI.getOperand(CurOp).getReg(), - getX86RegNum(MI.getOperand(CurOp+1).getReg())); - CurOp += 2; + getX86RegNum(MI.getOperand(SrcRegNum).getReg())); + CurOp = SrcRegNum + 1; break; } case X86II::MRMDestMem: { diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index b5c3270065..85155f55e0 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1526,6 +1526,9 @@ bool X86FastISel::FastLowerArguments() { if (!FuncInfo.CanLowerReturn) return false; + if (Subtarget->isTargetWindows()) + return false; + const Function *F = FuncInfo.Fn; if (F->isVarArg()) return false; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 960870dc60..23cfd6d72f 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1053,23 +1053,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v8i16, Custom); setOperationAction(ISD::SRA, MVT::v16i8, Custom); - if (Subtarget->hasInt256()) { - setOperationAction(ISD::SRL, MVT::v2i64, Legal); - setOperationAction(ISD::SRL, MVT::v4i32, Legal); - - setOperationAction(ISD::SHL, MVT::v2i64, Legal); - setOperationAction(ISD::SHL, MVT::v4i32, Legal); + // In the customized shift lowering, the legal cases in AVX2 will be + // recognized. + setOperationAction(ISD::SRL, MVT::v2i64, Custom); + setOperationAction(ISD::SRL, MVT::v4i32, Custom); - setOperationAction(ISD::SRA, MVT::v4i32, Legal); - } else { - setOperationAction(ISD::SRL, MVT::v2i64, Custom); - setOperationAction(ISD::SRL, MVT::v4i32, Custom); + setOperationAction(ISD::SHL, MVT::v2i64, Custom); + setOperationAction(ISD::SHL, MVT::v4i32, Custom); - setOperationAction(ISD::SHL, MVT::v2i64, Custom); - setOperationAction(ISD::SHL, MVT::v4i32, Custom); + setOperationAction(ISD::SRA, MVT::v4i32, Custom); - setOperationAction(ISD::SRA, MVT::v4i32, Custom); - } setOperationAction(ISD::SDIV, MVT::v8i16, Custom); setOperationAction(ISD::SDIV, MVT::v4i32, Custom); } @@ -1186,14 +1179,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); - setOperationAction(ISD::SRL, MVT::v4i64, Legal); - setOperationAction(ISD::SRL, MVT::v8i32, Legal); - - setOperationAction(ISD::SHL, MVT::v4i64, Legal); - setOperationAction(ISD::SHL, MVT::v8i32, Legal); - - setOperationAction(ISD::SRA, MVT::v8i32, Legal); - setOperationAction(ISD::SDIV, MVT::v8i32, Custom); } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); @@ -1210,15 +1195,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::MUL, MVT::v8i32, Custom); setOperationAction(ISD::MUL, MVT::v16i16, Custom); // Don't lower v32i8 because there is no 128-bit byte mul + } - setOperationAction(ISD::SRL, MVT::v4i64, Custom); - setOperationAction(ISD::SRL, MVT::v8i32, Custom); + // In the customized shift lowering, the legal cases in AVX2 will be + // recognized. + setOperationAction(ISD::SRL, MVT::v4i64, Custom); + setOperationAction(ISD::SRL, MVT::v8i32, Custom); - setOperationAction(ISD::SHL, MVT::v4i64, Custom); - setOperationAction(ISD::SHL, MVT::v8i32, Custom); + setOperationAction(ISD::SHL, MVT::v4i64, Custom); + setOperationAction(ISD::SHL, MVT::v8i32, Custom); - setOperationAction(ISD::SRA, MVT::v8i32, Custom); - } + setOperationAction(ISD::SRA, MVT::v8i32, Custom); // Custom lower several nodes for 256-bit types. for (int i = MVT::FIRST_VECTOR_VALUETYPE; @@ -7834,7 +7821,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { Chain.getValue(1)); } - if (Subtarget->isTargetWindows()) { + if (Subtarget->isTargetWindows() || Subtarget->isTargetMingw()) { // Just use the implicit TLS architecture // Need to generate someting similar to: // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage @@ -7854,18 +7841,19 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = DAG.getEntryNode(); // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or - // %gs:0x58 (64-bit). + // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly + // use its literal value of 0x2C. Value *Ptr = Constant::getNullValue(Subtarget->is64Bit() ? Type::getInt8PtrTy(*DAG.getContext(), 256) : Type::getInt32PtrTy(*DAG.getContext(), 257)); - SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, - Subtarget->is64Bit() - ? DAG.getIntPtrConstant(0x58) - : DAG.getExternalSymbol("_tls_array", - getPointerTy()), + SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58) : + (Subtarget->isTargetMingw() ? DAG.getIntPtrConstant(0x2C) : + DAG.getExternalSymbol("_tls_array", getPointerTy())); + + SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, TlsArray, MachinePointerInfo(Ptr), false, false, false, 0); @@ -11490,16 +11478,13 @@ SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { - +static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); - if (!Subtarget->hasSSE2()) - return SDValue(); - // Optimize shl/srl/sra with constant shift amount. if (isSplatVector(Amt.getNode())) { SDValue SclrAmt = Amt->getOperand(0); @@ -11610,6 +11595,224 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } } + // Special case in 32-bit mode, where i64 is expanded into high and low parts. + if (!Subtarget->is64Bit() && + (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + Amt.getOpcode() == ISD::BITCAST && + Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { + Amt = Amt.getOperand(0); + unsigned Ratio = Amt.getValueType().getVectorNumElements() / + VT.getVectorNumElements(); + unsigned RatioInLog2 = Log2_32_Ceil(Ratio); + uint64_t ShiftAmt = 0; + for (unsigned i = 0; i != Ratio; ++i) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i)); + if (C == 0) + return SDValue(); + // 6 == Log2(64) + ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2))); + } + // Check remaining shift amounts. + for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { + uint64_t ShAmt = 0; + for (unsigned j = 0; j != Ratio; ++j) { + ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Amt.getOperand(i + j)); + if (C == 0) + return SDValue(); + // 6 == Log2(64) + ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2))); + } + if (ShAmt != ShiftAmt) + return SDValue(); + } + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + return DAG.getNode(X86ISD::VSHLI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + case ISD::SRL: + return DAG.getNode(X86ISD::VSRLI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + case ISD::SRA: + return DAG.getNode(X86ISD::VSRAI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + } + } + + return SDValue(); +} + +static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, + const X86Subtarget* Subtarget) { + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue R = Op.getOperand(0); + SDValue Amt = Op.getOperand(1); + + if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) || + VT == MVT::v4i32 || VT == MVT::v8i16 || + (Subtarget->hasInt256() && + ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) || + VT == MVT::v8i32 || VT == MVT::v16i16))) { + SDValue BaseShAmt; + EVT EltVT = VT.getVectorElementType(); + + if (Amt.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = VT.getVectorNumElements(); + unsigned i, j; + for (i = 0; i != NumElts; ++i) { + if (Amt.getOperand(i).getOpcode() == ISD::UNDEF) + continue; + break; + } + for (j = i; j != NumElts; ++j) { + SDValue Arg = Amt.getOperand(j); + if (Arg.getOpcode() == ISD::UNDEF) continue; + if (Arg != Amt.getOperand(i)) + break; + } + if (i != NumElts && j == NumElts) + BaseShAmt = Amt.getOperand(i); + } else { + if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) + Amt = Amt.getOperand(0); + if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE && + cast<ShuffleVectorSDNode>(Amt)->isSplat()) { + SDValue InVec = Amt.getOperand(0); + if (InVec.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = InVec.getValueType().getVectorNumElements(); + unsigned i = 0; + for (; i != NumElts; ++i) { + SDValue Arg = InVec.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + BaseShAmt = Arg; + break; + } + } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { + unsigned SplatIdx = + cast<ShuffleVectorSDNode>(Amt)->getSplatIndex(); + if (C->getZExtValue() == SplatIdx) + BaseShAmt = InVec.getOperand(1); + } + } + if (BaseShAmt.getNode() == 0) + BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt, + DAG.getIntPtrConstant(0)); + } + } + + if (BaseShAmt.getNode()) { + if (EltVT.bitsGT(MVT::i32)) + BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt); + else if (EltVT.bitsLT(MVT::i32)) + BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); + + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v2i64: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v4i64: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG); + } + case ISD::SRA: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v4i32: + case MVT::v8i16: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG); + } + case ISD::SRL: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v2i64: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v4i64: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG); + } + } + } + } + + // Special case in 32-bit mode, where i64 is expanded into high and low parts. + if (!Subtarget->is64Bit() && + (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + Amt.getOpcode() == ISD::BITCAST && + Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { + Amt = Amt.getOperand(0); + unsigned Ratio = Amt.getValueType().getVectorNumElements() / + VT.getVectorNumElements(); + std::vector<SDValue> Vals(Ratio); + for (unsigned i = 0; i != Ratio; ++i) + Vals[i] = Amt.getOperand(i); + for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { + for (unsigned j = 0; j != Ratio; ++j) + if (Vals[j] != Amt.getOperand(i + j)) + return SDValue(); + } + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1)); + case ISD::SRL: + return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1)); + case ISD::SRA: + return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1)); + } + } + + return SDValue(); +} + +SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue R = Op.getOperand(0); + SDValue Amt = Op.getOperand(1); + SDValue V; + + if (!Subtarget->hasSSE2()) + return SDValue(); + + V = LowerScalarImmediateShift(Op, DAG, Subtarget); + if (V.getNode()) + return V; + + V = LowerScalarVariableShift(Op, DAG, Subtarget); + if (V.getNode()) + return V; + + // AVX2 has VPSLLV/VPSRAV/VPSRLV. + if (Subtarget->hasInt256()) { + if (Op.getOpcode() == ISD::SRL && + (VT == MVT::v2i64 || VT == MVT::v4i32 || + VT == MVT::v4i64 || VT == MVT::v8i32)) + return Op; + if (Op.getOpcode() == ISD::SHL && + (VT == MVT::v2i64 || VT == MVT::v4i32 || + VT == MVT::v4i64 || VT == MVT::v8i32)) + return Op; + if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32)) + return Op; + } + // Lower SHL with variable shift amount. if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT)); @@ -11826,8 +12029,23 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, // fall through case MVT::v4i32: case MVT::v8i16: { - SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, - Op.getOperand(0), ShAmt, DAG); + // (sext (vzext x)) -> (vsext x) + SDValue Op0 = Op.getOperand(0); + SDValue Op00 = Op0.getOperand(0); + SDValue Tmp1; + // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. + if (Op0.getOpcode() == ISD::BITCAST && + Op00.getOpcode() == ISD::VECTOR_SHUFFLE) + Tmp1 = LowerVectorIntExtend(Op00, DAG); + if (Tmp1.getNode()) { + SDValue Tmp1Op0 = Tmp1.getOperand(0); + assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && + "This optimization is invalid without a VZEXT."); + return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); + } + + // If the above didn't work, then just use Shift-Left + Shift-Right. + Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG); return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG); } } @@ -12262,7 +12480,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::UINT_TO_FP: { - if (N->getOperand(0).getValueType() != MVT::v2i32 && + assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); + if (N->getOperand(0).getValueType() != MVT::v2i32 || N->getValueType(0) != MVT::v2f32) return; SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, @@ -15918,124 +16137,12 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { - EVT VT = N->getValueType(0); if (N->getOpcode() == ISD::SHL) { SDValue V = PerformSHLCombine(N, DAG); if (V.getNode()) return V; } - // On X86 with SSE2 support, we can transform this to a vector shift if - // all elements are shifted by the same amount. We can't do this in legalize - // because the a constant vector is typically transformed to a constant pool - // so we have no knowledge of the shift amount. - if (!Subtarget->hasSSE2()) - return SDValue(); - - if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 && - (!Subtarget->hasInt256() || - (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16))) - return SDValue(); - - SDValue ShAmtOp = N->getOperand(1); - EVT EltVT = VT.getVectorElementType(); - DebugLoc DL = N->getDebugLoc(); - SDValue BaseShAmt = SDValue(); - if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) { - unsigned NumElts = VT.getVectorNumElements(); - unsigned i = 0; - for (; i != NumElts; ++i) { - SDValue Arg = ShAmtOp.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - BaseShAmt = Arg; - break; - } - // Handle the case where the build_vector is all undef - // FIXME: Should DAG allow this? - if (i == NumElts) - return SDValue(); - - for (; i != NumElts; ++i) { - SDValue Arg = ShAmtOp.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - if (Arg != BaseShAmt) { - return SDValue(); - } - } - } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE && - cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) { - SDValue InVec = ShAmtOp.getOperand(0); - if (InVec.getOpcode() == ISD::BUILD_VECTOR) { - unsigned NumElts = InVec.getValueType().getVectorNumElements(); - unsigned i = 0; - for (; i != NumElts; ++i) { - SDValue Arg = InVec.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - BaseShAmt = Arg; - break; - } - } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { - unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); - if (C->getZExtValue() == SplatIdx) - BaseShAmt = InVec.getOperand(1); - } - } - if (BaseShAmt.getNode() == 0) { - // Don't create instructions with illegal types after legalize - // types has run. - if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) && - !DCI.isBeforeLegalize()) - return SDValue(); - - BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, - DAG.getIntPtrConstant(0)); - } - } else - return SDValue(); - - // The shift amount is an i32. - if (EltVT.bitsGT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt); - else if (EltVT.bitsLT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt); - - // The shift amount is identical so we can do a vector shift. - SDValue ValOp = N->getOperand(0); - switch (N->getOpcode()) { - default: - llvm_unreachable("Unknown shift opcode!"); - case ISD::SHL: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v2i64: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v4i64: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG); - } - case ISD::SRA: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v4i32: - case MVT::v8i16: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG); - } - case ISD::SRL: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v2i64: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v4i64: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG); - } - } + return SDValue(); } // CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..)) @@ -16346,13 +16453,19 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, // Validate that the Mask operand is a vector sra node. // FIXME: what to do for bytes, since there is a psignb/pblendvb, but // there is no psrai.b - if (Mask.getOpcode() != X86ISD::VSRAI) - return SDValue(); - - // Check that the SRA is all signbits. - SDValue SraC = Mask.getOperand(1); - unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); + unsigned SraAmt = ~0; + if (Mask.getOpcode() == ISD::SRA) { + SDValue Amt = Mask.getOperand(1); + if (isSplatVector(Amt.getNode())) { + SDValue SclrAmt = Amt->getOperand(0); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) + SraAmt = C->getZExtValue(); + } + } else if (Mask.getOpcode() == X86ISD::VSRAI) { + SDValue SraC = Mask.getOperand(1); + SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); + } if ((SraAmt + 1) != EltBits) return SDValue(); diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index d86a4065a7..19bdb96f05 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// // LEA - Load Effective Address - +let SchedRW = [WriteLEA] in { let neverHasSideEffects = 1 in def LEA16r : I<0x8D, MRMSrcMem, (outs GR16:$dst), (ins i32mem:$src), @@ -36,41 +36,52 @@ let isReMaterializable = 1 in def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", [(set GR64:$dst, lea64addr:$src)], IIC_LEA>; - - +} // SchedRW //===----------------------------------------------------------------------===// // Fixed-Register Multiplication and Division Instructions. // +// SchedModel info for instruction that loads one value and gets the second +// (and possibly third) value from a register. +// This is used for instructions that put the memory operands before other +// uses. +class SchedLoadReg<SchedWrite SW> : Sched<[SW, + // Memory operand. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // Register reads (implicit or explicit). + ReadAfterLd, ReadAfterLd]>; + // Extra precision multiplication // AL is really implied by AX, but the registers in Defs must match the // SDNode results (i8, i32). +// AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, GR8:$src)), - (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8 - + (implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>; +// AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), "mul{w}\t$src", - [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16 - + [], IIC_MUL16_REG>, OpSize, Sched<[WriteIMul]>; +// EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), - "mul{l}\t$src", // EAX,EDX = EAX*GR32 + "mul{l}\t$src", [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/], - IIC_MUL32_REG>; + IIC_MUL32_REG>, Sched<[WriteIMul]>; +// RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), - "mul{q}\t$src", // RAX,RDX = RAX*GR64 + "mul{q}\t$src", [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/], - IIC_MUL64>; - + IIC_MUL64>, Sched<[WriteIMul]>; +// AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), "mul{b}\t$src", @@ -78,51 +89,60 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, (loadi8 addr:$src))), - (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8] - + (implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>; +// AX,DX = AX*[mem16] let mayLoad = 1, neverHasSideEffects = 1 in { let Defs = [AX,DX,EFLAGS], Uses = [AX] in def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), "mul{w}\t$src", - [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16] - + [], IIC_MUL16_MEM>, OpSize, SchedLoadReg<WriteIMulLd>; +// EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), "mul{l}\t$src", - [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32] + [], IIC_MUL32_MEM>, SchedLoadReg<WriteIMulLd>; +// RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), - "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64] + "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>; } let neverHasSideEffects = 1 in { +// AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [], - IIC_IMUL8>; // AL,AH = AL*GR8 + IIC_IMUL8>, Sched<[WriteIMul]>; +// AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [], - IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16 + IIC_IMUL16_RR>, OpSize, Sched<[WriteIMul]>; +// EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [], - IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32 + IIC_IMUL32_RR>, Sched<[WriteIMul]>; +// RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [], - IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64 + IIC_IMUL64_RR>, Sched<[WriteIMul]>; let mayLoad = 1 in { +// AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), - "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8] + "imul{b}\t$src", [], IIC_IMUL8>, SchedLoadReg<WriteIMulLd>; +// AX,DX = AX*[mem16] let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), - "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize; - // AX,DX = AX*[mem16] + "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize, + SchedLoadReg<WriteIMulLd>; +// EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), - "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32] + "imul{l}\t$src", [], IIC_IMUL32_MEM>, SchedLoadReg<WriteIMulLd>; +// RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64] + "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>; } } // neverHasSideEffects @@ -130,7 +150,8 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), let Defs = [EFLAGS] in { let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y +let isCommutable = 1, SchedRW = [WriteIMul] in { +// X = IMUL Y, Z --> X = IMUL Z, Y // Register-Register Signed Integer Multiply def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", @@ -148,9 +169,10 @@ def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>, TB; -} +} // isCommutable, SchedRW // Register-Memory Signed Integer Multiply +let SchedRW = [WriteIMulLd, ReadAfterLd] in { def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", @@ -172,12 +194,14 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (X86smul_flag GR64:$src1, (load addr:$src2)))], IIC_IMUL64_RM>, TB; +} // SchedRW } // Constraints = "$src1 = $dst" } // Defs = [EFLAGS] // Surprisingly enough, these are not two address instructions! let Defs = [EFLAGS] in { +let SchedRW = [WriteIMul] in { // Register-Integer Signed Integer Multiply def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), @@ -216,9 +240,10 @@ def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, i64immSExt8:$src2))], IIC_IMUL64_RRI>; - +} // SchedRW // Memory-Integer Signed Integer Multiply +let SchedRW = [WriteIMulLd] in { def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -260,6 +285,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 (X86smul_flag (load addr:$src1), i64immSExt8:$src2))], IIC_IMUL64_RMI>; +} // SchedRW } // Defs = [EFLAGS] @@ -267,6 +293,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 // unsigned division/remainder let hasSideEffects = 1 in { // so that we don't speculatively execute +let SchedRW = [WriteIDiv] in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", [], IIC_DIV8_REG>; @@ -280,24 +307,30 @@ def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), "div{q}\t$src", [], IIC_DIV64>; +} // SchedRW let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "div{b}\t$src", [], IIC_DIV8_MEM>; + "div{b}\t$src", [], IIC_DIV8_MEM>, + SchedLoadReg<WriteIDivLd>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "div{w}\t$src", [], IIC_DIV16>, OpSize; + "div{w}\t$src", [], IIC_DIV16>, OpSize, + SchedLoadReg<WriteIDivLd>; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), - "div{l}\t$src", [], IIC_DIV32>; + "div{l}\t$src", [], IIC_DIV32>, + SchedLoadReg<WriteIDivLd>; // RDX:RAX/[mem64] = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), - "div{q}\t$src", [], IIC_DIV64>; + "div{q}\t$src", [], IIC_DIV64>, + SchedLoadReg<WriteIDivLd>; } // Signed division/remainder. +let SchedRW = [WriteIDiv] in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "idiv{b}\t$src", [], IIC_IDIV8>; @@ -311,20 +344,25 @@ def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), "idiv{q}\t$src", [], IIC_IDIV64>; +} // SchedRW let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "idiv{b}\t$src", [], IIC_IDIV8>; + "idiv{b}\t$src", [], IIC_IDIV8>, + SchedLoadReg<WriteIDivLd>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "idiv{w}\t$src", [], IIC_IDIV16>, OpSize; + "idiv{w}\t$src", [], IIC_IDIV16>, OpSize, + SchedLoadReg<WriteIDivLd>; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), - "idiv{l}\t$src", [], IIC_IDIV32>; + "idiv{l}\t$src", [], IIC_IDIV32>, + SchedLoadReg<WriteIDivLd>; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), - "idiv{q}\t$src", [], IIC_IDIV64>; + "idiv{q}\t$src", [], IIC_IDIV64>, + SchedLoadReg<WriteIDivLd>; } } // hasSideEffects = 0 @@ -335,7 +373,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), // unary instructions let CodeSize = 2 in { let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1), "neg{b}\t$dst", [(set GR8:$dst, (ineg GR8:$src1)), @@ -351,8 +389,10 @@ def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1), def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst", [(set GR64:$dst, (ineg GR64:$src1)), (implicit EFLAGS)], IIC_UNARY_REG>; -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW +// Read-modify-write negate. +let SchedRW = [WriteALULd, WriteRMW] in { def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst", [(store (ineg (loadi8 addr:$dst)), addr:$dst), @@ -368,12 +408,13 @@ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", [(store (ineg (loadi64 addr:$dst)), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; +} // SchedRW } // Defs = [EFLAGS] // Note: NOT does not set EFLAGS! -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { // Match xor -1 to not. Favors these over a move imm + xor to save code size. let AddedComplexity = 15 in { def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1), @@ -388,8 +429,9 @@ def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1), def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst", [(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>; } -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW +let SchedRW = [WriteALULd, WriteRMW] in { def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst", [(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; @@ -402,11 +444,12 @@ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), [(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", [(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; +} // SchedRW } // CodeSize // TODO: inc/dec is slow for P4, but fast for Pentium-M. let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { let CodeSize = 2 in def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "inc{b}\t$dst", @@ -454,9 +497,9 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), Requires<[In64BitMode]>; } // isConvertibleToThreeAddress = 1, CodeSize = 2 -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2 in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -491,9 +534,9 @@ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", [(store (add (loadi32 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>, Requires<[In64BitMode]>; -} // CodeSize = 2 +} // CodeSize = 2, SchedRW -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { let CodeSize = 2 in def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "dec{b}\t$dst", @@ -514,10 +557,10 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))], IIC_UNARY_REG>; } // CodeSize = 2 -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2 in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -532,7 +575,7 @@ let CodeSize = 2 in { def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(store (add (loadi64 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; -} // CodeSize = 2 +} // CodeSize = 2, SchedRW } // Defs = [EFLAGS] @@ -646,7 +689,8 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f = MRMDestReg> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>, + Sched<[WriteALU]>; // BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has // just a regclass (no eflags) as a result. @@ -689,7 +733,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs typeinfo.RegClass:$dst), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> { + mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>, + Sched<[WriteALU]> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; let hasSideEffects = 0; @@ -699,7 +744,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM>, + Sched<[WriteALU]> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; let hasSideEffects = 0; @@ -710,7 +756,8 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, dag outlist, list<dag> pattern> : ITy<opcode, MRMSrcMem, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALULd, ReadAfterLd]>; // BinOpRM_R - Instructions like "add reg, reg, [mem]". class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -746,7 +793,8 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; } @@ -783,7 +831,8 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALU]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -821,7 +870,8 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, list<dag> pattern> : ITy<opcode, MRMDestMem, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>; + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]>; // BinOpMR_RMW - Instructions like "add [mem], reg". class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -849,7 +899,8 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern, bits<8> opcode = 0x80> : ITy<opcode, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]> { let ImmT = typeinfo.ImmEncoding; } @@ -881,7 +932,8 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern> : ITy<0x82, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -913,7 +965,7 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Register areg, string operands> : ITy<opcode, RawFrm, typeinfo, (outs), (ins typeinfo.ImmOperand:$src), - mnemonic, operands, []> { + mnemonic, operands, []>, Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; let Uses = [areg]; let Defs = [areg]; @@ -1199,7 +1251,7 @@ let isCompare = 1, Defs = [EFLAGS] in { // register class is constrained to GR8_NOREX. let isPseudo = 1 in def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), - "", [], IIC_BIN_NONMEM>; + "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>; } //===----------------------------------------------------------------------===// @@ -1210,11 +1262,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop, def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))], - IIC_BIN_NONMEM>; + IIC_BIN_NONMEM>, Sched<[WriteALU]>; def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, - (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>; + (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>, + Sched<[WriteALULd, ReadAfterLd]>; } let Predicates = [HasBMI], Defs = [EFLAGS] in { @@ -1261,6 +1314,7 @@ let Predicates = [HasBMI2] in { // ADCX Instruction // let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + let SchedRW = [WriteALU] in { def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "adcx{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8, OpSize; @@ -1268,8 +1322,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "adcx{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + } // SchedRW - let mayLoad = 1 in { + let mayLoad = 1, SchedRW = [WriteALULd] in { def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "adcx{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8, OpSize; @@ -1284,6 +1339,7 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { // ADOX Instruction // let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + let SchedRW = [WriteALU] in { def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS; @@ -1291,8 +1347,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>; + } // SchedRW - let mayLoad = 1 in { + let mayLoad = 1, SchedRW = [WriteALULd] in { def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS; diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index 8f2d0a1aae..a967a4da5c 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -16,7 +16,7 @@ // SetCC instructions. multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", - isCommutable = 1 in { + isCommutable = 1, SchedRW = [WriteALU] in { def NAME#16rr : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), @@ -37,7 +37,8 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { IIC_CMOV32_RR>, TB; } - let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in { + let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", + SchedRW = [WriteALULd, ReadAfterLd] in { def NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), @@ -83,11 +84,11 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> { def r : I<opc, MRM0r, (outs GR8:$dst), (ins), !strconcat(Mnemonic, "\t$dst"), [(set GR8:$dst, (X86setcc OpNode, EFLAGS))], - IIC_SET_R>, TB; + IIC_SET_R>, TB, Sched<[WriteALU]>; def m : I<opc, MRM0m, (outs), (ins i8mem:$dst), !strconcat(Mnemonic, "\t$dst"), [(store (X86setcc OpNode, EFLAGS), addr:$dst)], - IIC_SET_M>, TB; + IIC_SET_M>, TB, Sched<[WriteALU, WriteStore]>; } // Uses = [EFLAGS] } diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 734e5982b2..2b27bc5bc5 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -153,7 +153,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR32:$addr)], IIC_RET>; + [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } @@ -161,7 +161,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR64:$addr)], IIC_RET>; + [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } @@ -220,7 +220,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in { def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", - [(set GR8:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller // encoding and avoids a partial-register update sometimes, but doing so @@ -229,11 +229,12 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", // to an MCInst. def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), "", - [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize; + [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize, + Sched<[WriteZero]>; // FIXME: Set encoding to pseudo. def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; } // We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a @@ -245,7 +246,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", let Defs = [EFLAGS], isCodeGenOnly=1, AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however @@ -254,10 +255,10 @@ let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), "", [(set GR64:$dst, i64immZExt32:$src)], - IIC_ALU_NONMEM>; + IIC_ALU_NONMEM>, Sched<[WriteALU]>; // Use sbb to materialize carry bit. -let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1 in { +let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in { // FIXME: These are pseudo ops that should be replaced with Pat<> patterns. // However, Pat<> can't replicate the destination reg into the inputs of the // result. @@ -990,9 +991,6 @@ def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)), // This corresponds to add $foo@tpoff, %rax def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>; -// This corresponds to mov foo@tpoff(%rbx), %eax -def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))), - (MOV64rm tglobaltlsaddr :$dst)>; // Direct PC relative function call for small code model. 32-bit displacement diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 2eb454ded2..5ef0c3c13b 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -42,48 +42,54 @@ let neverHasSideEffects = 1 in { let neverHasSideEffects = 1 in { def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALU]>; let mayLoad = 1 in def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALULd]>; } // neverHasSideEffects = 1 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB, + Sched<[WriteALULd]>; def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; let neverHasSideEffects = 1 in { def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALU]>; let mayLoad = 1 in def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALULd]>; } // neverHasSideEffects = 1 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB, + Sched<[WriteALULd]>; def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>, - TB; + TB, Sched<[WriteALULd]>; // These are the same as the regular MOVZX32rr8 and MOVZX32rm8 // except that they use GR32_NOREX for the output operand register class @@ -92,12 +98,12 @@ let neverHasSideEffects = 1, isCodeGenOnly = 1 in { def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [], IIC_MOVZX>, TB; + [], IIC_MOVZX>, TB, Sched<[WriteALU]>; let mayLoad = 1 in def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [], IIC_MOVZX>, TB; + [], IIC_MOVZX>, TB, Sched<[WriteALULd]>; } // MOVSX64rr8 always has a REX prefix and it has an 8-bit register @@ -106,24 +112,28 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, // were generalized, this would require a special register class. def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; + [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB; + [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>; + [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>, + Sched<[WriteALU]>; def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>; + [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>, + Sched<[WriteALULd]>; // movzbq and movzwq encodings for the disassembler def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), @@ -145,17 +155,19 @@ let isCodeGenOnly = 1 in { // Use movzbl instead of movzbq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), - "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; + "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), "", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>, - TB; + TB, Sched<[WriteALULd]>; // Use movzwl instead of movzwq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB; + "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "", [(set GR64:$dst, (zextloadi64i16 addr:$src))], - IIC_MOVZX>, TB; + IIC_MOVZX>, TB, Sched<[WriteALULd]>; // There's no movzlq instruction, but movl can be used for this purpose, using // implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero @@ -165,9 +177,10 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), // necessarily all zero. In such cases, we fall back to these explicit zext // instructions. def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), - "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>; + "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>, + Sched<[WriteALU]>; def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "", [(set GR64:$dst, (zextloadi64i32 addr:$src))], - IIC_MOVZX>; + IIC_MOVZX>, Sched<[WriteALULd]>; } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index d989ec7bb0..39165e24a8 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -786,7 +786,7 @@ def LEAVE64 : I<0xC9, RawFrm, // let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in { -let mayLoad = 1 in { +let mayLoad = 1, SchedRW = [WriteLoad] in { def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [], IIC_POP_REG16>, OpSize; def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [], @@ -803,9 +803,9 @@ def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [], def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize; def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>, Requires<[In32BitMode]>; -} +} // mayLoad, SchedRW -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[], IIC_PUSH_REG>, OpSize; def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[], @@ -832,29 +832,30 @@ def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>, def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>, Requires<[In32BitMode]>; -} +} // mayStore, SchedRW } let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { -let mayLoad = 1 in { +let mayLoad = 1, SchedRW = [WriteLoad] in { def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>; def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>; def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [], IIC_POP_MEM>; -} -let mayStore = 1 in { +} // mayLoad, SchedRW +let mayStore = 1, SchedRW = [WriteStore] in { def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>; def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>; def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [], IIC_PUSH_MEM>; -} +} // mayStore, SchedRW } -let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { +let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1, + SchedRW = [WriteStore] in { def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm), "push{q}\t$imm", [], IIC_PUSH_IMM>; def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), @@ -871,17 +872,18 @@ def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>, Requires<[In64BitMode]>; let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], - mayLoad=1, neverHasSideEffects=1 in { + mayLoad = 1, neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>, Requires<[In32BitMode]>; } let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], - mayStore=1, neverHasSideEffects=1 in { + mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>, Requires<[In32BitMode]>; } -let Constraints = "$src = $dst" in { // GR32 = bswap GR32 +let Constraints = "$src = $dst", SchedRW = [WriteALU] in { +// GR32 = bswap GR32 def BSWAP32r : I<0xC8, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "bswap{l}\t$dst", @@ -890,7 +892,7 @@ def BSWAP32r : I<0xC8, AddRegFrm, def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), "bswap{q}\t$dst", [(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB; -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW // Bit scan instructions. let Defs = [EFLAGS] in { @@ -976,7 +978,7 @@ def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>; //===----------------------------------------------------------------------===// // Move Instructions. // - +let SchedRW = [WriteMove] in { let neverHasSideEffects = 1 in { def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src), "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; @@ -987,6 +989,7 @@ def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; } + let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", @@ -1004,7 +1007,9 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>; } +} // SchedRW +let SchedRW = [WriteStore] in { def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>; @@ -1017,9 +1022,11 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>; +} // SchedRW /// moffs8, moffs16 and moffs32 versions of moves. The immediate is a /// 32-bit offset from the PC. These are only valid in x86-32 mode. +let SchedRW = [WriteALU] in { def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|AL, $src}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; @@ -1038,6 +1045,7 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, EAX}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; +} // FIXME: These definitions are utterly broken // Just leave them commented out for now because they're useless outside @@ -1055,7 +1063,7 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), */ -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), @@ -1066,7 +1074,7 @@ def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; } -let canFoldAsLoad = 1, isReMaterializable = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>; @@ -1081,6 +1089,7 @@ def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), [(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>; } +let SchedRW = [WriteStore] in { def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store GR8:$src, addr:$dst)], IIC_MOV_MEM>; @@ -1093,6 +1102,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store GR64:$src, addr:$dst)], IIC_MOV_MEM>; +} // SchedRW // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so // that they can be used for copying and storing h registers, which can't be @@ -1101,34 +1111,37 @@ let isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in def MOV8rr_NOREX : I<0x88, MRMDestReg, (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src), - "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>; + "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>, + Sched<[WriteMove]>; let mayStore = 1 in def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], - IIC_MOV_MEM>; + IIC_MOV_MEM>, Sched<[WriteStore]>; let mayLoad = 1, neverHasSideEffects = 1, canFoldAsLoad = 1, isReMaterializable = 1 in def MOV8rm_NOREX : I<0x8A, MRMSrcMem, (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], - IIC_MOV_MEM>; + IIC_MOV_MEM>, Sched<[WriteLoad]>; } // Condition code ops, incl. set if equal/not equal/... +let SchedRW = [WriteALU] in { let Defs = [EFLAGS], Uses = [AH] in def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", [(set EFLAGS, (X86sahf AH))], IIC_AHF>; let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [], IIC_AHF>; // AH = flags - +} // SchedRW //===----------------------------------------------------------------------===// // Bit tests instructions: BT, BTS, BTR, BTC. let Defs = [EFLAGS] in { +let SchedRW = [WriteALU] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>, @@ -1139,13 +1152,14 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB; +} // SchedRW // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's // perspective, this is pretty bizarre. Make these instructions disassembly // only for now. -let mayLoad = 1, hasSideEffects = 0 in { +let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in { def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", // [(X86bt (loadi16 addr:$src1), GR16:$src2), @@ -1166,6 +1180,7 @@ let mayLoad = 1, hasSideEffects = 0 in { >, TB; } +let SchedRW = [WriteALU] in { def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))], @@ -1178,10 +1193,12 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))], IIC_BT_RI>, TB; +} // SchedRW // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. +let SchedRW = [WriteALU] in { def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2)) @@ -1194,8 +1211,10 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi64 addr:$src1), i64immSExt8:$src2))], IIC_BT_MI>, TB; +} // SchedRW let hasSideEffects = 0 in { +let SchedRW = [WriteALU] in { def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1203,8 +1222,9 @@ def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd] in { def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1214,6 +1234,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1221,8 +1242,9 @@ def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1232,6 +1254,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; } +let SchedRW = [WriteALU] in { def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1239,6 +1262,7 @@ def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; +} // SchedRW let mayLoad = 1, mayStore = 1 in { def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), @@ -1250,6 +1274,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1257,8 +1282,9 @@ def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1268,6 +1294,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; } +let SchedRW = [WriteALU] in { def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1275,8 +1302,9 @@ def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1286,6 +1314,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1293,8 +1322,9 @@ def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1483,6 +1513,7 @@ def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB; // Table lookup instructions def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>; +let SchedRW = [WriteMicrocoded] in { // ASCII Adjust After Addition // sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>, @@ -1512,7 +1543,9 @@ def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>, // sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>, Requires<[In32BitMode]>; +} // SchedRW +let SchedRW = [WriteSystem] in { // Check Array Index Against Bounds def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize, @@ -1528,11 +1561,13 @@ def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>, Requires<[In32BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // MOVBE Instructions // let Predicates = [HasMOVBE] in { + let SchedRW = [WriteALULd] in { def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "movbe{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>, @@ -1545,6 +1580,8 @@ let Predicates = [HasMOVBE] in { "movbe{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>, T8; + } + let SchedRW = [WriteStore] in { def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "movbe{w}\t{$src, $dst|$dst, $src}", [(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>, @@ -1557,6 +1594,7 @@ let Predicates = [HasMOVBE] in { "movbe{q}\t{$src, $dst|$dst, $src}", [(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>, T8; + } } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 0979752757..4d43ee1f15 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -16,6 +16,8 @@ class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> { InstrItinClass rr = arg_rr; InstrItinClass rm = arg_rm; + // InstrSchedModel info. + X86FoldableSchedWrite Sched = WriteFAdd; } class SizeItins<OpndItins arg_s, OpndItins arg_d> { @@ -33,6 +35,7 @@ class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm, // scalar +let Sched = WriteFAdd in { def SSE_ALU_F32S : OpndItins< IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM >; @@ -40,11 +43,13 @@ def SSE_ALU_F32S : OpndItins< def SSE_ALU_F64S : OpndItins< IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM >; +} def SSE_ALU_ITINS_S : SizeItins< SSE_ALU_F32S, SSE_ALU_F64S >; +let Sched = WriteFMul in { def SSE_MUL_F32S : OpndItins< IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM >; @@ -52,11 +57,13 @@ def SSE_MUL_F32S : OpndItins< def SSE_MUL_F64S : OpndItins< IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM >; +} def SSE_MUL_ITINS_S : SizeItins< SSE_MUL_F32S, SSE_MUL_F64S >; +let Sched = WriteFDiv in { def SSE_DIV_F32S : OpndItins< IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F64S_RM >; @@ -64,12 +71,14 @@ def SSE_DIV_F32S : OpndItins< def SSE_DIV_F64S : OpndItins< IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM >; +} def SSE_DIV_ITINS_S : SizeItins< SSE_DIV_F32S, SSE_DIV_F64S >; // parallel +let Sched = WriteFAdd in { def SSE_ALU_F32P : OpndItins< IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM >; @@ -77,11 +86,13 @@ def SSE_ALU_F32P : OpndItins< def SSE_ALU_F64P : OpndItins< IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM >; +} def SSE_ALU_ITINS_P : SizeItins< SSE_ALU_F32P, SSE_ALU_F64P >; +let Sched = WriteFMul in { def SSE_MUL_F32P : OpndItins< IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F64P_RM >; @@ -89,11 +100,13 @@ def SSE_MUL_F32P : OpndItins< def SSE_MUL_F64P : OpndItins< IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM >; +} def SSE_MUL_ITINS_P : SizeItins< SSE_MUL_F32P, SSE_MUL_F64P >; +let Sched = WriteFDiv in { def SSE_DIV_F32P : OpndItins< IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F64P_RM >; @@ -101,6 +114,7 @@ def SSE_DIV_F32P : OpndItins< def SSE_DIV_F64P : OpndItins< IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM >; +} def SSE_DIV_ITINS_P : SizeItins< SSE_DIV_F32P, SSE_DIV_F64P @@ -110,6 +124,7 @@ def SSE_BIT_ITINS_P : OpndItins< IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM >; +let Sched = WriteVecALU in { def SSE_INTALU_ITINS_P : OpndItins< IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM >; @@ -117,7 +132,9 @@ def SSE_INTALU_ITINS_P : OpndItins< def SSE_INTALUQ_ITINS_P : OpndItins< IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM >; +} +let Sched = WriteVecIMul in def SSE_INTMUL_ITINS_P : OpndItins< IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM >; @@ -148,13 +165,15 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; } def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>; + [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class @@ -169,14 +188,16 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>( !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, RC:$src2))], itins.rr>; + RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, mem_cpat:$src2))], itins.rm>; + RC:$src1, mem_cpat:$src2))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_packed - SSE 1 & 2 packed instructions class @@ -189,14 +210,16 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>, + Sched<[itins.Sched]>; let mayLoad = 1 in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], - itins.rm, d>; + itins.rm, d>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class @@ -209,12 +232,14 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rr, IIC_DEFAULT, d>; + pat_rr, IIC_DEFAULT, d>, + Sched<[WriteVecLogic]>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rm, IIC_DEFAULT, d>; + pat_rm, IIC_DEFAULT, d>, + Sched<[WriteVecLogicLd, ReadAfterLd]>; } //===----------------------------------------------------------------------===// @@ -345,7 +370,7 @@ let Predicates = [HasAVX] in { // Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "", [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>; def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "", @@ -362,7 +387,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4f32 immAllZerosV))]>; } @@ -379,7 +404,7 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>; // at the rename stage without using any execution unit, so SET0PSY // and SET0PDY can be used for vector int instructions without penalty let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, Predicates = [HasAVX] in { + isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in { def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v8f32 immAllZerosV))]>; } @@ -417,7 +442,7 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; let Predicates = [HasAVX2] in @@ -444,14 +469,14 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, !strconcat(base_opc, asm_opr), [(set VR128:$dst, (vt (OpNode VR128:$src1, (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>; + IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>; // For the disassembler let isCodeGenOnly = 1, hasSideEffects = 0 in def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), !strconcat(base_opc, asm_opr), - [], IIC_SSE_MOV_S_RR>; + [], IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>; } multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, @@ -464,7 +489,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - VEX, VEX_LIG; + VEX, VEX_LIG, Sched<[WriteStore]>; // SSE1 & 2 let Constraints = "$src1 = $dst" in { defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr, @@ -473,7 +498,8 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + Sched<[WriteStore]>; } // Loading from memory automatically zeroing upper bits. @@ -482,11 +508,11 @@ multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>, VEX, VEX_LIG; + IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>; def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>; + IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>; } defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS; @@ -745,11 +771,13 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, bit IsReMaterializable = 1> { let neverHasSideEffects = 1 in def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>; + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>, + Sched<[WriteMove]>; let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>; + [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>, + Sched<[WriteLoad]>; } defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, @@ -790,6 +818,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, TB, OpSize; +let SchedRW = [WriteStore] in { def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -822,9 +851,10 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>, VEX, VEX_L; +} // SchedRW // For disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], @@ -880,6 +910,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), (VMOVUPDYmr addr:$dst, VR256:$src)>; +let SchedRW = [WriteStore] in { def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -896,9 +927,10 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v2f64 VR128:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>; +} // SchedRW // For disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -1009,7 +1041,7 @@ let Predicates = [HasAVX] in { (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))), addr:$dst), - (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))), addr:$dst), (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; @@ -1044,7 +1076,7 @@ let Predicates = [UseSSE1] in { // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteMove] in { def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), "movaps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, VEX; @@ -1061,7 +1093,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), // Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! -let canFoldAsLoad = 1, isReMaterializable = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { let isCodeGenOnly = 1 in { def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", @@ -1095,14 +1127,16 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode, [(set VR128:$dst, (psnode VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], - itin, SSEPackedSingle>, TB; + itin, SSEPackedSingle>, TB, + Sched<[WriteShuffleLd, ReadAfterLd]>; def PDrm : PI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), !strconcat(base_opc, "d", asm_opr), [(set VR128:$dst, (v2f64 (pdnode VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], - itin, SSEPackedDouble>, TB, OpSize; + itin, SSEPackedDouble>, TB, OpSize, + Sched<[WriteShuffleLd, ReadAfterLd]>; } @@ -1123,6 +1157,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -1143,6 +1178,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // Shuffle with VMOVLPS @@ -1222,6 +1258,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -1246,6 +1283,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // VMOVHPS patterns @@ -1296,14 +1334,14 @@ let AddedComplexity = 20 in { [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), @@ -1311,13 +1349,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; } let Predicates = [HasAVX] in { @@ -1352,22 +1390,27 @@ def SSE_CVT_PD : OpndItins< IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_PS : OpndItins< IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_Scalar : OpndItins< IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_32 : OpndItins< IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_64 : OpndItins< IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SD2SI : OpndItins< IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM >; @@ -1377,10 +1420,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, [(set DstRC:$dst, (OpNode SrcRC:$src))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -1388,10 +1431,10 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, OpndItins itins> { let neverHasSideEffects = 1 in { def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [], itins.rr, d>; + [], itins.rr, d>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [], itins.rm, d>; + [], itins.rm, d>, Sched<[itins.Sched.Folded]>; } } @@ -1399,11 +1442,13 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { let neverHasSideEffects = 1 in { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), - !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, + Sched<[WriteCvtI2F]>; let mayLoad = 1 in def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src), - !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, + Sched<[WriteCvtI2FLd, ReadAfterLd]>; } // neverHasSideEffects = 1 } @@ -1534,10 +1579,12 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>; + [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>, + Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>; + [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -1549,14 +1596,14 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, @@ -1701,13 +1748,15 @@ let neverHasSideEffects = 1 in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG; + IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR64:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, - XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG; + XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, @@ -1716,26 +1765,28 @@ def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround FR64:$src))], - IIC_SSE_CVT_Scalar_RR>; + IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>; def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround (loadf64 addr:$src)))], IIC_SSE_CVT_Scalar_RM>, XD, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>; def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2F]>; def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, @@ -1743,13 +1794,15 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>, + Sched<[WriteCvtF2F]>; def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } // Convert scalar single to scalar double @@ -1759,13 +1812,15 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RR>, - XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG; + XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR32:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, - XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; + XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } def : Pat<(f64 (fextend FR32:$src)), @@ -1784,12 +1839,12 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fextend FR32:$src))], IIC_SSE_CVT_Scalar_RR>, XS, - Requires<[UseSSE2]>; + Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>; def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (extloadf32 addr:$src))], IIC_SSE_CVT_Scalar_RM>, XS, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>; // extload f32 -> f64. This matches load+fextend because we have a hack in // the isel (PreprocessForFPConvert) that can introduce loads after dag @@ -1806,57 +1861,61 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2F]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>, + Sched<[WriteCvtF2F]>; def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; + IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; + IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX, VEX_L; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX, VEX_L; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; + IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; // Convert Packed Double FP to Packed DW Integers @@ -1867,7 +1926,7 @@ let Predicates = [HasAVX] in { def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, - VEX; + VEX, Sched<[WriteCvtF2I]>; // XMM only def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", @@ -1875,18 +1934,20 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "vcvtpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX; + (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX, + Sched<[WriteCvtF2ILd]>; // YMM only def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L; + (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L, + Sched<[WriteCvtF2I]>; def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>, - VEX, VEX_L; + VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", (VCVTPD2DQYrr VR128:$dst, VR256:$src)>; } @@ -1895,11 +1956,11 @@ def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>; def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix @@ -1907,32 +1968,33 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; + IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; + IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX, VEX_L; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX, VEX_L; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L, + Sched<[WriteCvtF2ILd]>; def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; + IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), @@ -1982,7 +2044,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, VEX; + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>; // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. @@ -1995,19 +2057,19 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>; // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvtt_pd2dq_256 VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>; @@ -2021,12 +2083,13 @@ let Predicates = [HasAVX] in { def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, + Sched<[WriteCvtF2ILd]>; // Convert packed single to packed double let Predicates = [HasAVX] in { @@ -2034,32 +2097,32 @@ let Predicates = [HasAVX] in { def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB, VEX; + IIC_SSE_CVT_PD_RR>, TB, VEX, Sched<[WriteCvtF2F]>; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB, VEX; + IIC_SSE_CVT_PD_RM>, TB, VEX, Sched<[WriteCvtF2FLd]>; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2_pd_256 VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L, Sched<[WriteCvtF2F]>; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; } let Predicates = [UseSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB; + IIC_SSE_CVT_PD_RR>, TB, Sched<[WriteCvtF2F]>; def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB; + IIC_SSE_CVT_PD_RM>, TB, Sched<[WriteCvtF2FLd]>; } // Convert Packed DW Integers to Packed Double FP @@ -2067,30 +2130,33 @@ let Predicates = [HasAVX] in { let neverHasSideEffects = 1, mayLoad = 1 in def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", - []>, VEX; + []>, VEX, Sched<[WriteCvtI2FLd]>; def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX; + (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX, + Sched<[WriteCvtI2F]>; def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtdq2_pd_256 - (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L; + (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L, + Sched<[WriteCvtI2FLd]>; def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, - (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L; + (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L, + Sched<[WriteCvtI2F]>; } let neverHasSideEffects = 1, mayLoad = 1 in def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>; def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>; // AVX 256-bit register conversion intrinsics let Predicates = [HasAVX] in { @@ -2107,7 +2173,7 @@ let Predicates = [HasAVX] in { def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>, VEX; + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>; // XMM only def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", @@ -2116,31 +2182,31 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2psx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>; // YMM only def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2_ps_256 VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", (VCVTPD2PSYrr VR128:$dst, VR256:$src)>; def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>; // AVX 256-bit register conversion intrinsics @@ -2193,22 +2259,24 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, def rr : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), (ld_frag addr:$src2), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [], - IIC_SSE_ALU_F32S_RR>; + IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [], - IIC_SSE_ALU_F32S_RM>; + IIC_SSE_ALU_F32S_RM>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -2241,12 +2309,14 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC, (ins VR128:$src1, VR128:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, VR128:$src, imm:$cc))], - itins.rr>; + itins.rr>, + Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, x86memop:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, (load addr:$src), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } // Aliases to match intrinsics which expect XMM operand(s). @@ -2276,12 +2346,14 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], - IIC_SSE_COMIS_RR, d>; + IIC_SSE_COMIS_RR, d>, + Sched<[WriteFAdd]>; def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), (ld_frag addr:$src2)))], - IIC_SSE_COMIS_RM, d>; + IIC_SSE_COMIS_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; } let Defs = [EFLAGS] in { @@ -2338,20 +2410,23 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, def rri : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], - IIC_SSE_CMPP_RR, d>; + IIC_SSE_CMPP_RR, d>, + Sched<[WriteFAdd]>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], - IIC_SSE_CMPP_RM, d>; + IIC_SSE_CMPP_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rri_alt : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RR, d>; + asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>; def rmi_alt : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RM, d>; + asm_alt, [], IIC_SSE_CMPP_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; } } @@ -2427,12 +2502,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffle]>; } defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, @@ -2516,13 +2593,14 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, (outs RC:$dst), (ins RC:$src1, RC:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], - IIC_SSE_UNPCK, d>; + IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, (mem_frag addr:$src2))))], - IIC_SSE_UNPCK, d>; + IIC_SSE_UNPCK, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, @@ -2613,10 +2691,11 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, Domain d> { def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>; + [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>, + Sched<[WriteVecLogic]>; def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], - IIC_SSE_MOVMSK, d>, REX_W; + IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>; } let Predicates = [HasAVX] in { @@ -2644,18 +2723,18 @@ let Predicates = [HasAVX] in { // Assembler Only def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, - SSEPackedSingle>, TB, VEX; + SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>; def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, SSEPackedDouble>, TB, - OpSize, VEX; + OpSize, VEX, Sched<[WriteVecLogic]>; def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, - SSEPackedSingle>, TB, VEX, VEX_L; + SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>; def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, SSEPackedDouble>, TB, - OpSize, VEX, VEX_L; + OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>; } defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", @@ -2693,7 +2772,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, @@ -2701,7 +2781,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -2967,6 +3048,7 @@ let isCodeGenOnly = 1 in { /// /// And, we have a special variant form for a full-vector intrinsic form. +let Sched = WriteFSqrt in { def SSE_SQRTP : OpndItins< IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM >; @@ -2974,7 +3056,9 @@ def SSE_SQRTP : OpndItins< def SSE_SQRTS : OpndItins< IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM >; +} +let Sched = WriteFRcp in { def SSE_RCPP : OpndItins< IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM >; @@ -2982,6 +3066,7 @@ def SSE_RCPP : OpndItins< def SSE_RCPS : OpndItins< IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM >; +} /// sse1_fp_unop_s - SSE1 unops in scalar form. multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, @@ -2991,24 +3076,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR32:$src1, FR32:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]>; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3016,13 +3103,15 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>; + [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>; + [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. @@ -3033,24 +3122,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR32:$src1, FR32:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]>; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3058,17 +3149,17 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; let Constraints = "$src1 = $dst" in { def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rr>; + [], itins.rr>, Sched<[itins.Sched]>; let mayLoad = 1, hasSideEffects = 0 in def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rm>; + [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -3080,30 +3171,32 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>; + [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>; + [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. @@ -3115,33 +3208,33 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V8F32Int VR256:$src))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_s - SSE2 unops in scalar form. @@ -3152,35 +3245,40 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR64:$src1, FR64:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>; + [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>, + Sched<[itins.Sched]>; // See the comments in sse1_fp_unop_s for why this is OptForSize. def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>; def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>; + [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>; + [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_p - SSE2 unops in vector forms. @@ -3191,30 +3289,32 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>; + [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>; + [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } // Square root. @@ -3305,52 +3405,48 @@ let Predicates = [UseSSE1] in { //===----------------------------------------------------------------------===// let AddedComplexity = 400 in { // Prefer non-temporal versions - def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - - let ExeDomain = SSEPackedInt in - def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2i64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - - def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; - - def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v8f32 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - let ExeDomain = SSEPackedInt in - def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4i64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; -} +let SchedRW = [WriteStore] in { +def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; +def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; + +let ExeDomain = SSEPackedInt in +def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2i64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; + +def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +let ExeDomain = SSEPackedInt in +def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4i64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; -let AddedComplexity = 400 in { // Prefer non-temporal versions def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)], @@ -3366,9 +3462,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)], IIC_SSE_MOVNT>; -def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; - // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti{l}\t{$src, $dst|$dst, $src}", @@ -3380,14 +3473,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), [(nontemporalstore (i64 GR64:$src), addr:$dst)], IIC_SSE_MOVNT>, TB, Requires<[HasSSE2]>; -} +} // SchedRW = [WriteStore] + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; +} // AddedComplexity //===----------------------------------------------------------------------===// // SSE 1 & 2 - Prefetch and memory fence //===----------------------------------------------------------------------===// // Prefetch intrinsic. -let Predicates = [HasSSE1] in { +let Predicates = [HasSSE1], SchedRW = [WriteLoad] in { def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))], IIC_SSE_PREFETCH>, TB; @@ -3402,6 +3502,8 @@ def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src), IIC_SSE_PREFETCH>, TB; } +// FIXME: How should these memory instructions be modeled? +let SchedRW = [WriteLoad] in { // Flush cache def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflush\t$src", [(int_x86_sse2_clflush addr:$src)], @@ -3421,6 +3523,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins), def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>, TB, Requires<[HasSSE2]>; +} // SchedRW def : Pat<(X86SFence), (SFENCE)>; def : Pat<(X86LFence), (LFENCE)>; @@ -3450,7 +3553,7 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), let ExeDomain = SSEPackedInt in { // SSE integer instructions -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteMove] in { def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, VEX; @@ -3466,7 +3569,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), } // For Disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, @@ -3484,7 +3587,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, VEX; @@ -3501,7 +3604,7 @@ let Predicates = [HasAVX] in { } } -let mayStore = 1, neverHasSideEffects = 1 in { +let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, @@ -3520,6 +3623,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), } } +let SchedRW = [WriteMove] in { let neverHasSideEffects = 1 in def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -3538,9 +3642,10 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; } +} // SchedRW let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/], @@ -3552,7 +3657,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), XS, Requires<[UseSSE2]>; } -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/], @@ -3580,6 +3685,7 @@ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src), // SSE2 - Packed Integer Arithmetic Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecIMul in def SSE_PMADD : OpndItins< IIC_SSE_PMADD, IIC_SSE_PMADD >; @@ -3598,14 +3704,15 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128, @@ -3639,20 +3746,22 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))], - itins.rr>; + itins.rr>, Sched<[WriteVecShift]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, i128mem:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, - (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>; + (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>, + Sched<[WriteVecShiftLd, ReadAfterLd]>; def ri : PDIi8<opc2, ImmForm, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>; + [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>, + Sched<[WriteVecShift]>; } /// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types @@ -3667,14 +3776,16 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>; + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), - (bitconvert (memop_frag addr:$src2)))))]>; + (bitconvert (memop_frag addr:$src2)))))]>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -3779,7 +3890,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. def VPSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3825,7 +3936,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR256, v8i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 256-bit logical shifts. def VPSLLDQYri : PDIi8<0x73, MRM7r, (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), @@ -3871,7 +3982,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P>; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. def PSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3966,14 +4077,15 @@ let Predicates = [HasAVX] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX; + IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX2] in { @@ -3983,14 +4095,15 @@ let Predicates = [HasAVX2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX, VEX_L; + IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L, + Sched<[WriteShuffleLd]>; } let Predicates = [UseSSE2] in { @@ -4000,14 +4113,15 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>; + IIC_SSE_PSHUF>, Sched<[WriteShuffle]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, + Sched<[WriteShuffleLd]>; } } } // ExeDomain = SSEPackedInt @@ -4043,7 +4157,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, Sched<[WriteShuffle]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -4052,7 +4166,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, [(set VR128:$dst, (OpNode VR128:$src1, (bc_frag (memopv2i64 addr:$src2))))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, @@ -4060,12 +4175,14 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, def Yrr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>; + [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>, + Sched<[WriteShuffle]>; def Yrm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (OpNode VR256:$src1, - (bc_frag (memopv4i64 addr:$src2))))]>; + (bc_frag (memopv4i64 addr:$src2))))]>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4142,7 +4259,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>; + (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffle]>; def rmi : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3), @@ -4151,7 +4269,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), - imm:$src3))], IIC_SSE_PINSRW>; + imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } // Extract @@ -4160,12 +4279,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))]>, TB, OpSize, VEX; + imm:$src2))]>, TB, OpSize, VEX, + Sched<[WriteShuffle]>; def PEXTRWri : PDIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))], IIC_SSE_PEXTRW>; + imm:$src2))], IIC_SSE_PEXTRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; // Insert let Predicates = [HasAVX] in { @@ -4173,7 +4294,7 @@ let Predicates = [HasAVX] in { def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, TB, OpSize, VEX_4V; + []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst" in @@ -4185,7 +4306,7 @@ let Constraints = "$src1 = $dst" in // SSE2 - Packed Mask Creation //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", @@ -4213,7 +4334,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), // SSE2 - Conditional Store //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { let Uses = [EDI] in def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), @@ -4252,41 +4373,42 @@ def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteMove]>; def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>; + (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, + Sched<[WriteMove]>; def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; //===---------------------------------------------------------------------===// // Move Int Doubleword to Single Scalar @@ -4294,22 +4416,22 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int to Packed Double Int @@ -4317,26 +4439,29 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX, + Sched<[WriteMove]>; def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, + Sched<[WriteMove]>; def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int first element to Doubleword Int // +let SchedRW = [WriteMove] in { def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "vmov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), @@ -4349,6 +4474,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>; +} //SchedRW //===---------------------------------------------------------------------===// // Bitcast FR64 <-> GR64 @@ -4357,28 +4483,28 @@ let Predicates = [HasAVX] in def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, - VEX; + VEX, Sched<[WriteLoad]>; def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVD_ToGP>; + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // Move Scalar Single to Double Int @@ -4386,23 +4512,24 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))], - IIC_SSE_MOVD_ToGP>, VEX; + IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>; def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))], - IIC_SSE_MOVD_ToGP>; + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // Patterns and instructions to describe movd/movq to XMM register zero-extends // +let SchedRW = [WriteMove] in { let AddedComplexity = 15 in { def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", @@ -4428,8 +4555,9 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), (v2i64 (scalar_to_vector GR64:$src)))))], IIC_SSE_MOVDQ>; } +} // SchedRW -let AddedComplexity = 20 in { +let AddedComplexity = 20, SchedRW = [WriteLoad] in { def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -4442,7 +4570,7 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))], IIC_SSE_MOVDQ>; -} +} // AddedComplexity, SchedRW let Predicates = [HasAVX] in { // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. @@ -4491,6 +4619,8 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}", //===---------------------------------------------------------------------===// // Move Quadword Int to Packed Quadword Int // + +let SchedRW = [WriteLoad] in { def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -4502,10 +4632,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (scalar_to_vector (loadi64 addr:$src))))], IIC_SSE_MOVDQ>, XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix +} // SchedRW //===---------------------------------------------------------------------===// // Move Packed Quadword Int to Quadword Int // +let SchedRW = [WriteStore] in { def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (vector_extract (v2i64 VR128:$src), @@ -4516,17 +4648,19 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>; +} // SchedRW //===---------------------------------------------------------------------===// // Store / copy lower 64-bits of a XMM register. // def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; + [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX, + Sched<[WriteStore]>; def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; let AddedComplexity = 20 in def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), @@ -4535,7 +4669,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, VEX, Requires<[HasAVX]>; + XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>; let AddedComplexity = 20 in def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), @@ -4544,7 +4678,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, Requires<[UseSSE2]>; + XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>; let Predicates = [HasAVX], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), @@ -4574,6 +4708,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)), // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in // IA32 document. movq xmm1, xmm2 does clear the high bits. // +let SchedRW = [WriteVecLogic] in { let AddedComplexity = 15 in def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -4586,7 +4721,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], IIC_SSE_MOVQ_RR>, XS, Requires<[UseSSE2]>; +} // SchedRW +let SchedRW = [WriteVecLogicLd] in { let AddedComplexity = 20 in def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -4602,6 +4739,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), IIC_SSE_MOVDQ>, XS, Requires<[UseSSE2]>; } +} // SchedRW let AddedComplexity = 20 in { let Predicates = [HasAVX] in { @@ -4619,6 +4757,7 @@ let AddedComplexity = 20 in { } // Instructions to match in the assembler +let SchedRW = [WriteMove] in { def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, VEX, VEX_W; @@ -4629,16 +4768,19 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, VEX, VEX_W; +} // SchedRW // Instructions for the disassembler // xr = XMM register // xm = mem64 +let SchedRW = [WriteMove] in { let Predicates = [HasAVX] in def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS; +} // SchedRW //===---------------------------------------------------------------------===// // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP @@ -4649,11 +4791,11 @@ multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (vt (OpNode RC:$src)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (OpNode (mem_frag addr:$src)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX] in { @@ -4709,25 +4851,27 @@ multiclass sse3_replicate_dfp<string OpcodeStr> { let neverHasSideEffects = 1 in def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [], IIC_SSE_MOV_LH>; + [], IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (X86Movddup (scalar_to_vector (loadf64 addr:$src)))))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>; } // FIXME: Merge with above classe when there're patterns for the ymm version multiclass sse3_replicate_dfp_y<string OpcodeStr> { def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>; + [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>, + Sched<[WriteShuffle]>; def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (X86Movddup - (scalar_to_vector (loadf64 addr:$src)))))]>; + (scalar_to_vector (loadf64 addr:$src)))))]>, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX] in { @@ -4775,6 +4919,7 @@ let Predicates = [UseSSE3] in { // SSE3 - Move Unaligned Integer //===---------------------------------------------------------------------===// +let SchedRW = [WriteLoad] in { let Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", @@ -4788,6 +4933,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))], IIC_SSE_LDDQU>; +} //===---------------------------------------------------------------------===// // SSE3 - Arithmetic @@ -4801,13 +4947,15 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm : I<0xD0, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>; + [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4844,14 +4992,15 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, + Sched<[WriteFAdd]>; def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], - IIC_SSE_HADDSUB_RM>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; } multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { @@ -4859,14 +5008,15 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, + Sched<[WriteFAdd]>; def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], - IIC_SSE_HADDSUB_RM>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4915,7 +5065,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>, - OpSize; + OpSize, Sched<[WriteVecALU]>; def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), @@ -4923,7 +5073,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (IntId128 (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>, - OpSize; + OpSize, Sched<[WriteVecALULd]>; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. @@ -4933,14 +5083,15 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 VR256:$src))]>, - OpSize; + OpSize, Sched<[WriteVecALU]>; def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 - (bitconvert (memopv4i64 addr:$src))))]>, OpSize; + (bitconvert (memopv4i64 addr:$src))))]>, OpSize, + Sched<[WriteVecALULd]>; } let Predicates = [HasAVX] in { @@ -4972,6 +5123,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecALU in { def SSE_PHADDSUBD : OpndItins< IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM >; @@ -4981,12 +5133,16 @@ def SSE_PHADDSUBSW : OpndItins< def SSE_PHADDSUBW : OpndItins< IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM >; +} +let Sched = WriteShuffle in def SSE_PSHUFB : OpndItins< IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM >; +let Sched = WriteVecALU in def SSE_PSIGN : OpndItins< IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM >; +let Sched = WriteVecIMul in def SSE_PMULHRSW : OpndItins< IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW >; @@ -5003,7 +5159,7 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, - OpSize; + OpSize, Sched<[itins.Sched]>; def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, @@ -5011,7 +5167,8 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, - (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize; + (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. @@ -5025,7 +5182,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize; + OpSize, Sched<[itins.Sched]>; def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -5033,7 +5190,8 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src2))))]>, OpSize, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, @@ -5175,7 +5333,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize; + [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>; let mayLoad = 1 in def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), @@ -5183,7 +5341,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize; + [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5193,13 +5351,13 @@ multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> { (ins VR256:$src1, VR256:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; + []>, OpSize, Sched<[WriteShuffle]>; let mayLoad = 1 in def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; + []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5247,6 +5405,7 @@ def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), // SSSE3 - Thread synchronization //===---------------------------------------------------------------------===// +let SchedRW = [WriteSystem] in { let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, @@ -5260,6 +5419,7 @@ let Uses = [ECX, EAX] in def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>, TB, Requires<[HasSSE3]>; +} // SchedRW def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>; diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 3caa1b538c..053417ccde 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +let SchedRW = [WriteSystem] in { let Defs = [RAX, RDX] in def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>, TB; @@ -35,6 +36,7 @@ let Uses = [EFLAGS] in def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))], IIC_INT3>; +} // SchedRW def : Pat<(debugtrap), (INT3)>; @@ -43,6 +45,7 @@ def : Pat<(debugtrap), // FIXME: This doesn't work because InstAlias can't match immediate constants. //def : InstAlias<"int\t$3", (INT3)>; +let SchedRW = [WriteSystem] in { def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", [(int_x86_int imm:$trap)], IIC_INT>; @@ -65,11 +68,13 @@ def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize; def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>; def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>, Requires<[In64BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // Input/Output Instructions. // +let SchedRW = [WriteSystem] in { let Defs = [AL], Uses = [DX] in def IN8rr : I<0xEC, RawFrm, (outs), (ins), "in{b}\t{%dx, %al|AL, DX}", [], IIC_IN_RR>; @@ -113,10 +118,12 @@ def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", [], IIC_INS>; def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", [], IIC_INS>, OpSize; def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", [], IIC_INS>; +} // SchedRW //===----------------------------------------------------------------------===// // Moves to and from debug registers +let SchedRW = [WriteSystem] in { def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB; def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src), @@ -126,10 +133,12 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB; def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Moves to and from control registers +let SchedRW = [WriteSystem] in { def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB; def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src), @@ -139,6 +148,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB; def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Segment override instruction prefixes @@ -155,6 +165,7 @@ def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; // Moves to and from segment registers. // +let SchedRW = [WriteMove] in { def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize; def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src), @@ -182,10 +193,12 @@ def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>; def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>; +} // SchedRW //===----------------------------------------------------------------------===// // Segmentation support instructions. +let SchedRW = [WriteSystem] in { def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB; def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), @@ -347,10 +360,12 @@ def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", [], IIC_VERW_MEM>, TB; def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", [], IIC_VERW_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Descriptor-table support instructions +let SchedRW = [WriteSystem] in { def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>; def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), @@ -385,9 +400,11 @@ def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), "lldt{w}\t$src", [], IIC_LLDT_REG>, TB; def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), "lldt{w}\t$src", [], IIC_LLDT_MEM>, TB; - +} // SchedRW + //===----------------------------------------------------------------------===// // Specialized register support +let SchedRW = [WriteSystem] in { def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB; def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB; def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB; @@ -410,14 +427,18 @@ def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src), "lmsw{w}\t$src", [], IIC_LMSW_REG>, TB; def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Cache instructions +let SchedRW = [WriteSystem] in { def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB; def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // XSAVE instructions +let SchedRW = [WriteSystem] in { let Defs = [RDX, RAX], Uses = [RCX] in def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; @@ -438,6 +459,7 @@ let Uses = [RDX, RAX] in { def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; } +} // SchedRW //===----------------------------------------------------------------------===// // VIA PadLock crypto instructions diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 3af1b3e06b..a8a9fd8acc 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -407,6 +407,57 @@ ReSimplify: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; + // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B + // if one of the registers is extended, but other isn't. + case X86::VMOVAPDrr: + case X86::VMOVAPDYrr: + case X86::VMOVAPSrr: + case X86::VMOVAPSYrr: + case X86::VMOVDQArr: + case X86::VMOVDQAYrr: + case X86::VMOVDQUrr: + case X86::VMOVDQUYrr: + case X86::VMOVUPDrr: + case X86::VMOVUPDYrr: + case X86::VMOVUPSrr: + case X86::VMOVUPSYrr: { + if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) && + X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; + case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; + case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; + case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; + case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; + case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; + case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; + case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; + case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; + case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; + case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; + case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; + } + OutMI.setOpcode(NewOpc); + } + break; + } + case X86::VMOVSDrr: + case X86::VMOVSSrr: { + if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) && + X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break; + case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break; + } + OutMI.setOpcode(NewOpc); + } + break; + } + // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register // inputs modeled as normal uses instead of implicit uses. As such, truncate // off all but the first operand (the callee). FIXME: Change isel. diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index d99d085298..bcdd0eb56d 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -7,6 +7,88 @@ // //===----------------------------------------------------------------------===// +// InstrSchedModel annotations for out-of-order CPUs. +// +// These annotations are independent of the itinerary classes defined below. + +// Instructions with folded loads need to read the memory operand immediately, +// but other register operands don't have to be read until the load is ready. +// These operands are marked with ReadAfterLd. +def ReadAfterLd : SchedRead; + +// Instructions with both a load and a store folded are modeled as a folded +// load + WriteRMW. +def WriteRMW : SchedWrite; + +// Most instructions can fold loads, so almost every SchedWrite comes in two +// variants: With and without a folded load. +// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite +// with a folded load. +class X86FoldableSchedWrite : SchedWrite { + // The SchedWrite to use when a load is folded into the instruction. + SchedWrite Folded; +} + +// Multiclass that produces a linked pair of SchedWrites. +multiclass X86SchedWritePair { + // Register-Memory operation. + def Ld : SchedWrite; + // Register-Register operation. + def NAME : X86FoldableSchedWrite { + let Folded = !cast<SchedWrite>(NAME#"Ld"); + } +} + +// Arithmetic. +defm WriteALU : X86SchedWritePair; // Simple integer ALU op. +defm WriteIMul : X86SchedWritePair; // Integer multiplication. +defm WriteIDiv : X86SchedWritePair; // Integer division. +def WriteLEA : SchedWrite; // LEA instructions can't fold loads. + +// Integer shifts and rotates. +defm WriteShift : X86SchedWritePair; + +// Loads, stores, and moves, not folded with other operations. +def WriteLoad : SchedWrite; +def WriteStore : SchedWrite; +def WriteMove : SchedWrite; + +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. +def WriteZero : SchedWrite; + +// Branches don't produce values, so they have no latency, but they still +// consume resources. Indirect branches can fold loads. +defm WriteJump : X86SchedWritePair; + +// Floating point. This covers both scalar and vector operations. +defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. +defm WriteFMul : X86SchedWritePair; // Floating point multiplication. +defm WriteFDiv : X86SchedWritePair; // Floating point division. +defm WriteFSqrt : X86SchedWritePair; // Floating point square root. +defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal. + +// Vector integer operations. +defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. +defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. +defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply. + +// Vector bitwise operations. +// These are often used on both floating point and integer vectors. +defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor. +defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends. + +// Conversion between integer and float. +defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer. +defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float. +defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion. + +// Catch-all for expensive system instructions. +def WriteSystem : SchedWrite; + +// Old microcoded instructions that nobody use. +def WriteMicrocoded : SchedWrite; + //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for X86 def IIC_DEFAULT : InstrItinClass; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index be2a997b8e..3e3b86edbb 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -169,6 +169,29 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + static const CostTblEntry<MVT> AVX2CostTable[] = { + // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to + // customize them to detect the cases where shift amount is a scalar one. + { ISD::SHL, MVT::v4i32, 1 }, + { ISD::SRL, MVT::v4i32, 1 }, + { ISD::SRA, MVT::v4i32, 1 }, + { ISD::SHL, MVT::v8i32, 1 }, + { ISD::SRL, MVT::v8i32, 1 }, + { ISD::SRA, MVT::v8i32, 1 }, + { ISD::SHL, MVT::v2i64, 1 }, + { ISD::SRL, MVT::v2i64, 1 }, + { ISD::SHL, MVT::v4i64, 1 }, + { ISD::SRL, MVT::v4i64, 1 }, + }; + + // Look for AVX2 lowering tricks. + if (ST->hasAVX2()) { + int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable), + ISD, LT.second); + if (Idx != -1) + return LT.first * AVX2CostTable[Idx].Cost; + } + static const CostTblEntry<MVT> AVX1CostTable[] = { // We don't have to scalarize unsupported ops. We can issue two half-sized // operations and we only need to extract the upper YMM half. @@ -257,8 +280,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, }; |