Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp | 55
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 83
-rw-r--r--  lib/Target/ARM/A15SDOptimizer.cpp | 704
-rw-r--r--  lib/Target/ARM/ARM.h | 1
-rw-r--r--  lib/Target/ARM/ARM.td | 10
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 57
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 55
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 11
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 9
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 16
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.cpp | 94
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 250
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 1
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 9
-rw-r--r--  lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 50
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.h | 3
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.h | 3
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 307
-rw-r--r--  lib/Target/Mips/CMakeLists.txt | 4
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 10
-rw-r--r--  lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 308
-rw-r--r--  lib/Target/Mips/Mips16ISelDAGToDAG.h | 51
-rw-r--r--  lib/Target/Mips/Mips16ISelLowering.cpp | 689
-rw-r--r--  lib/Target/Mips/Mips16ISelLowering.h | 80
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.td | 2
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 670
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.h | 93
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 893
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 75
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 4
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 3
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 21
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 460
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.h | 57
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.cpp | 197
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.h | 46
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 2
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp | 81
-rw-r--r--  lib/Target/PowerPC/PPCCallingConv.td | 5
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp | 123
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.h | 12
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 6
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 325
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 21
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 205
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstrFormats.td | 12
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 166
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 296
-rw-r--r--  lib/Target/PowerPC/PPCMachineFunctionInfo.h | 15
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 260
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 21
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td | 18
-rw-r--r--  lib/Target/R600/AMDGPUInstructions.td | 4
-rw-r--r--  lib/Target/R600/AMDILISelDAGToDAG.cpp | 33
-rw-r--r--  lib/Target/R600/R600InstrInfo.cpp | 54
-rw-r--r--  lib/Target/R600/R600InstrInfo.h | 3
-rw-r--r--  lib/Target/R600/R600Instructions.td | 4
-rw-r--r--  lib/Target/R600/R600MachineFunctionInfo.cpp | 1
-rw-r--r--  lib/Target/R600/R600MachineFunctionInfo.h | 1
-rw-r--r--  lib/Target/R600/R600MachineScheduler.cpp | 75
-rw-r--r--  lib/Target/R600/R600MachineScheduler.h | 3
-rw-r--r--  lib/Target/R600/SIISelLowering.cpp | 9
-rw-r--r--  lib/Target/R600/SIISelLowering.h | 1
-rw-r--r--  lib/Target/R600/SIInsertWaits.cpp | 16
-rw-r--r--  lib/Target/R600/SIInstrInfo.td | 7
-rw-r--r--  lib/Target/R600/SIInstructions.td | 212
-rw-r--r--  lib/Target/R600/SIIntrinsics.td | 3
-rw-r--r--  lib/Target/R600/SILowerControlFlow.cpp | 117
-rw-r--r--  lib/Target/R600/SIRegisterInfo.td | 14
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 3
-rw-r--r--  lib/Target/TargetLibraryInfo.cpp | 3
-rw-r--r--  lib/Target/TargetMachine.cpp | 26
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 450
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 27
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 24
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 3
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 437
-rw-r--r--  lib/Target/X86/X86InstrArithmetic.td | 187
-rw-r--r--  lib/Target/X86/X86InstrCMovSetCC.td | 9
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td | 20
-rw-r--r--  lib/Target/X86/X86InstrExtension.td | 65
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 90
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 762
-rw-r--r--  lib/Target/X86/X86InstrSystem.td | 24
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 51
-rw-r--r--  lib/Target/X86/X86Schedule.td | 82
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp | 27
97 files changed, 6389 insertions(+), 3443 deletions(-)
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index cca6d12e16..572617cc07 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -349,59 +349,6 @@ AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
return TopOfFrameOffset - FrameRegPos;
}
-/// Estimate and return the size of the frame.
-static unsigned estimateStackSize(MachineFunction &MF) {
- // FIXME: Make generic? Really consider after upstreaming. This code is now
- // shared between PEI, ARM *and* here.
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- int Offset = 0;
-
- // This code is very, very similar to PEI::calculateFrameObjectOffsets().
- // It really should be refactored to share code. Until then, changes
- // should keep in mind that there's tight coupling between the two.
-
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -MFI->getObjectOffset(i);
- if (FixedOff > Offset) Offset = FixedOff;
- }
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
- continue;
- Offset += MFI->getObjectSize(i);
- unsigned Align = MFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
-
- MaxAlign = std::max(Align, MaxAlign);
- }
-
- if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
- Offset += MFI->getMaxCallFrameSize();
-
- // Round up the size to a multiple of the alignment. If the function has
- // any calls or alloca's, align to the target's StackAlignment value to
- // ensure that the callee's frame or the alloca data is suitably aligned;
- // otherwise, for leaf functions, align to the TransientStackAlignment
- // value.
- unsigned StackAlign;
- if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
- StackAlign = TFI->getStackAlignment();
- else
- StackAlign = TFI->getTransientStackAlignment();
-
- // If the frame pointer is eliminated, all frame offsets will be relative to
- // SP not FP. Align to MaxAlign so this works.
- StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
- return (unsigned)Offset;
-}
-
void
AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -422,7 +369,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// callee-save register for this purpose or allocate an extra spill slot.
bool BigStack =
- (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+ (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
|| MFI->hasVarSizedObjects() // Access will be from X29: messes things up
|| (MFI->adjustsStack() && !hasReservedCallFrame(MF));
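
The body of the deleted helper survives, generalized, as MachineFrameInfo::estimateStackSize(), which the hunk above now calls; its key step is rounding the accumulated offset up to the chosen stack alignment. A minimal standalone sketch of that rounding (not part of the patch; assumes StackAlign is a power of two):

    // Round Offset up to the next multiple of StackAlign.
    static unsigned roundUpToAlignment(unsigned Offset, unsigned StackAlign) {
      unsigned AlignMask = StackAlign - 1;
      return (Offset + AlignMask) & ~AlignMask;  // e.g. (13 + 7) & ~7u == 16
    }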
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index c1695dacb4..69bb80a485 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -160,44 +160,53 @@ private:
SMLoc StartLoc, EndLoc;
+ struct ImmWithLSLOp {
+ const MCExpr *Val;
+ unsigned ShiftAmount;
+ bool ImplicitAmount;
+ };
+
+ struct CondCodeOp {
+ A64CC::CondCodes Code;
+ };
+
+ struct FPImmOp {
+ double Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ShiftExtendOp {
+ A64SE::ShiftExtSpecifiers ShiftType;
+ unsigned Amount;
+ bool ImplicitAmount;
+ };
+
+ struct SysRegOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
union {
- struct {
- const MCExpr *Val;
- unsigned ShiftAmount;
- bool ImplicitAmount;
- } ImmWithLSL;
-
- struct {
- A64CC::CondCodes Code;
- } CondCode;
-
- struct {
- double Val;
- } FPImm;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- struct {
- A64SE::ShiftExtSpecifiers ShiftType;
- unsigned Amount;
- bool ImplicitAmount;
- } ShiftExtend;
-
- struct {
- const char *Data;
- unsigned Length;
- } SysReg;
-
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
+ struct ImmWithLSLOp ImmWithLSL;
+ struct CondCodeOp CondCode;
+ struct FPImmOp FPImm;
+ struct ImmOp Imm;
+ struct RegOp Reg;
+ struct ShiftExtendOp ShiftExtend;
+ struct SysRegOp SysReg;
+ struct TokOp Tok;
};
AArch64Operand(KindTy K, SMLoc S, SMLoc E)
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
new file mode 100644
index 0000000000..f0d4dbe2bf
--- /dev/null
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -0,0 +1,704 @@
+//=== A15SDOptimizer.cpp - Optimize DPR and SPR register accesses on A15 ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The Cortex-A15 processor employs a tracking scheme in its register renaming
+// in order to process each instruction's micro-ops speculatively and
+// out-of-order with appropriate forwarding. The ARM architecture allows VFP
+// instructions to read and write 32-bit S-registers. Each S-register
+// corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
+//
+// There are several instruction patterns which can be used to provide this
+// capability and which can yield higher performance than other, potentially
+// more direct, patterns, specifically when one micro-op reads a D-register
+// operand that has recently been written as one or more S-register results.
+//
+// This file defines a pre-regalloc pass which looks for SPR producers that
+// are going to be used by DPR (or QPR) consumers and creates the more
+// optimized access pattern.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "a15-sd-optimizer"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMISelLowering.h"
+#include "ARMTargetMachine.h"
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <set>
+
+using namespace llvm;
+
+namespace {
+ struct A15SDOptimizer : public MachineFunctionPass {
+ static char ID;
+ A15SDOptimizer() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM A15 S->D optimizer";
+ }
+
+ private:
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ bool runOnInstruction(MachineInstr *MI);
+
+ //
+ // Instruction builder helpers
+ //
+ unsigned createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane,
+ bool QPR=false);
+
+ unsigned createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC);
+
+ unsigned createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1);
+
+ unsigned createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2);
+
+ unsigned createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert);
+
+ unsigned createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL);
+
+ //
+ // Various property checkers
+ //
+ bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
+ bool hasPartialWrite(MachineInstr *MI);
+ SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
+ unsigned getDPRLaneFromSPR(unsigned SReg);
+
+ //
+ // Methods used for getting the definitions of partial registers
+ //
+
+ MachineInstr *elideCopies(MachineInstr *MI);
+ void elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs);
+
+ //
+ // Pattern optimization methods
+ //
+ unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
+ unsigned optimizeSDPattern(MachineInstr *MI);
+ unsigned getPrefSPRLane(unsigned SReg);
+
+ //
+    // Sanitizing method - used to make sure we don't leave dead code around.
+ //
+ void eraseInstrWithNoUses(MachineInstr *MI);
+
+ //
+ // A map used to track the changes done by this pass.
+ //
+ std::map<MachineInstr*, unsigned> Replacements;
+ std::set<MachineInstr *> DeadInstr;
+ };
+ char A15SDOptimizer::ID = 0;
+} // end anonymous namespace
+
+// Returns true if this operand uses a register in the given register class.
+bool A15SDOptimizer::usesRegClass(MachineOperand &MO,
+ const TargetRegisterClass *TRC) {
+ if (!MO.isReg())
+ return false;
+ unsigned Reg = MO.getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
+ else
+ return TRC->contains(Reg);
+}
+
+unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
+ unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
+ &ARM::DPRRegClass);
+ if (DReg != ARM::NoRegister) return ARM::ssub_1;
+ return ARM::ssub_0;
+}
+
+// Get the subreg type that is most likely to be coalesced
+// for an SPR register that will be used in a VDUP32d pseudo.
+unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
+ if (!TRI->isVirtualRegister(SReg))
+ return getDPRLaneFromSPR(SReg);
+
+ MachineInstr *MI = MRI->getVRegDef(SReg);
+ if (!MI) return ARM::ssub_0;
+ MachineOperand *MO = MI->findRegisterDefOperand(SReg);
+
+  if (!MO) return ARM::ssub_0;
+  assert(MO->isReg() && "Non register operand found!");
+
+ if (MI->isCopy() && usesRegClass(MI->getOperand(1),
+ &ARM::SPRRegClass)) {
+ SReg = MI->getOperand(1).getReg();
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(SReg)) {
+ if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
+ return ARM::ssub_0;
+ }
+ return getDPRLaneFromSPR(SReg);
+}
+
+// MI is known to be dead. Figure out what instructions
+// are also made dead by this and mark them for removal.
+void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
+ SmallVector<MachineInstr *, 8> Front;
+ DeadInstr.insert(MI);
+
+ DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
+ Front.push_back(MI);
+
+ while (Front.size() != 0) {
+ MI = Front.back();
+ Front.pop_back();
+
+ // MI is already known to be dead. We need to see
+ // if other instructions can also be removed.
+ for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if ((!MO.isReg()) || (!MO.isUse()))
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TRI->isVirtualRegister(Reg))
+ continue;
+ MachineOperand *Op = MI->findRegisterDefOperand(Reg);
+
+ if (!Op)
+ continue;
+
+ MachineInstr *Def = Op->getParent();
+
+ // We don't need to do anything if we have already marked
+ // this instruction as being dead.
+ if (DeadInstr.find(Def) != DeadInstr.end())
+ continue;
+
+ // Check if all the uses of this instruction are marked as
+ // dead. If so, we can also mark this instruction as being
+ // dead.
+ bool IsDead = true;
+ for (unsigned int j = 0; j < Def->getNumOperands(); ++j) {
+ MachineOperand &MODef = Def->getOperand(j);
+ if ((!MODef.isReg()) || (!MODef.isDef()))
+ continue;
+ unsigned DefReg = MODef.getReg();
+ if (!TRI->isVirtualRegister(DefReg)) {
+ IsDead = false;
+ break;
+ }
+      for (MachineRegisterInfo::use_iterator II = MRI->use_begin(DefReg),
+ EE = MRI->use_end();
+ II != EE; ++II) {
+ // We don't care about self references.
+ if (&*II == Def)
+ continue;
+ if (DeadInstr.find(&*II) == DeadInstr.end()) {
+ IsDead = false;
+ break;
+ }
+ }
+ }
+
+ if (!IsDead) continue;
+
+ DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
+ DeadInstr.insert(Def);
+ }
+ }
+}
+
+// Creates the more optimized patterns and generally does all the code
+// transformations in this pass.
+unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
+ if (MI->isCopy()) {
+ return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
+ }
+
+ if (MI->isInsertSubreg()) {
+ unsigned DPRReg = MI->getOperand(1).getReg();
+ unsigned SPRReg = MI->getOperand(2).getReg();
+
+ if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) {
+ MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
+
+ if (DPRMI && SPRMI) {
+ // See if the first operand of this insert_subreg is IMPLICIT_DEF
+ MachineInstr *ECDef = elideCopies(DPRMI);
+ if (ECDef != 0 && ECDef->isImplicitDef()) {
+ // Another corner case - if we're inserting something that is purely
+ // a subreg copy of a DPR, just use that DPR.
+
+ MachineInstr *EC = elideCopies(SPRMI);
+ // Is it a subreg copy of ssub_0?
+ if (EC && EC->isCopy() &&
+ EC->getOperand(1).getSubReg() == ARM::ssub_0) {
+ DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
+
+ // Find the thing we're subreg copying out of - is it of the same
+ // regclass as DPRMI? (i.e. a DPR or QPR).
+ unsigned FullReg = SPRMI->getOperand(1).getReg();
+ const TargetRegisterClass *TRC =
+ MRI->getRegClass(MI->getOperand(1).getReg());
+ if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
+ DEBUG(dbgs() << "Subreg copy is compatible - returning ");
+ DEBUG(dbgs() << PrintReg(FullReg) << "\n");
+ eraseInstrWithNoUses(MI);
+ return FullReg;
+ }
+ }
+
+ return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
+ }
+ }
+ }
+ return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+ }
+
+ if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
+ &ARM::SPRRegClass)) {
+ // See if all bar one of the operands are IMPLICIT_DEF and insert the
+ // optimizer pattern accordingly.
+ unsigned NumImplicit = 0, NumTotal = 0;
+ unsigned NonImplicitReg = ~0U;
+
+ for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
+ if (!MI->getOperand(I).isReg())
+ continue;
+ ++NumTotal;
+ unsigned OpReg = MI->getOperand(I).getReg();
+
+ if (!TRI->isVirtualRegister(OpReg))
+ break;
+
+ MachineInstr *Def = MRI->getVRegDef(OpReg);
+ if (!Def)
+ break;
+ if (Def->isImplicitDef())
+ ++NumImplicit;
+ else
+ NonImplicitReg = MI->getOperand(I).getReg();
+ }
+
+ if (NumImplicit == NumTotal - 1)
+ return optimizeAllLanesPattern(MI, NonImplicitReg);
+ else
+ return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
+ }
+
+ assert(0 && "Unhandled update pattern!");
+ return 0;
+}
+
+// Return true if this MachineInstr inserts a scalar (SPR) value into
+// a D or Q register.
+bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
+ // The only way we can do a partial register update is through a COPY,
+ // INSERT_SUBREG or REG_SEQUENCE.
+ if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
+ return true;
+
+ if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
+ &ARM::SPRRegClass))
+ return true;
+
+ if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
+ return true;
+
+ return false;
+}
+
+// Looks through full copies to get the instruction that defines the input
+// operand for MI.
+MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
+ if (!MI->isFullCopy())
+ return MI;
+ if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ return NULL;
+ MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (!Def)
+ return NULL;
+ return elideCopies(Def);
+}
+
+// Look through full copies and PHIs to get the set of non-copy MachineInstrs
+// that can produce MI.
+void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
+ SmallVectorImpl<MachineInstr*> &Outs) {
+ // Looking through PHIs may create loops so we need to track what
+ // instructions we have visited before.
+ std::set<MachineInstr *> Reached;
+ SmallVector<MachineInstr *, 8> Front;
+ Front.push_back(MI);
+ while (Front.size() != 0) {
+ MI = Front.back();
+ Front.pop_back();
+
+ // If we have already explored this MachineInstr, ignore it.
+ if (Reached.find(MI) != Reached.end())
+ continue;
+ Reached.insert(MI);
+ if (MI->isPHI()) {
+ for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
+ unsigned Reg = MI->getOperand(I).getReg();
+ if (!TRI->isVirtualRegister(Reg)) {
+ continue;
+ }
+ MachineInstr *NewMI = MRI->getVRegDef(Reg);
+ if (!NewMI)
+ continue;
+ Front.push_back(NewMI);
+ }
+ } else if (MI->isFullCopy()) {
+ if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
+ continue;
+ MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ if (!NewMI)
+ continue;
+ Front.push_back(NewMI);
+ } else {
+ DEBUG(dbgs() << "Found partial copy" << *MI <<"\n");
+ Outs.push_back(MI);
+ }
+ }
+}
+
+// Return the DPR virtual registers that are read by this machine instruction
+// (if any).
+SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
+ if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
+ MI->isKill())
+ return SmallVector<unsigned, 8>();
+
+ SmallVector<unsigned, 8> Defs;
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ if (!usesRegClass(MO, &ARM::DPRRegClass) &&
+ !usesRegClass(MO, &ARM::QPRRegClass))
+ continue;
+
+ Defs.push_back(MO.getReg());
+ }
+ return Defs;
+}
+
+// Creates a DPR register from an SPR one by using a VDUP.
+unsigned
+A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg, unsigned Lane, bool QPR) {
+ unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
+ &ARM::DPRRegClass);
+ AddDefaultPred(BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d),
+ Out)
+ .addReg(Reg)
+ .addImm(Lane));
+
+ return Out;
+}
+
+// Extracts the given lane of a register into a new register of class TRC.
+unsigned
+A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned DReg, unsigned Lane,
+ const TargetRegisterClass *TRC) {
+ unsigned Out = MRI->createVirtualRegister(TRC);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::COPY), Out)
+ .addReg(DReg, 0, Lane);
+
+ return Out;
+}
+
+// Takes two DPR registers and combines them into a QPR using a REG_SEQUENCE.
+unsigned
+A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Reg1, unsigned Reg2) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::REG_SEQUENCE), Out)
+ .addReg(Reg1)
+ .addImm(ARM::dsub_0)
+ .addReg(Reg2)
+ .addImm(ARM::dsub_1);
+ return Out;
+}
+
+// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
+// and merges them into one DPR register.
+unsigned
+A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL,
+ unsigned Ssub0, unsigned Ssub1) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ AddDefaultPred(BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(ARM::VEXTd32), Out)
+ .addReg(Ssub0)
+ .addReg(Ssub1)
+ .addImm(1));
+ return Out;
+}
+
+unsigned
+A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL, unsigned DReg, unsigned Lane,
+ unsigned ToInsert) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::INSERT_SUBREG), Out)
+ .addReg(DReg)
+ .addReg(ToInsert)
+ .addImm(Lane);
+
+ return Out;
+}
+
+unsigned
+A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ DebugLoc DL) {
+ unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
+ BuildMI(MBB,
+ InsertBefore,
+ DL,
+ TII->get(TargetOpcode::IMPLICIT_DEF), Out);
+ return Out;
+}
+
+// This function inserts instructions in order to optimize interactions between
+// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
+// lanes, and then using VEXT instructions to recompose the result.
+unsigned
+A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
+ MachineBasicBlock::iterator InsertPt(MI);
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock &MBB = *MI->getParent();
+ InsertPt++;
+ unsigned Out;
+
+ if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) {
+ unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_0, &ARM::DPRRegClass);
+ unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
+ ARM::dsub_1, &ARM::DPRRegClass);
+
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
+ unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
+ Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
+
+ Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
+
+ } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
+ unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
+ unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
+ Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
+
+ } else {
+ assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
+ "Found unexpected regclass!");
+
+ unsigned PrefLane = getPrefSPRLane(Reg);
+ unsigned Lane;
+ switch (PrefLane) {
+ case ARM::ssub_0: Lane = 0; break;
+ case ARM::ssub_1: Lane = 1; break;
+ default: llvm_unreachable("Unknown preferred lane!");
+ }
+
+ bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass);
+
+ Out = createImplicitDef(MBB, InsertPt, DL);
+ Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
+ Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
+ eraseInstrWithNoUses(MI);
+ }
+ return Out;
+}
+
+bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
+ // We look for instructions that write S registers that are then read as
+ // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
+ // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
+// merge two SPR values to form a DPR register. In order to avoid false
+// positives we make sure that there is an SPR producer, so we look past
+// COPY and PHI nodes to find it.
+ //
+ // The best code pattern for when an SPR producer is going to be used by a
+ // DPR or QPR consumer depends on whether the other lanes of the
+ // corresponding DPR/QPR are currently defined.
+ //
+ // We can handle these efficiently, depending on the type of
+  // pseudo-instruction that is producing the pattern:
+ //
+ // * COPY: * VDUP all lanes and merge the results together
+ // using VEXTs.
+ //
+ // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
+ // lane, and the other lane(s) of the DPR/QPR register
+ // that we are inserting in are undefined, use the
+ // original DPR/QPR value.
+//                    * Otherwise, fall back on the same strategy as COPY.
+ //
+ // * REG_SEQUENCE: * If all except one of the input operands are
+ // IMPLICIT_DEFs, insert the VDUP pattern for just the
+//                      defined input operand.
+//                    * Otherwise, fall back on the same strategy as COPY.
+ //
+
+ // First, get all the reads of D-registers done by this instruction.
+ SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
+ bool Modified = false;
+
+ for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ // Follow the def-use chain for this DPR through COPYs, and also through
+ // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
+ // we can end up with multiple defs of this DPR.
+
+ SmallVector<MachineInstr *, 8> DefSrcs;
+ if (!TRI->isVirtualRegister(*I))
+ continue;
+ MachineInstr *Def = MRI->getVRegDef(*I);
+ if (!Def)
+ continue;
+
+ elideCopiesAndPHIs(Def, DefSrcs);
+
+ for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(),
+ EE = DefSrcs.end(); II != EE; ++II) {
+ MachineInstr *MI = *II;
+
+ // If we've already analyzed and replaced this operand, don't do
+ // anything.
+ if (Replacements.find(MI) != Replacements.end())
+ continue;
+
+ // Now, work out if the instruction causes a SPR->DPR dependency.
+ if (!hasPartialWrite(MI))
+ continue;
+
+ // Collect all the uses of this MI's DPR def for updating later.
+ SmallVector<MachineOperand*, 8> Uses;
+ unsigned DPRDefReg = MI->getOperand(0).getReg();
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
+ E = MRI->use_end(); I != E; ++I)
+ Uses.push_back(&I.getOperand());
+
+ // We can optimize this.
+ unsigned NewReg = optimizeSDPattern(MI);
+
+ if (NewReg != 0) {
+ Modified = true;
+ for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ DEBUG(dbgs() << "Replacing operand "
+ << **I << " with "
+ << PrintReg(NewReg) << "\n");
+ (*I)->substVirtReg(NewReg, 0, *TRI);
+ }
+ }
+ Replacements[MI] = NewReg;
+ }
+ }
+ return Modified;
+}
+
+bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+ TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
+ TRI = Fn.getTarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+ bool Modified = false;
+
+ DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n");
+
+ DeadInstr.clear();
+ Replacements.clear();
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+
+ for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end();
+ MI != ME;) {
+ Modified |= runOnInstruction(MI++);
+ }
+
+ }
+
+ for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(),
+ E = DeadInstr.end();
+ I != E; ++I) {
+ (*I)->eraseFromParent();
+ }
+
+ return Modified;
+}
+
+FunctionPass *llvm::createA15SDOptimizerPass() {
+ return new A15SDOptimizer();
+}
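
For a DPR whose halves were produced as S-register writes, the all-lanes pattern built by createDupLane and createVExt above corresponds to roughly this instruction sequence (an illustrative sketch, not actual pass output; register numbers invented):

    vdup.32 d1, d0[0]        @ broadcast lane 0 of d0
    vdup.32 d2, d0[1]        @ broadcast lane 1 of d0
    vext.32 d3, d1, d2, #1   @ d3 = [d0[0], d0[1]]: d0 rebuilt without the
                             @ SPR->DPR partial-write dependency

The VEXT with immediate 1 selects element 1 of the first operand and element 0 of the second, reassembling the original element order; this matches the lane-0/lane-1 createDupLane calls followed by createVExt in optimizeAllLanesPattern.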
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 5faf8c320c..80e5f37eb0 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -35,6 +35,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
JITCodeEmitter &JCE);
+FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMGlobalBaseRegPass();
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 46915eecf6..68380847a0 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -143,14 +143,12 @@ include "ARMSchedule.td"
// ARM processor families.
def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
"Cortex-A5 ARM processors",
- [FeatureSlowFPBrcc, FeatureNEONForFP,
- FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
- FeatureT2XtPk]>;
+ [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding, FeatureT2XtPk]>;
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
"Cortex-A8 ARM processors",
- [FeatureSlowFPBrcc, FeatureNEONForFP,
- FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
- FeatureT2XtPk]>;
+ [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding, FeatureT2XtPk]>;
def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
"Cortex-A9 ARM processors",
[FeatureVMLxForwarding,
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 58c779830e..13ec208793 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1357,7 +1357,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr)
.addReg(ARM::PC)
- .addImm(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
// Add predicate operands.
.addImm(ARMCC::AL)
.addReg(0)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index ed001ea24a..ed8b9cd9a1 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1125,7 +1125,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
// changed into a VORR that can go down the NEON pipeline.
- if (!WidenVMOVS || !MI->isCopy())
+ if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
return false;
// Look for a copy between even S-registers. That is where we keep floats
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 0ca6450e2b..3b12408768 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1038,58 +1038,6 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
return FnSize;
}
-/// estimateStackSize - Estimate and return the size of the frame.
-/// FIXME: Make generic?
-static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- int Offset = 0;
-
- // This code is very, very similar to PEI::calculateFrameObjectOffsets().
- // It really should be refactored to share code. Until then, changes
- // should keep in mind that there's tight coupling between the two.
-
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -MFI->getObjectOffset(i);
- if (FixedOff > Offset) Offset = FixedOff;
- }
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
- continue;
- Offset += MFI->getObjectSize(i);
- unsigned Align = MFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset+Align-1)/Align*Align;
-
- MaxAlign = std::max(Align, MaxAlign);
- }
-
- if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
- Offset += MFI->getMaxCallFrameSize();
-
- // Round up the size to a multiple of the alignment. If the function has
- // any calls or alloca's, align to the target's StackAlignment value to
- // ensure that the callee's frame or the alloca data is suitably aligned;
- // otherwise, for leaf functions, align to the TransientStackAlignment
- // value.
- unsigned StackAlign;
- if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
- StackAlign = TFI->getStackAlignment();
- else
- StackAlign = TFI->getTransientStackAlignment();
-
- // If the frame pointer is eliminated, all frame offsets will be relative to
- // SP not FP. Align to MaxAlign so this works.
- StackAlign = std::max(StackAlign, MaxAlign);
- unsigned AlignMask = StackAlign - 1;
- Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
- return (unsigned)Offset;
-}
-
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
@@ -1235,7 +1183,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// we've used all the registers and so R4 is already used, so not marking
// it here will be OK.
// FIXME: It will be better just to find spare register here.
- unsigned StackSize = estimateStackSize(MF);
+ unsigned StackSize = MFI->estimateStackSize(MF);
if (MFI->hasVarSizedObjects() || StackSize > 508)
MRI.setPhysRegUsed(ARM::R4);
}
@@ -1330,7 +1278,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// worth the effort and added fragility?
bool BigStack =
(RS &&
- (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ (MFI->estimateStackSize(MF) +
+ ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
estimateRSStackSizeLimit(MF, this)))
|| MFI->hasVarSizedObjects()
|| (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 514971f01e..40d2e8d265 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -564,6 +564,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+ // Custom expand long extensions to vectors.
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+
// NEON does not have single instruction CTPOP for vectors with element
// types wider than 8-bits. However, custom lowering can leverage the
// v8i8/v16i8 vcnt instruction.
@@ -3433,6 +3443,47 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
+/// Custom-expand long vector extensions, where size(DestVec) > 2*size(SrcVec)
+/// and size(DestVec) > 128 bits.
+/// This is achieved by doing one extension from the SrcVec, splitting the
+/// result, extending those parts, and then concatenating them into the
+/// destination.
+static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op = N->getOperand(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DestVT = N->getValueType(0);
+
+ assert(DestVT.getSizeInBits() > 128 &&
+ "Custom sext/zext expansion needs >128-bit vector.");
+ // If this is a normal length extension, use the default expansion.
+ if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
+ SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
+ return SDValue();
+
+ DebugLoc dl = N->getDebugLoc();
+ unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
+ unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
+ unsigned NumElts = SrcVT.getVectorNumElements();
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;
+
+ EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+ NumElts);
+ EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+ NumElts/2);
+ EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
+ NumElts/2);
+
+ Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
+ SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+ DAG.getIntPtrConstant(0));
+ SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+ DAG.getIntPtrConstant(NumElts/2));
+ ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
+ ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
+}
+
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -5621,6 +5672,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ Res = ExpandVectorExtension(N, DAG);
+ break;
case ISD::SRL:
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
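
As a worked example of the expansion above (a sketch of the DAG nodes, not verbatim compiler output): a SIGN_EXTEND from v8i8 to v8i32 is a 4x widening, so it is not left to the default expansion and instead becomes:

    Mid     = sign_extend v8i8 to v8i16        ; MidVT: 8 x i16
    SplitLo = extract_subvector Mid, 0         ; v4i16, elements 0-3
    SplitHi = extract_subvector Mid, 4         ; v4i16, elements 4-7
    ExtLo   = sign_extend SplitLo to v4i32
    ExtHi   = sign_extend SplitHi to v4i32
    Result  = concat_vectors ExtLo, ExtHi      ; v8i32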
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index e11314d4fc..739300e4ef 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -42,12 +43,13 @@ StrictAlign("arm-strict-align", cl::Hidden,
cl::desc("Disallow all unaligned memory accesses"));
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS)
+ const std::string &FS, const TargetOptions &Options)
: ARMGenSubtargetInfo(TT, CPU, FS)
, ARMProcFamily(Others)
, stackAlignment(4)
, CPUString(CPU)
, TargetTriple(TT)
+ , Options(Options)
, TargetABI(ARM_ABI_APCS) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
@@ -92,6 +94,7 @@ void ARMSubtarget::initializeEnvironment() {
AllowsUnalignedMem = false;
Thumb2DSP = false;
UseNaClTrap = false;
+ UnsafeFPMath = false;
}
void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
@@ -162,6 +165,12 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// configuration.
if (!StrictAlign && hasV6Ops() && isTargetDarwin())
AllowsUnalignedMem = true;
+
+  // NEON f32 ops are not IEEE 754 compliant. Darwin is OK with this by default.
+ uint64_t Bits = getFeatureBits();
+ if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters
+ (Options.UnsafeFPMath || isTargetDarwin()))
+ UseNEONForSinglePrecisionFP = true;
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8ce22e1de2..5b5ee6aeb8 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -26,6 +26,7 @@
namespace llvm {
class GlobalValue;
class StringRef;
+class TargetOptions;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
@@ -159,6 +160,9 @@ protected:
/// NaCl TRAP instruction is generated instead of the regular TRAP.
bool UseNaClTrap;
+ /// Target machine allowed unsafe FP math (such as use of NEON fp)
+ bool UnsafeFPMath;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -175,6 +179,9 @@ protected:
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
+ /// Options passed via command line that could influence the target
+ const TargetOptions &Options;
+
public:
enum {
isELF, isDarwin
@@ -189,7 +196,7 @@ protected:
/// of the specified triple.
///
ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
+ const std::string &FS, const TargetOptions &Options);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 774521852a..b0f9e56db7 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -28,6 +28,11 @@ EnableGlobalMerge("global-merge", cl::Hidden,
cl::desc("Enable global merge pass"),
cl::init(true));
+static cl::opt<bool>
+DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
+ cl::desc("Inhibit optimization of S->D register accesses on A15"),
+ cl::init(false));
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
@@ -43,7 +48,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
+ Subtarget(TT, CPU, FS, Options),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
// Default to soft float ABI
@@ -164,6 +169,12 @@ bool ARMPassConfig::addPreRegAlloc() {
addPass(createARMLoadStoreOptimizationPass(true));
if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9())
addPass(createMLxExpansionPass());
+ // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
+ // enabled when NEON is available.
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() &&
+ getARMSubtarget().hasNEON() && !DisableA15SDOptimization) {
+ addPass(createA15SDOptimizerPass());
+ }
return true;
}
@@ -174,7 +185,8 @@ bool ARMPassConfig::addPreSched2() {
addPass(createARMLoadStoreOptimizationPass());
printAndVerify("After ARM load / store optimizer");
}
- if (getARMSubtarget().hasNEON())
+ if ((DisableA15SDOptimization || !getARMSubtarget().isCortexA15()) &&
+ getARMSubtarget().hasNEON())
addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 01c04b48cf..1019b972e9 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -177,6 +177,23 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ // Single to/from double precision conversions.
+ static const CostTblEntry<MVT> NEONFltDblTbl[] = {
+ // Vector fptrunc/fpext conversions.
+ { ISD::FP_ROUND, MVT::v2f64, 2 },
+ { ISD::FP_EXTEND, MVT::v2f32, 2 },
+ { ISD::FP_EXTEND, MVT::v4f32, 4 }
+ };
+
+ if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
+ ISD == ISD::FP_EXTEND)) {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * NEONFltDblTbl[Idx].Cost;
+ }
+
EVT SrcTy = TLI->getValueType(Src);
EVT DstTy = TLI->getValueType(Dst);
@@ -194,17 +211,71 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+ // The number of vmovl instructions for the extension.
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+
+ // Operations that we legalize using load/stores to the stack.
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 },
+
// Vector float <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
// Vector double <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
};
if (SrcTy.isVector() && ST->hasNEON()) {
@@ -247,7 +318,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
return NEONFloatConversionTbl[Idx].Cost;
}
-
// Scalar integer to float conversions.
static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
@@ -303,7 +373,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
return ARMIntegerConversionTbl[Idx].Cost;
}
-
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
}
@@ -326,6 +395,25 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// On NEON, a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
+ // Lowering of some vector selects is currently far from perfect.
+ static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = {
+ { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
+ { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
+ };
+
+ EVT SelCondTy = TLI->getValueType(CondTy);
+ EVT SelValTy = TLI->getValueType(ValTy);
+ int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
+ array_lengthof(NEONVectorSelectTbl),
+ ISD, SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT());
+ if (Idx != -1)
+ return NEONVectorSelectTbl[Idx].Cost;
+
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
return LT.first;
}
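
The new tables feed the standard lookup pattern: legalize the type, find the per-legalized-op cost, and scale by the number of legalized operations. A sketch of the arithmetic for the NEONFltDblTbl path (illustrative, not measured data):

    // fptrunc <4 x double> -> <4 x float> on NEON:
    //   getTypeLegalizationCost(v4f64)           => LT = { 2, v2f64 }
    //   CostTableLookup(ISD::FP_ROUND, v2f64)    => table cost 2
    //   returned cost = LT.first * 2 = 4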
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 6c678fdbd7..ed7b7ec9d2 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -316,103 +316,127 @@ class ARMOperand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
SmallVector<unsigned, 8> Registers;
+ struct CCOp {
+ ARMCC::CondCodes Val;
+ };
+
+ struct CopOp {
+ unsigned Val;
+ };
+
+ struct CoprocOptionOp {
+ unsigned Val;
+ };
+
+ struct ITMaskOp {
+ unsigned Mask:4;
+ };
+
+ struct MBOptOp {
+ ARM_MB::MemBOpt Val;
+ };
+
+ struct IFlagsOp {
+ ARM_PROC::IFlags Val;
+ };
+
+ struct MMaskOp {
+ unsigned Val;
+ };
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ // A vector register list is a sequential list of 1 to 4 registers.
+ struct VectorListOp {
+ unsigned RegNum;
+ unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
+ };
+
+ struct VectorIndexOp {
+ unsigned Val;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ /// Combined record for all forms of ARM address expressions.
+ struct MemoryOp {
+ unsigned BaseRegNum;
+ // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
+ // was specified.
+ const MCConstantExpr *OffsetImm; // Offset immediate value
+ unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
+ ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
+ unsigned ShiftImm; // shift for OffsetReg.
+ unsigned Alignment; // 0 = no alignment specified
+ // n = alignment in bytes (2, 4, 8, 16, or 32)
+ unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
+ };
+
+ struct PostIdxRegOp {
+ unsigned RegNum;
+ bool isAdd;
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned ShiftImm;
+ };
+
+ struct ShifterImmOp {
+ bool isASR;
+ unsigned Imm;
+ };
+
+ struct RegShiftedRegOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftReg;
+ unsigned ShiftImm;
+ };
+
+ struct RegShiftedImmOp {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftImm;
+ };
+
+ struct RotImmOp {
+ unsigned Imm;
+ };
+
+ struct BitfieldOp {
+ unsigned LSB;
+ unsigned Width;
+ };
+
union {
- struct {
- ARMCC::CondCodes Val;
- } CC;
-
- struct {
- unsigned Val;
- } Cop;
-
- struct {
- unsigned Val;
- } CoprocOption;
-
- struct {
- unsigned Mask:4;
- } ITMask;
-
- struct {
- ARM_MB::MemBOpt Val;
- } MBOpt;
-
- struct {
- ARM_PROC::IFlags Val;
- } IFlags;
-
- struct {
- unsigned Val;
- } MMask;
-
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- // A vector register list is a sequential list of 1 to 4 registers.
- struct {
- unsigned RegNum;
- unsigned Count;
- unsigned LaneIndex;
- bool isDoubleSpaced;
- } VectorList;
-
- struct {
- unsigned Val;
- } VectorIndex;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- /// Combined record for all forms of ARM address expressions.
- struct {
- unsigned BaseRegNum;
- // Offset is in OffsetReg or OffsetImm. If both are zero, no offset
- // was specified.
- const MCConstantExpr *OffsetImm; // Offset immediate value
- unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL
- ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
- unsigned ShiftImm; // shift for OffsetReg.
- unsigned Alignment; // 0 = no alignment specified
- // n = alignment in bytes (2, 4, 8, 16, or 32)
- unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
- } Memory;
-
- struct {
- unsigned RegNum;
- bool isAdd;
- ARM_AM::ShiftOpc ShiftTy;
- unsigned ShiftImm;
- } PostIdxReg;
-
- struct {
- bool isASR;
- unsigned Imm;
- } ShifterImm;
- struct {
- ARM_AM::ShiftOpc ShiftTy;
- unsigned SrcReg;
- unsigned ShiftReg;
- unsigned ShiftImm;
- } RegShiftedReg;
- struct {
- ARM_AM::ShiftOpc ShiftTy;
- unsigned SrcReg;
- unsigned ShiftImm;
- } RegShiftedImm;
- struct {
- unsigned Imm;
- } RotImm;
- struct {
- unsigned LSB;
- unsigned Width;
- } Bitfield;
+ struct CCOp CC;
+ struct CopOp Cop;
+ struct CoprocOptionOp CoprocOption;
+ struct MBOptOp MBOpt;
+ struct ITMaskOp ITMask;
+ struct IFlagsOp IFlags;
+ struct MMaskOp MMask;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct VectorListOp VectorList;
+ struct VectorIndexOp VectorIndex;
+ struct ImmOp Imm;
+ struct MemoryOp Memory;
+ struct PostIdxRegOp PostIdxReg;
+ struct ShifterImmOp ShifterImm;
+ struct RegShiftedRegOp RegShiftedReg;
+ struct RegShiftedImmOp RegShiftedImm;
+ struct RotImmOp RotImm;
+ struct BitfieldOp Bitfield;
};
ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -4569,20 +4593,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return true;
case AsmToken::Identifier: {
- if (!tryParseRegisterWithWriteBack(Operands))
- return false;
- int Res = tryParseShiftRegister(Operands);
- if (Res == 0) // success
- return false;
- else if (Res == -1) // irrecoverable error
- return true;
- // If this is VMRS, check for the apsr_nzcv operand.
- if (Mnemonic == "vmrs" &&
- Parser.getTok().getString().equals_lower("apsr_nzcv")) {
- S = Parser.getTok().getLoc();
- Parser.Lex();
- Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
- return false;
+ // If we've seen a branch mnemonic, the next operand must be a label. This
+ // is true even if the label is a register name. So "b r1" means branch to
+ // label "r1".
+ bool ExpectLabel = Mnemonic == "b" || Mnemonic == "bl";
+ if (!ExpectLabel) {
+ if (!tryParseRegisterWithWriteBack(Operands))
+ return false;
+ int Res = tryParseShiftRegister(Operands);
+ if (Res == 0) // success
+ return false;
+ else if (Res == -1) // irrecoverable error
+ return true;
+ // If this is VMRS, check for the apsr_nzcv operand.
+ if (Mnemonic == "vmrs" &&
+ Parser.getTok().getString().equals_lower("apsr_nzcv")) {
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
+ return false;
+ }
}
// Fall through for the Identifier case that is not a register or a
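
With the hunk above, an identifier following "b" or "bl" is always taken as a label, even if it spells a register name. A usage sketch (ARM assembly shown as comments; the operands are illustrative):

    // b   r1        @ branches to a *label* named r1, not to the register
    // bl  func      @ unchanged: calls the label func
    // mov r0, r1    @ r1 is still a register for non-branch mnemonics
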
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 586834cf73..b832508a08 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -15,6 +15,7 @@ tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
+ A15SDOptimizer.cpp
ARMAsmPrinter.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 4c0f93c6cd..96a252e1b8 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1949,6 +1949,10 @@ isValidOffset(const int Opcode, const int Offset) const {
// the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is
// inserted to calculate the final address. Due to this reason, the function
// assumes that the "Offset" has correct alignment.
+ // We used to assert if the offset was not properly aligned; however,
+ // there are cases where a misaligned pointer recast can produce such an
+ // offset, and we need to allow for it. The front end warns about such
+ // misalignments with respect to the load size.
switch(Opcode) {
@@ -1958,7 +1962,6 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::STriw_indexed:
case Hexagon::STriw:
case Hexagon::STriw_f:
- assert((Offset % 4 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
(Offset <= Hexagon_MEMW_OFFSET_MAX);
@@ -1968,14 +1971,12 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::STrid:
case Hexagon::STrid_indexed:
case Hexagon::STrid_f:
- assert((Offset % 8 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
(Offset <= Hexagon_MEMD_OFFSET_MAX);
case Hexagon::LDrih:
case Hexagon::LDriuh:
case Hexagon::STrih:
- assert((Offset % 2 == 0) && "Offset has incorrect alignment");
return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
(Offset <= Hexagon_MEMH_OFFSET_MAX);
@@ -2002,7 +2003,6 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::MEMw_SUBr_MEM_V4 :
case Hexagon::MEMw_ANDr_MEM_V4 :
case Hexagon::MEMw_ORr_MEM_V4 :
- assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." );
return (0 <= Offset && Offset <= 255);
case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
@@ -2017,7 +2017,6 @@ isValidOffset(const int Opcode, const int Offset) const {
case Hexagon::MEMh_SUBr_MEM_V4 :
case Hexagon::MEMh_ANDr_MEM_V4 :
case Hexagon::MEMh_ORr_MEM_V4 :
- assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." );
return (0 <= Offset && Offset <= 127);
case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
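
A hypothetical C++ fragment of the kind that defeats the removed asserts: a packed struct (or an equivalent pointer recast) makes a 32-bit access at an odd offset legal input, so isValidOffset() may only range-check it. All names below are illustrative:

    struct Packed { char c; int i; } __attribute__((packed));

    int load(Packed *p) {
      return p->i;  // 32-bit load at byte offset 1; the front end warns,
    }               // but the backend must no longer assert on it
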
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index ad495ff306..dda6e247ac 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -82,29 +82,35 @@ struct MBlazeOperand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ unsigned OffReg;
+ const MCExpr *Off;
+ };
+
+ struct FslImmOp {
+ const MCExpr *Val;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- } Reg;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned Base;
- unsigned OffReg;
- const MCExpr *Off;
- } Mem;
-
- struct {
- const MCExpr *Val;
- } FslImm;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
+ struct FslImmOp FslImm;
};
MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index d0fd7dcec1..bd83afc1cc 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -122,7 +122,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
}
void MBlazeRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *) const {
// Set the stack offset where GP must be saved/loaded from.
MachineFrameInfo *MFI = MF.getFrameInfo();
MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 99a2fac95c..497f3866c9 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -55,7 +55,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index ae2e55617d..e504011dfd 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -285,8 +285,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
}
void
-MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
+MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *) const {
// Create a frame entry for the FPW register that must be saved.
if (hasFP(MF)) {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index a077dd7351..c673f59b5e 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -50,7 +50,8 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool hasReservedCallFrame(const MachineFunction &MF) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
};
} // End llvm namespace
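
Both targets above are only adapting to a new signature of the target-independent hook, which now receives a RegScavenger*. A sketch of an out-of-tree override after this change (the class is hypothetical); note that without the added parameter the method would silently stop overriding the base-class hook:

    void MyFrameLowering::processFunctionBeforeFrameFinalized(
        MachineFunction &MF, RegScavenger *RS) const {
      // RS may be NULL; targets that don't scavenge can ignore it, as the
      // MBlaze and MSP430 implementations above do.
    }
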
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index ade6084752..c403f216b0 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -101,6 +101,9 @@ class MipsAsmParser : public MCTargetAsmParser {
MipsAsmParser::OperandMatchResultTy
parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned RegisterKind);
+
bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &,
StringRef Mnemonic);
@@ -119,6 +122,9 @@ class MipsAsmParser : public MCTargetAsmParser {
SmallVectorImpl<MCInst> &Instructions);
void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ void expandMemInst(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions,
+ bool isLoad, bool isImmOpnd);
bool reportParseError(StringRef ErrorMsg);
bool parseMemOffset(const MCExpr *&Res);
@@ -133,6 +139,8 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetReorderDirective();
bool parseSetNoReorderDirective();
+ bool parseSetAssignment();
+
bool parseDirectiveWord(unsigned Size, SMLoc L);
MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
@@ -166,6 +174,9 @@ class MipsAsmParser : public MCTargetAsmParser {
unsigned getReg(int RC,int RegNo);
int getATReg();
+
+ bool processInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
public:
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
: MCTargetAsmParser(), STI(sti), Parser(parser) {
@@ -211,25 +222,30 @@ private:
MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ struct Token {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNum;
+ RegisterKind Kind;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned Base;
+ const MCExpr *Off;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNum;
- RegisterKind Kind;
- } Reg;
-
- struct {
- const MCExpr *Val;
- } Imm;
-
- struct {
- unsigned Base;
- const MCExpr *Off;
- } Mem;
+ struct Token Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
};
SMLoc StartLoc, EndLoc;
@@ -385,6 +401,56 @@ public:
};
}
+namespace llvm {
+extern const MCInstrDesc MipsInsts[];
+}
+static const MCInstrDesc &getInstDesc(unsigned Opcode) {
+ return MipsInsts[Opcode];
+}
+
+bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ Inst.setLoc(IDLoc);
+ if (MCID.mayLoad() || MCID.mayStore()) {
+ // Check the offset of the memory operand; if it is a symbol
+ // reference or an immediate, we may have to expand the instruction.
+ for (unsigned i = 0; i < MCID.getNumOperands(); i++) {
+ const MCOperandInfo &OpInfo = MCID.OpInfo[i];
+ if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) ||
+ (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
+ MCOperand &Op = Inst.getOperand(i);
+ if (Op.isImm()) {
+ int MemOffset = Op.getImm();
+ if (MemOffset < -32768 || MemOffset > 32767) {
+ // The offset can't exceed a 16-bit value; expand.
+ expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), true);
+ return false;
+ }
+ } else if (Op.isExpr()) {
+ const MCExpr *Expr = Op.getExpr();
+ if (Expr->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *SR =
+ static_cast<const MCSymbolRefExpr*>(Expr);
+ if (SR->getKind() == MCSymbolRefExpr::VK_None) {
+ // Expand symbol
+ expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false);
+ return false;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (needsExpansion(Inst))
+ expandInstruction(Inst, IDLoc, Instructions);
+ else
+ Instructions.push_back(Inst);
+
+ return false;
+}
+
bool MipsAsmParser::needsExpansion(MCInst &Inst) {
switch(Inst.getOpcode()) {
@@ -531,28 +597,103 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
}
}
+void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions,
+ bool isLoad, bool isImmOpnd) {
+ const MCSymbolRefExpr *SR;
+ MCInst TempInst;
+ unsigned ImmOffset, HiOffset, LoOffset;
+ const MCExpr *ExprOffset;
+ unsigned TmpRegNum;
+ unsigned AtRegNum = getReg(isMips64() ? Mips::CPU64RegsRegClassID
+                                       : Mips::CPURegsRegClassID,
+                            getATReg());
+ // The 1st operand is either the source or the destination register.
+ assert(Inst.getOperand(0).isReg() && "expected register operand kind");
+ unsigned RegOpNum = Inst.getOperand(0).getReg();
+ // The 2nd operand is the base register.
+ assert(Inst.getOperand(1).isReg() && "expected register operand kind");
+ unsigned BaseRegNum = Inst.getOperand(1).getReg();
+ // The 3rd operand is either an immediate or an expression.
+ if (isImmOpnd) {
+ assert(Inst.getOperand(2).isImm() && "expected immediate operand kind");
+ ImmOffset = Inst.getOperand(2).getImm();
+ LoOffset = ImmOffset & 0x0000ffff;
+ HiOffset = (ImmOffset & 0xffff0000) >> 16;
+ // If the MSB of LoOffset is 1 (a negative number), we must increment
+ // HiOffset to compensate.
+ if (LoOffset & 0x8000)
+ HiOffset++;
+ }
+ else
+ ExprOffset = Inst.getOperand(2).getExpr();
+ // All instructions in the expansion share the same location.
+ TempInst.setLoc(IDLoc);
+ // The 1st instruction in the expansion is LUi. For a load we can use
+ // the destination register as a temporary if base and destination are
+ // different, but for a store we must use $at.
+ TmpRegNum = (isLoad && (BaseRegNum != RegOpNum)) ? RegOpNum : AtRegNum;
+ TempInst.setOpcode(Mips::LUi);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ if (isImmOpnd)
+ TempInst.addOperand(MCOperand::CreateImm(HiOffset));
+ else {
+ if (ExprOffset->getKind() == MCExpr::SymbolRef) {
+ SR = static_cast<const MCSymbolRefExpr*>(ExprOffset);
+ const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::
+ Create(SR->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_HI,
+ getContext());
+ TempInst.addOperand(MCOperand::CreateExpr(HiExpr));
+ }
+ }
+ // Add the instruction to the list,
+ Instructions.push_back(TempInst);
+ // then prepare TempInst for the next instruction,
+ TempInst.clear();
+ // which adds the temp register to the base.
+ TempInst.setOpcode(Mips::ADDu);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+ // Finally, create the original instruction with the low part of the
+ // offset and the new base.
+ TempInst.setOpcode(Inst.getOpcode());
+ TempInst.addOperand(MCOperand::CreateReg(RegOpNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ if (isImmOpnd)
+ TempInst.addOperand(MCOperand::CreateImm(LoOffset));
+ else {
+ if (ExprOffset->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::
+ Create(SR->getSymbol().getName(),
+ MCSymbolRefExpr::VK_Mips_ABS_LO,
+ getContext());
+ TempInst.addOperand(MCOperand::CreateExpr(LoExpr));
+ }
+ }
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+}
+
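
A worked example of the expansion above, assuming a load whose destination differs from its base register (so the destination doubles as the temporary); the assembly is illustrative:

    // lw   $2, 65536($4)   # offset does not fit in 16 bits, so expand:
    // lui  $2, 1           # HiOffset = 1 (LoOffset = 0, no carry fixup)
    // addu $2, $2, $4      # temp = high part + base
    // lw   $2, 0($2)       # original opcode with LoOffset and the new base
    //
    // For a store, or when base == destination, $at is the temporary instead.
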
bool MipsAsmParser::
MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
+ SmallVector<MCInst, 8> Instructions;
unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
MatchingInlineAsm);
switch (MatchResult) {
default: break;
case Match_Success: {
- if (needsExpansion(Inst)) {
- SmallVector<MCInst, 4> Instructions;
- expandInstruction(Inst, IDLoc, Instructions);
- for(unsigned i =0; i < Instructions.size(); i++){
- Out.EmitInstruction(Instructions[i]);
- }
- } else {
- Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst);
- }
+ if (processInstruction(Inst, IDLoc, Instructions))
+ return true;
+ for (unsigned i = 0; i < Instructions.size(); i++)
+ Out.EmitInstruction(Instructions[i]);
return false;
}
case Match_MissingFeature:
@@ -812,6 +953,11 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return false;
}
case AsmToken::Identifier:
+ // Look for an existing symbol alias; we may need to assign
+ // the proper RegisterKind.
+ if (searchSymbolAlias(Operands, MipsOperand::Kind_None))
+ return false;
+ // Otherwise fall through to expression parsing.
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -883,24 +1029,25 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
// Check the type of the expression
if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) {
- // it's a constant, evaluate lo or hi value
- int Val = MCE->getValue();
+ // It's a constant; evaluate the lo or hi value.
if (Str == "lo") {
- Val = Val & 0xffff;
+ short Val = MCE->getValue();
+ Res = MCConstantExpr::Create(Val, getContext());
} else if (Str == "hi") {
+ int Val = MCE->getValue();
int LoSign = Val & 0x8000;
Val = (Val & 0xffff0000) >> 16;
- //lower part is treated as signed int, so if it is negative
- //we must add 1 to hi part to compensate
+ // Lower part is treated as a signed int, so if it is negative
+ // we must add 1 to the hi part to compensate
if (LoSign)
Val++;
+ Res = MCConstantExpr::Create(Val, getContext());
}
- Res = MCConstantExpr::Create(Val, getContext());
return false;
}
if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) {
- // it's a symbol, create symbolic expression from symbol
+ // It's a symbol; create a symbolic expression from it.
StringRef Symbol = MSRE->getSymbol().getName();
MCSymbolRefExpr::VariantKind VK = getVariantKind(Str);
Res = MCSymbolRefExpr::Create(Symbol,VK,getContext());
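
The rewritten folding above makes %lo a sign-extended 16-bit value and compensates %hi, so that (%hi << 16) + %lo reconstructs the constant. A standalone sketch of the arithmetic (the helper is ours, not the parser's):

    #include <cassert>
    #include <cstdint>

    static void splitHiLo(int32_t Val, int32_t &Hi, int16_t &Lo) {
      Lo = static_cast<int16_t>(Val & 0xffff); // low half, sign-extended
      Hi = (Val >> 16) & 0xffff;
      if (Lo < 0) // low half went negative, so compensate in the high half
        ++Hi;
    }

    int main() {
      int32_t Hi; int16_t Lo;
      splitHiLo(0x18000, Hi, Lo); // Lo = -32768, Hi = 2
      assert((Hi << 16) + Lo == 0x18000);
      return 0;
    }
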
@@ -925,6 +1072,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) {
switch(getLexer().getKind()) {
default:
return true;
+ case AsmToken::Identifier:
case AsmToken::Integer:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -1004,6 +1152,11 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!isMips64())
return MatchOperand_NoMatch;
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ if (searchSymbolAlias(Operands, MipsOperand::Kind_CPU64Regs))
+ return MatchOperand_Success;
+ return MatchOperand_NoMatch;
+ }
// if the first token is not '$' we have an error
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
@@ -1018,9 +1171,52 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_NoMatch;
}
+bool MipsAsmParser::
+searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ unsigned RegisterKind) {
+
+ MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
+ if (Sym) {
+ SMLoc S = Parser.getTok().getLoc();
+ const MCExpr *Expr;
+ if (Sym->isVariable())
+ Expr = Sym->getVariableValue();
+ else
+ return false;
+ if (Expr->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ const StringRef DefSymbol = Ref->getSymbol().getName();
+ if (DefSymbol.startswith("$")) {
+ // Look up the register with the corresponding name.
+ int RegNum = matchRegisterName(DefSymbol.substr(1), isMips64());
+ if (RegNum > -1) {
+ Parser.Lex();
+ MipsOperand *op = MipsOperand::CreateReg(RegNum, S,
+ Parser.getTok().getLoc());
+ op->setRegKind((MipsOperand::RegisterKind)RegisterKind);
+ Operands.push_back(op);
+ return true;
+ }
+ }
+ } else if (Expr->getKind() == MCExpr::Constant) {
+ Parser.Lex();
+ const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr);
+ MipsOperand *op = MipsOperand::CreateImm(Const, S,
+ Parser.getTok().getLoc());
+ Operands.push_back(op);
+ return true;
+ }
+ }
+ return false;
+}
MipsAsmParser::OperandMatchResultTy
MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ if (searchSymbolAlias(Operands, MipsOperand::Kind_CPURegs))
+ return MatchOperand_Success;
+ return MatchOperand_NoMatch;
+ }
// if the first token is not '$' we have an error
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
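
A usage sketch of the alias lookup added above (assembly in comments; the symbol names are illustrative):

    // .set  reg_a, $8
    // .set  answer, 42
    // addu  reg_a, $9, $10    # reg_a resolves to register $8
    // addiu $4, $4, answer    # answer resolves to the immediate 42
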
@@ -1316,13 +1512,13 @@ bool MipsAsmParser::reportParseError(StringRef ErrorMsg) {
}
bool MipsAsmParser::parseSetNoAtDirective() {
- // line should look like:
+ // Line should look like:
// .set noat
// set at reg to 0
Options.setATReg(0);
// eat noat
Parser.Lex();
- // if this is not the end of the statement, report error
+ // If this is not the end of the statement, report error
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
return false;
@@ -1341,12 +1537,12 @@ bool MipsAsmParser::parseSetAtDirective() {
Parser.Lex(); // Consume the EndOfStatement
return false;
} else if (getLexer().is(AsmToken::Equal)) {
- getParser().Lex(); //eat '='
+ getParser().Lex(); // eat '='
if (getLexer().isNot(AsmToken::Dollar)) {
reportParseError("unexpected token in statement");
return false;
}
- Parser.Lex(); // eat '$'
+ Parser.Lex(); // Eat '$'
const AsmToken &Reg = Parser.getTok();
if (Reg.is(AsmToken::Identifier)) {
AtRegNo = matchCPURegisterName(Reg.getIdentifier());
@@ -1366,7 +1562,7 @@ bool MipsAsmParser::parseSetAtDirective() {
reportParseError("unexpected token in statement");
return false;
}
- getParser().Lex(); //eat reg
+ getParser().Lex(); // Eat reg
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
@@ -1382,7 +1578,7 @@ bool MipsAsmParser::parseSetAtDirective() {
bool MipsAsmParser::parseSetReorderDirective() {
Parser.Lex();
- // if this is not the end of the statement, report error
+ // If this is not the end of the statement, report error
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
return false;
@@ -1431,6 +1627,31 @@ bool MipsAsmParser::parseSetNoMacroDirective() {
Parser.Lex(); // Consume the EndOfStatement
return false;
}
+
+bool MipsAsmParser::parseSetAssignment() {
+ StringRef Name;
+ const MCExpr *Value;
+
+ if (Parser.parseIdentifier(Name))
+ return reportParseError("expected identifier after .set");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return reportParseError("unexpected token in .set directive");
+ Lex(); // Eat the comma.
+
+ if (Parser.parseExpression(Value))
+ return reportParseError("expected valid expression after comma");
+
+ // Check if the Name already exists as a symbol.
+ MCSymbol *Sym = getContext().LookupSymbol(Name);
+ if (Sym) {
+ return reportParseError("symbol already defined");
+ }
+ Sym = getContext().GetOrCreateSymbol(Name);
+ Sym->setVariableValue(Value);
+
+ return false;
+}
bool MipsAsmParser::parseDirectiveSet() {
// get next token
@@ -1456,6 +1677,10 @@ bool MipsAsmParser::parseDirectiveSet() {
// ignore this directive for now
Parser.eatToEndOfStatement();
return false;
+ } else {
+ // It is just an identifier; look for an assignment.
+ parseSetAssignment();
+ return false;
}
return true;
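
The new fall-through above makes .set accept a generic assignment, which is what feeds the alias lookup earlier in the patch. A usage sketch (assembly in comments; names illustrative):

    // .set foo, 42    # defines symbol foo with value 42
    // .set bar, $5    # register alias, later resolved by searchSymbolAlias
    // .set foo, 43    # rejected: "symbol already defined"
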
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 58aa1be34d..cf8bb189e4 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -16,6 +16,8 @@ add_public_tablegen_target(MipsCommonTableGen)
add_llvm_target(MipsCodeGen
Mips16FrameLowering.cpp
Mips16InstrInfo.cpp
+ Mips16ISelDAGToDAG.cpp
+ Mips16ISelLowering.cpp
Mips16RegisterInfo.cpp
MipsAnalyzeImmediate.cpp
MipsAsmPrinter.cpp
@@ -33,6 +35,8 @@ add_llvm_target(MipsCodeGen
MipsRegisterInfo.cpp
MipsSEFrameLowering.cpp
MipsSEInstrInfo.cpp
+ MipsSEISelDAGToDAG.cpp
+ MipsSEISelLowering.cpp
MipsSERegisterInfo.cpp
MipsSubtarget.cpp
MipsTargetMachine.cpp
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 96f93a0789..e198a7c983 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -160,8 +160,9 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
const MCOperand &MO = MI.getOperand(OpNo);
- // If the destination is an immediate, we have nothing to do.
- if (MO.isImm()) return MO.getImm();
+ // If the destination is an immediate, divide by 4.
+ if (MO.isImm()) return MO.getImm() >> 2;
+
assert(MO.isExpr() &&
"getBranchTargetOpValue expects only expressions or immediates");
@@ -179,8 +180,9 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(OpNo);
- // If the destination is an immediate, we have nothing to do.
- if (MO.isImm()) return MO.getImm();
+ // If the destination is an immediate, divide by 4.
+ if (MO.isImm()) return MO.getImm() >> 2;
+
assert(MO.isExpr() &&
"getJumpTargetOpValue expects only expressions or an immediate");
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
new file mode 100644
index 0000000000..00b3449300
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -0,0 +1,308 @@
+//===-- Mips16ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips16 ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "Mips16ISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+/// Select multiply instructions.
+std::pair<SDNode*, SDNode*>
+Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ unsigned Opcode = Mips::Mflo16;
+ Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi) {
+ unsigned Opcode = Mips::Mfhi16;
+ Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
+ }
+ return std::make_pair(Lo, Hi);
+}
+
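
A sketch of the machine-node sequence selectMULT() builds when both halves are requested (mips16 assembly in comments; registers illustrative):

    // mult $rx, $ry   # 32x32 multiply; the result lands in HI/LO
    // mflo $rl        # low 32 bits  (HasLo)
    // mfhi $rh        # high 32 bits (HasHi)
    //
    // The MVT::Glue value threaded through keeps mflo/mfhi tied to the mult,
    // so the scheduler cannot interpose anything that clobbers HI/LO.
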
+void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ const TargetRegisterClass *RC =
+ (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+ V2 = RegInfo.createVirtualRegister(RC);
+
+ BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+ BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
+ BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
+ .addReg(V1).addReg(V2);
+}
+
+// Insert instructions to initialize the Mips16 SP Alias register in the
+// first MBB of the function.
+//
+void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->mips16SPAliasRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
+
+ BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg)
+ .addReg(Mips::SP);
+}
+
+void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
+ initGlobalBaseReg(MF);
+ initMips16SPAliasReg(MF);
+}
+
+/// getMips16SPAliasReg - Output the instructions required to put the
+/// SP into a Mips16 accessible aliased register.
+SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() {
+ unsigned Mips16SPAliasReg =
+ MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg();
+ return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy());
+}
+
+void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
+ SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy());
+ if (Parent) {
+ switch (Parent->getOpcode()) {
+ case ISD::LOAD: {
+ LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent);
+ switch (SD->getMemoryVT().getSizeInBits()) {
+ case 8:
+ case 16:
+ AliasReg = TM.getFrameLowering()->hasFP(*MF) ?
+ AliasFPReg : getMips16SPAliasReg();
+ return;
+ }
+ break;
+ }
+ case ISD::STORE: {
+ StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent);
+ switch (SD->getMemoryVT().getSizeInBits()) {
+ case 8:
+ case 16:
+ AliasReg = TM.getFrameLowering()->hasFP(*MF) ?
+ AliasFPReg : getMips16SPAliasReg();
+ return;
+ }
+ break;
+ }
+ }
+ }
+ AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy());
+}
+
+bool Mips16DAGToDAGISel::selectAddr16(
+ SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
+ SDValue &Alias) {
+ EVT ValTy = Addr.getValueType();
+
+ Alias = CurDAG->getTargetConstant(0, ValTy);
+
+ // If the address is a frame index, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ getMips16SPRefReg(Parent, Alias);
+ return true;
+ }
+ // On PIC code, load the global address.
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ getMips16SPRefReg(Parent, Alias);
+ }
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. Example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
+ Base = Addr.getOperand(0);
+ Offset = Opnd0;
+ return true;
+ }
+ }
+
+ // If an indexed floating point load/store can be emitted, return false.
+ const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
+
+ if (LS &&
+ (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
+ Subtarget.hasFPIdx())
+ return false;
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+}
+
+/// Select instructions not customized here. Used for
+/// expanded, promoted and normal instructions.
+std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc DL = Node->getDebugLoc();
+
+ ///
+ // Instruction Selection not handled by the auto-generated
+ // tablegen selection should be handled here.
+ ///
+ EVT NodeTy = Node->getValueType(0);
+ unsigned MultOpc;
+
+ switch (Opcode) {
+ default: break;
+
+ case ISD::SUBE:
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2), CmpLHS;
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned MOp;
+ if (Opcode == ISD::ADDE) {
+ CmpLHS = InFlag.getValue(0);
+ MOp = Mips::AdduRxRyRz16;
+ } else {
+ CmpLHS = InFlag.getOperand(0);
+ MOp = Mips::SubuRxRyRz16;
+ }
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+
+ EVT VT = LHS.getValueType();
+
+ unsigned Sltu_op = Mips::SltuRxRyRz16;
+ SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2);
+ unsigned Addu_op = Mips::AdduRxRyRz16;
+ SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT,
+ SDValue(Carry, 0), RHS);
+
+ SDNode *Result = CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
+ SDValue(AddCarry, 0));
+ return std::make_pair(true, Result);
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : Mips::MultRxRy16);
+ std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
+ true, true);
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
+
+ return std::make_pair(true, (SDNode*)NULL);
+ }
+
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MultuRxRy16 : Mips::MultRxRy16);
+ SDNode *Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
+ return std::make_pair(true, Result);
+ }
+ }
+
+ return std::make_pair(false, (SDNode*)NULL);
+}
+
+FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM) {
+ return new Mips16DAGToDAGISel(TM);
+}
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
new file mode 100644
index 0000000000..baa85877d9
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -0,0 +1,51 @@
+//===---- Mips16ISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16ISELDAGTODAG_H
+#define MIPS16ISELDAGTODAG_H
+
+#include "MipsISelDAGToDAG.h"
+
+namespace llvm {
+
+class Mips16DAGToDAGISel : public MipsDAGToDAGISel {
+public:
+ explicit Mips16DAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
+
+private:
+ std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc DL,
+ EVT Ty, bool HasLo, bool HasHi);
+
+ SDValue getMips16SPAliasReg();
+
+ void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg);
+
+ virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias);
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+
+ virtual void processFunctionAfterISel(MachineFunction &MF);
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg(MachineFunction &MF);
+
+ void initMips16SPAliasReg(MachineFunction &MF);
+};
+
+FunctionPass *createMips16ISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
new file mode 100644
index 0000000000..23eb5375ac
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -0,0 +1,689 @@
+//===-- Mips16ISelLowering.cpp - Mips16 DAG Lowering Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "mips-lower"
+#include "Mips16ISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <set>
+
+using namespace llvm;
+
+static cl::opt<bool>
+Mips16HardFloat("mips16-hard-float", cl::NotHidden,
+ cl::desc("MIPS: mips16 hard float enable."),
+ cl::init(false));
+
+static cl::opt<bool> DontExpandCondPseudos16(
+ "mips16-dont-expand-cond-pseudo",
+ cl::init(false),
+ cl::desc("Dont expand conditional move related "
+ "pseudos for Mips 16"),
+ cl::Hidden);
+
+namespace {
+ std::set<const char*, MipsTargetLowering::LTStr> NoHelperNeeded;
+}
+
+Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
+ : MipsTargetLowering(TM) {
+ //
+ // Set up as if mips32, then revert, so we can test the mechanism
+ // for switching.
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
+ computeRegisterProperties();
+ clearRegisterClasses();
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
+
+ if (Mips16HardFloat)
+ setMips16HardFloatLibCalls();
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+
+ computeRegisterProperties();
+}
+
+const MipsTargetLowering *
+llvm::createMips16TargetLowering(MipsTargetMachine &TM) {
+ return new Mips16TargetLowering(TM);
+}
+
+bool
+Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ return false;
+}
+
+MachineBasicBlock *
+Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case Mips::SelBeqZ:
+ return emitSel16(Mips::BeqzRxImm16, MI, BB);
+ case Mips::SelBneZ:
+ return emitSel16(Mips::BnezRxImm16, MI, BB);
+ case Mips::SelTBteqZCmpi:
+ return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSlti:
+ return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBteqZSltiu:
+ return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBtneZCmpi:
+ return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSlti:
+ return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SelTBtneZSltiu:
+ return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SelTBteqZCmp:
+ return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBteqZSlt:
+ return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBteqZSltu:
+ return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::SelTBtneZCmp:
+ return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::SelTBtneZSlt:
+ return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::SelTBtneZSltu:
+ return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpX16:
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BteqzT8SltX16:
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BteqzT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BtnezT8CmpX16:
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ case Mips::BtnezT8SltX16:
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ case Mips::BtnezT8SltuX16:
+ // TBD: figure out a way to get this or remove the instruction
+ // altogether.
+ return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins(
+ Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins(
+ Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SltCCRxRy16:
+ return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
+ case Mips::SltiCCRxImmX16:
+ return emitFEXT_CCRXI16_ins
+ (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
+ case Mips::SltiuCCRxImmX16:
+ return emitFEXT_CCRXI16_ins
+ (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
+ case Mips::SltuCCRxRy16:
+ return emitFEXT_CCRX16_ins
+ (Mips::SltuRxRy16, MI, BB);
+ }
+}
+
+bool Mips16TargetLowering::
+isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const {
+ // No tail call optimization for mips16.
+ return false;
+}
+
+void Mips16TargetLowering::setMips16LibcallName(RTLIB::Libcall L,
+                                                const char *Name) {
+ setLibcallName(L, Name);
+ NoHelperNeeded.insert(Name);
+}
+
+void Mips16TargetLowering::setMips16HardFloatLibCalls() {
+ setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3");
+ setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3");
+ setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3");
+ setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3");
+ setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3");
+ setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3");
+ setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3");
+ setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3");
+ setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2");
+ setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2");
+ setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi");
+ setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi");
+ setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf");
+ setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf");
+ setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf");
+ setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf");
+ setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2");
+ setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2");
+ setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2");
+ setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2");
+ setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2");
+ setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2");
+ setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2");
+ setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2");
+ setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2");
+ setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
+ setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
+ setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
+ setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
+ setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
+ setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2");
+ setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2");
+}
+
+
+//
+// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much
+// cleaner way to do all of this but it will have to wait until the traditional
+// gcc mechanism is completed.
+//
+// For PIC, in order for Mips16 code to call Mips32 code which, according to
+// the ABI, has either arguments or return values placed in floating-point
+// registers, we use a set of helper functions. (This includes functions that
+// return a complex type, which on Mips is returned in a pair of
+// floating-point registers.)
+//
+// This is an encoding that we inherited from gcc.
+// In the traditional Mips O32 and N32 ABIs, floating-point numbers are
+// passed in floating-point argument registers 1 and 2 only when the first
+// and optionally the second arguments are float (sf) or double (df).
+// For Mips16 we are only concerned with the situations where floating-point
+// arguments are being passed in floating-point registers by the ABI, because
+// Mips16-mode code cannot execute floating-point instructions to load those
+// values; hence the helper functions are needed.
+// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf),
+// (df, df); the helper function suffixes for these are, respectively:
+// 0, 1, 5, 9, 2, 6, 10
+// The suffix can be calculated as follows. For a given argument Arg:
+// ArgNx = 1 if Arg is sf,
+//         2 if Arg is df,
+//         0 if Arg is neither sf nor df.
+// The stub number is then Arg1x + Arg2x*4.
+// Not all numbers between 0 and 10 are possible; we check anyway and
+// assert if an impossible one turns up.
+//
+
+unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber
+ (ArgListTy &Args) const {
+ unsigned int resultNum = 0;
+ if (Args.size() >= 1) {
+ Type *t = Args[0].Ty;
+ if (t->isFloatTy()) {
+ resultNum = 1;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum = 2;
+ }
+ }
+ if (resultNum) {
+ if (Args.size() >= 2) {
+ Type *t = Args[1].Ty;
+ if (t->isFloatTy()) {
+ resultNum += 4;
+ }
+ else if (t->isDoubleTy()) {
+ resultNum += 8;
+ }
+ }
+ }
+ return resultNum;
+}
+
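
Worked examples of the numbering above (the first argument contributes 1 for float or 2 for double; the second contributes 4 or 8 and is ignored when the first is neither):

    // (float)          -> 1          (float, double) -> 1 + 8 = 9
    // (double)         -> 2          (double, float) -> 2 + 4 = 6
    // (double, double) -> 2 + 8 = 10
    // (int, float)     -> 0          (selection keys on the first argument)
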
+//
+// Prefixes are attached to stub numbers depending on the return type:
+// return type: float           sf_
+//              double          df_
+//              single complex  sc_
+//              double complex  dc_
+//              others          NO PREFIX
+//
+// The full name of a helper function is __mips16_call_stub_ +
+// return-type-dependent prefix + stub number.
+//
+// This is something that probably should be in a different source file and
+// perhaps done differently, but my main purpose is to not waste runtime
+// on something that we can enumerate in the source. Another possibility is
+// to have a python script generate these mapping tables. This will do
+// for now. There is a whole series of helper-function mapping arrays, one
+// for each return-type class as outlined above; each has 11 possible
+// entries. Entries that are 0 should never be selected.
+//
+// All the arrays are similar except the one for calls returning none of
+// sf, df, sc, dc, where we only care about calls that have sf or df as the
+// first parameter.
+//
+#define P_ "__mips16_call_stub_"
+#define MAX_STUB_NUMBER 10
+#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10"
+#define T P "0" , T1
+#define P P_
+static char const * vMips16Helper[MAX_STUB_NUMBER+1] =
+ {0, T1 };
+#undef P
+#define P P_ "sf_"
+static char const * sfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "df_"
+static char const * dfMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "sc_"
+static char const * scMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#define P P_ "dc_"
+static char const * dcMips16Helper[MAX_STUB_NUMBER+1] =
+ { T };
+#undef P
+#undef P_
+
+
+const char* Mips16TargetLowering::getMips16HelperFunction
+ (Type* RetTy, ArgListTy &Args, bool &needHelper) const {
+ const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args);
+#ifndef NDEBUG
+ const unsigned int maxStubNum = 10;
+ assert(stubNum <= maxStubNum);
+ const bool validStubNum[maxStubNum+1] =
+ {true, true, true, false, false, true, true, false, false, true, true};
+ assert(validStubNum[stubNum]);
+#endif
+ const char *result;
+ if (RetTy->isFloatTy()) {
+ result = sfMips16Helper[stubNum];
+ }
+ else if (RetTy ->isDoubleTy()) {
+ result = dfMips16Helper[stubNum];
+ }
+ else if (RetTy->isStructTy()) {
+ // Check if it's a complex type.
+ if (RetTy->getNumContainedTypes() == 2) {
+ if ((RetTy->getContainedType(0)->isFloatTy()) &&
+ (RetTy->getContainedType(1)->isFloatTy())) {
+ result = scMips16Helper[stubNum];
+ }
+ else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
+ (RetTy->getContainedType(1)->isDoubleTy())) {
+ result = dcMips16Helper[stubNum];
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ llvm_unreachable("Uncovered condition");
+ }
+ }
+ else {
+ if (stubNum == 0) {
+ needHelper = false;
+ return "";
+ }
+ result = vMips16Helper[stubNum];
+ }
+ needHelper = true;
+ return result;
+}
+
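
Putting the tables and the selector together, some resulting helper names (the C signatures are illustrative):

    // float  f(float);          -> __mips16_call_stub_sf_1
    // double f(double, double); -> __mips16_call_stub_df_10
    // void   f(float);          -> __mips16_call_stub_1
    // void   f(void);           -> no helper (stub number 0, needHelper false)
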
+void Mips16TargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ SelectionDAG &DAG = CLI.DAG;
+ const char* Mips16HelperFunction = 0;
+ bool NeedMips16Helper = false;
+
+ if (getTargetMachine().Options.UseSoftFloat && Mips16HardFloat) {
+ //
+ // Currently we don't have symbols tagged with the mips16 or mips32
+ // qualifier, so we assume we don't know what kind of code the callee
+ // is, and generate the helper.
+ //
+ bool LookupHelper = true;
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
+ if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) {
+ LookupHelper = false;
+ }
+ }
+ if (LookupHelper)
+ Mips16HelperFunction =
+ getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper);
+ }
+
+ SDValue JumpTarget = Callee;
+
+ // T9 should contain the address of the callee function if
+ // -relocation-model=pic or if it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
+ unsigned V0Reg = Mips::V0;
+ if (NeedMips16Helper) {
+ RegsToPass.push_front(std::make_pair(V0Reg, Callee));
+ JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy());
+ JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT);
+ } else
+ RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee));
+ }
+
+ Ops.push_back(JumpTarget);
+
+ MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+ InternalLinkage, CLI, Callee, Chain);
+}
+
+MachineBasicBlock *Mips16TargetLowering::
+emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, sinkMBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg())
+ .addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitSelT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, sinkMBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addReg(MI->getOperand(4).getReg());
+ BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitSeliT16
+ (unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+ // bNE r1, r0, sinkMBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
+ .addImm(MI->getOperand(4).getImm());
+ BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ BuildMI(*BB, BB->begin(), DL,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+                                          MachineInstr *MI,
+                                          MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ unsigned regY = MI->getOperand(1).getReg();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+  BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX)
+    .addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned regX = MI->getOperand(0).getReg();
+ int64_t imm = MI->getOperand(1).getImm();
+ MachineBasicBlock *target = MI->getOperand(2).getMBB();
+ unsigned CmpOpc;
+ if (isUInt<8>(imm))
+ CmpOpc = CmpiOpc;
+ else if (isUInt<16>(imm))
+ CmpOpc = CmpiXOpc;
+ else
+ llvm_unreachable("immediate field not usable");
+  BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX)
+    .addImm(imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+static unsigned Mips16WhichOp8uOr16simm(unsigned shortOp, unsigned longOp,
+                                        int64_t Imm) {
+ if (isUInt<8>(Imm))
+ return shortOp;
+ else if (isInt<16>(Imm))
+ return longOp;
+ else
+ llvm_unreachable("immediate field not usable");
+}
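[Editor's note: the short/extended split above is just a range check on the immediate. The following self-contained sketch mirrors that rule; `isUIntN`/`isIntN` are simplified stand-ins for LLVM's `isUInt<N>`/`isInt<N>` templates, and the opcode values are placeholders.]

```cpp
#include <cstdint>
#include <cstdio>

// Simplified stand-ins for LLVM's isUInt<N> / isInt<N> predicates.
template <unsigned N> bool isUIntN(int64_t x) {
  return x >= 0 && static_cast<uint64_t>(x) < (UINT64_C(1) << N);
}
template <unsigned N> bool isIntN(int64_t x) {
  return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
}

// Mirrors the selection rule above: the short opcode when the immediate
// fits an unsigned 8-bit field, the extended opcode when it fits a
// signed 16-bit field, otherwise the value is not encodable at all.
unsigned pickOpcode(unsigned shortOp, unsigned longOp, int64_t imm) {
  if (isUIntN<8>(imm))
    return shortOp;
  if (isIntN<16>(imm))
    return longOp;
  return ~0u; // the backend asserts via llvm_unreachable here
}

int main() {
  printf("%u\n", pickOpcode(1, 2, 200)); // prints 1: 200 fits in 8 bits
  printf("%u\n", pickOpcode(1, 2, -5));  // prints 2: needs the signed 16-bit form
  return 0;
}
```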
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins(
+    unsigned SltOpc, MachineInstr *MI, MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ unsigned regY = MI->getOperand(2).getReg();
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins(
+    unsigned SltiOpc, unsigned SltiXOpc, MachineInstr *MI,
+    MachineBasicBlock *BB) const {
+ if (DontExpandCondPseudos16)
+ return BB;
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ unsigned CC = MI->getOperand(0).getReg();
+ unsigned regX = MI->getOperand(1).getReg();
+ int64_t Imm = MI->getOperand(2).getImm();
+ unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(SltOpc)).addReg(regX).addImm(Imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(),
+ TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
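[Editor's note: emitFEXT_CCRX16_ins and emitFEXT_CCRXI16_ins both lean on the same MIPS16 convention: the slt/slti family writes its result to the fixed register T8, and MoveR3216 then copies T8 into the virtual condition register. A rough model of that two-instruction sequence, with toy state and names that are stand-ins rather than the backend's types:]

```cpp
#include <cstdint>

// Toy machine state; t8 models the fixed MIPS16 compare-result register.
struct Mips16State {
  uint32_t t8;
  uint32_t cc;
};

// Models the two instructions emitFEXT_CCRX16_ins inserts for
// SltCCRxRy16: the compare defines T8 implicitly, then MoveR3216
// copies T8 into the virtual condition register CC.
void emitSltCCModel(Mips16State &s, uint32_t regX, uint32_t regY) {
  s.t8 = static_cast<int32_t>(regX) < static_cast<int32_t>(regY); // SltRxRy16
  s.cc = s.t8;                                                    // MoveR3216
}
```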
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
new file mode 100644
index 0000000000..b23e2a1f37
--- /dev/null
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -0,0 +1,80 @@
+//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips16.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Mips16ISELLOWERING_H
+#define Mips16ISELLOWERING_H
+
+#include "MipsISelLowering.h"
+
+namespace llvm {
+ class Mips16TargetLowering : public MipsTargetLowering {
+ public:
+ explicit Mips16TargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ private:
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const;
+
+ void setMips16LibcallName(RTLIB::Libcall, const char *Name);
+
+ void setMips16HardFloatLibCalls();
+
+    unsigned int getMips16HelperFunctionStubNumber(ArgListTy &Args) const;
+
+    const char *getMips16HelperFunction(Type *RetTy, ArgListTy &Args,
+                                        bool &needHelper) const;
+
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
+ MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_T8I8I16_ins(
+ unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_CCRX16_ins(
+ unsigned SltOpc,
+ MachineInstr *MI, MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *emitFEXT_CCRXI16_ins(
+ unsigned SltiOpc, unsigned SltiXOpc,
+      MachineInstr *MI, MachineBasicBlock *BB) const;
+ };
+}
+
+#endif // Mips16ISELLOWERING_H
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index a9e9c52716..629382965b 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -15,7 +15,7 @@
// Mips Address
//
def addr16 :
- ComplexPattern<iPTR, 3, "SelectAddr16", [frameindex], [SDNPWantParent]>;
+ ComplexPattern<iPTR, 3, "selectAddr16", [frameindex], [SDNPWantParent]>;
//
// Address operand
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 6dff548505..77b08cb11e 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -12,19 +12,19 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-isel"
+#include "MipsISelDAGToDAG.h"
+#include "Mips16ISelDAGToDAG.h"
+#include "MipsSEISelDAGToDAG.h"
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsAnalyzeImmediate.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
-#include "MipsSubtarget.h"
-#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
@@ -45,270 +45,11 @@ using namespace llvm;
// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
// instructions for SelectionDAG operations.
//===----------------------------------------------------------------------===//
-namespace {
-
-class MipsDAGToDAGISel : public SelectionDAGISel {
-
- /// TM - Keep a reference to MipsTargetMachine.
- MipsTargetMachine &TM;
-
- /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
- /// make the right decision when generating code for different targets.
- const MipsSubtarget &Subtarget;
-
-public:
- explicit MipsDAGToDAGISel(MipsTargetMachine &tm) :
- SelectionDAGISel(tm),
- TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
-
- // Pass Name
- virtual const char *getPassName() const {
- return "MIPS DAG->DAG Pattern Instruction Selection";
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
-private:
- // Include the pieces autogenerated from the target description.
- #include "MipsGenDAGISel.inc"
-
- /// getTargetMachine - Return a reference to the TargetMachine, casted
- /// to the target-specific type.
- const MipsTargetMachine &getTargetMachine() {
- return static_cast<const MipsTargetMachine &>(TM);
- }
-
- /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
- /// to the target-specific type.
- const MipsInstrInfo *getInstrInfo() {
- return getTargetMachine().getInstrInfo();
- }
-
- SDNode *getGlobalBaseReg();
-
- SDValue getMips16SPAliasReg();
-
- void getMips16SPRefReg(SDNode *parent, SDValue &AliasReg);
-
- std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
- EVT Ty, bool HasLo, bool HasHi);
-
- SDNode *Select(SDNode *N);
-
- // Complex Pattern.
- /// (reg + imm).
- bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const;
-
- /// Fall back on this function if all else fails.
- bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const;
-
- /// Match integer address pattern.
- bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const;
-
- bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset,
- SDValue &Alias);
-
- // getImm - Return a target constant with the specified value.
- inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
- }
-
- void ProcessFunctionAfterISel(MachineFunction &MF);
- bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
- void InitGlobalBaseReg(MachineFunction &MF);
- void InitMips16SPAliasReg(MachineFunction &MF);
-
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps);
-};
-
-}
-
-// Insert instructions to initialize the global base register in the
-// first MBB of the function. When the ABI is O32 and the relocation model is
-// PIC, the necessary instructions are emitted later to prevent optimization
-// passes from moving them.
-void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
- if (!MipsFI->globalBaseRegSet())
- return;
-
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator I = MBB.begin();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
- const TargetRegisterClass *RC;
-
- if (Subtarget.isABI_N64())
- RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
- else if (Subtarget.inMips16Mode())
- RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
- else
- RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
-
- V0 = RegInfo.createVirtualRegister(RC);
- V1 = RegInfo.createVirtualRegister(RC);
- V2 = RegInfo.createVirtualRegister(RC);
-
- if (Subtarget.isABI_N64()) {
- MF.getRegInfo().addLiveIn(Mips::T9_64);
- MBB.addLiveIn(Mips::T9_64);
-
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // daddu $v1, $v0, $t9
- // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
- .addReg(Mips::T9_64);
- BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- return;
- }
-
- if (Subtarget.inMips16Mode()) {
- BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
- BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
- BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
- .addReg(V1).addReg(V2);
- return;
- }
-
- if (MF.getTarget().getRelocationModel() == Reloc::Static) {
- // Set global register to __gnu_local_gp.
- //
- // lui $v0, %hi(__gnu_local_gp)
- // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
- .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
- return;
- }
-
- MF.getRegInfo().addLiveIn(Mips::T9);
- MBB.addLiveIn(Mips::T9);
-
- if (Subtarget.isABI_N32()) {
- // lui $v0, %hi(%neg(%gp_rel(fname)))
- // addu $v1, $v0, $t9
- // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
- const GlobalValue *FName = MF.getFunction();
- BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
- .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
- return;
- }
-
- assert(Subtarget.isABI_O32());
-
- // For O32 ABI, the following instruction sequence is emitted to initialize
- // the global base register:
- //
- // 0. lui $2, %hi(_gp_disp)
- // 1. addiu $2, $2, %lo(_gp_disp)
- // 2. addu $globalbasereg, $2, $t9
- //
- // We emit only the last instruction here.
- //
- // GNU linker requires that the first two instructions appear at the beginning
- // of a function and no instructions be inserted before or between them.
- // The two instructions are emitted during lowering to MC layer in order to
- // avoid any reordering.
- //
- // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
- // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
- // reads it.
- MF.getRegInfo().addLiveIn(Mips::V0);
- MBB.addLiveIn(Mips::V0);
- BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
- .addReg(Mips::V0).addReg(Mips::T9);
-}
-
-// Insert instructions to initialize the Mips16 SP Alias register in the
-// first MBB of the function.
-//
-void MipsDAGToDAGISel::InitMips16SPAliasReg(MachineFunction &MF) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
-
- if (!MipsFI->mips16SPAliasRegSet())
- return;
-
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator I = MBB.begin();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg();
-
- BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg)
- .addReg(Mips::SP);
-}
-
-
-bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
- const MachineInstr& MI) {
- unsigned DstReg = 0, ZeroReg = 0;
-
- // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
- if ((MI.getOpcode() == Mips::ADDiu) &&
- (MI.getOperand(1).getReg() == Mips::ZERO) &&
- (MI.getOperand(2).getImm() == 0)) {
- DstReg = MI.getOperand(0).getReg();
- ZeroReg = Mips::ZERO;
- } else if ((MI.getOpcode() == Mips::DADDiu) &&
- (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
- (MI.getOperand(2).getImm() == 0)) {
- DstReg = MI.getOperand(0).getReg();
- ZeroReg = Mips::ZERO_64;
- }
-
- if (!DstReg)
- return false;
-
- // Replace uses with ZeroReg.
- for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
- E = MRI->use_end(); U != E;) {
- MachineOperand &MO = U.getOperand();
- unsigned OpNo = U.getOperandNo();
- MachineInstr *MI = MO.getParent();
- ++U;
-
- // Do not replace if it is a phi's operand or is tied to def operand.
- if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
- continue;
-
- MO.setReg(ZeroReg);
- }
-
- return true;
-}
-
-void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) {
- InitGlobalBaseReg(MF);
- InitMips16SPAliasReg(MF);
-
- MachineRegisterInfo *MRI = &MF.getRegInfo();
-
- for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
- ++MFI)
- for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
- ReplaceUsesWithZeroReg(MRI, *I);
-}
bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
- ProcessFunctionAfterISel(MF);
+ processFunctionAfterISel(MF);
return Ret;
}
@@ -320,233 +61,36 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
-/// getMips16SPAliasReg - Output the instructions required to put the
-/// SP into a Mips16 accessible aliased register.
-SDValue MipsDAGToDAGISel::getMips16SPAliasReg() {
- unsigned Mips16SPAliasReg =
- MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg();
- return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy());
-}
-
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
- EVT ValTy = Addr.getValueType();
-
- // if Address is FI, get the TargetFrameIndex.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- Offset = CurDAG->getTargetConstant(0, ValTy);
- return true;
- }
-
- // on PIC code Load GA
- if (Addr.getOpcode() == MipsISD::Wrapper) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
- }
-
- if (TM.getRelocationModel() != Reloc::PIC_) {
- if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress))
- return false;
- }
-
- // Addresses of the form FI+const or FI|const
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<16>(CN->getSExtValue())) {
-
- // If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
- (Addr.getOperand(0)))
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- else
- Base = Addr.getOperand(0);
-
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
- return true;
- }
- }
-
- // Operand is a result from an ADD.
- if (Addr.getOpcode() == ISD::ADD) {
- // When loading from constant pools, load the lower address part in
- // the instruction itself. Example, instead of:
- // lui $2, %hi($CPI1_0)
- // addiu $2, $2, %lo($CPI1_0)
- // lwc1 $f0, 0($2)
- // Generate:
- // lui $2, %hi($CPI1_0)
- // lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
- Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
- SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
- if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
- isa<JumpTableSDNode>(Opnd0)) {
- Base = Addr.getOperand(0);
- Offset = Opnd0;
- return true;
- }
- }
- }
-
+ llvm_unreachable("Unimplemented function.");
return false;
}
bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, Addr.getValueType());
- return true;
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
- return selectAddrRegImm(Addr, Base, Offset) ||
- selectAddrDefault(Addr, Base, Offset);
-}
-
-void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) {
- SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy());
- if (Parent) {
- switch (Parent->getOpcode()) {
- case ISD::LOAD: {
- LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent);
- switch (SD->getMemoryVT().getSizeInBits()) {
- case 8:
- case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
- return;
- }
- break;
- }
- case ISD::STORE: {
- StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent);
- switch (SD->getMemoryVT().getSizeInBits()) {
- case 8:
- case 16:
- AliasReg = TM.getFrameLowering()->hasFP(*MF)?
- AliasFPReg: getMips16SPAliasReg();
- return;
- }
- break;
- }
- }
- }
- AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy());
- return;
-
-}
-bool MipsDAGToDAGISel::SelectAddr16(
- SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset,
- SDValue &Alias) {
- EVT ValTy = Addr.getValueType();
-
- Alias = CurDAG->getTargetConstant(0, ValTy);
-
- // if Address is FI, get the TargetFrameIndex.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- Offset = CurDAG->getTargetConstant(0, ValTy);
- getMips16SPRefReg(Parent, Alias);
- return true;
- }
- // on PIC code Load GA
- if (Addr.getOpcode() == MipsISD::Wrapper) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
- }
- if (TM.getRelocationModel() != Reloc::PIC_) {
- if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress))
- return false;
- }
- // Addresses of the form FI+const or FI|const
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<16>(CN->getSExtValue())) {
-
- // If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
- (Addr.getOperand(0))) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- getMips16SPRefReg(Parent, Alias);
- }
- else
- Base = Addr.getOperand(0);
-
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
- return true;
- }
- }
- // Operand is a result from an ADD.
- if (Addr.getOpcode() == ISD::ADD) {
- // When loading from constant pools, load the lower address part in
- // the instruction itself. Example, instead of:
- // lui $2, %hi($CPI1_0)
- // addiu $2, $2, %lo($CPI1_0)
- // lwc1 $f0, 0($2)
- // Generate:
- // lui $2, %hi($CPI1_0)
- // lwc1 $f0, %lo($CPI1_0)($2)
- if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
- Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
- SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
- if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
- isa<JumpTableSDNode>(Opnd0)) {
- Base = Addr.getOperand(0);
- Offset = Opnd0;
- return true;
- }
- }
-
- // If an indexed floating point load/store can be emitted, return false.
- const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
-
- if (LS &&
- (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
- Subtarget.hasFPIdx())
- return false;
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, ValTy);
- return true;
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-/// Select multiply instructions.
-std::pair<SDNode*, SDNode*>
-MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty,
- bool HasLo, bool HasHi) {
- SDNode *Lo = 0, *Hi = 0;
- SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N->getOperand(0),
- N->getOperand(1));
- SDValue InFlag = SDValue(Mul, 0);
-
- if (HasLo) {
- unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mflo16 :
- (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
- Lo = CurDAG->getMachineNode(Opcode, dl, Ty, MVT::Glue, InFlag);
- InFlag = SDValue(Lo, 1);
- }
- if (HasHi) {
- unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mfhi16 :
- (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
- Hi = CurDAG->getMachineNode(Opcode, dl, Ty, InFlag);
- }
- return std::make_pair(Lo, Hi);
+bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias) {
+ llvm_unreachable("Unimplemented function.");
+ return false;
}
-
/// Select instructions not customized! Used for
/// expanded, promoted and normal instructions
SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
- DebugLoc dl = Node->getDebugLoc();
// Dump information about the Node being selected
DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
@@ -557,167 +101,19 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
return NULL;
}
- ///
- // Instruction Selection not handled by the auto-generated
- // tablegen selection should be handled here.
- ///
- EVT NodeTy = Node->getValueType(0);
- unsigned MultOpc;
+ // See if subclasses can handle this node.
+ std::pair<bool, SDNode*> Ret = selectNode(Node);
+
+ if (Ret.first)
+ return Ret.second;
switch(Opcode) {
default: break;
- case ISD::SUBE:
- case ISD::ADDE: {
- bool inMips16Mode = Subtarget.inMips16Mode();
- SDValue InFlag = Node->getOperand(2), CmpLHS;
- unsigned Opc = InFlag.getOpcode(); (void)Opc;
- assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
- (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
- "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
-
- unsigned MOp;
- if (Opcode == ISD::ADDE) {
- CmpLHS = InFlag.getValue(0);
- if (inMips16Mode)
- MOp = Mips::AdduRxRyRz16;
- else
- MOp = Mips::ADDu;
- } else {
- CmpLHS = InFlag.getOperand(0);
- if (inMips16Mode)
- MOp = Mips::SubuRxRyRz16;
- else
- MOp = Mips::SUBu;
- }
-
- SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
-
- SDValue LHS = Node->getOperand(0);
- SDValue RHS = Node->getOperand(1);
-
- EVT VT = LHS.getValueType();
-
- unsigned Sltu_op = inMips16Mode? Mips::SltuRxRyRz16: Mips::SLTu;
- SDNode *Carry = CurDAG->getMachineNode(Sltu_op, dl, VT, Ops, 2);
- unsigned Addu_op = inMips16Mode? Mips::AdduRxRyRz16 : Mips::ADDu;
- SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, dl, VT,
- SDValue(Carry,0), RHS);
-
- return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
- LHS, SDValue(AddCarry,0));
- }
-
- /// Mul with two results
- case ISD::SMUL_LOHI:
- case ISD::UMUL_LOHI: {
- if (NodeTy == MVT::i32) {
- if (Subtarget.inMips16Mode())
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 :
- Mips::MultRxRy16);
- else
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
- }
- else
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
-
- std::pair<SDNode*, SDNode*> LoHi = SelectMULT(Node, MultOpc, dl, NodeTy,
- true, true);
-
- if (!SDValue(Node, 0).use_empty())
- ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
-
- if (!SDValue(Node, 1).use_empty())
- ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
-
- return NULL;
- }
-
- /// Special Muls
- case ISD::MUL: {
- // Mips32 has a 32-bit three operand mul instruction.
- if (Subtarget.hasMips32() && NodeTy == MVT::i32)
- break;
- return SelectMULT(Node, NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT,
- dl, NodeTy, true, false).first;
- }
- case ISD::MULHS:
- case ISD::MULHU: {
- if (NodeTy == MVT::i32) {
- if (Subtarget.inMips16Mode())
- MultOpc = (Opcode == ISD::MULHU ?
- Mips::MultuRxRy16 : Mips::MultRxRy16);
- else
- MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
- }
- else
- MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
-
- return SelectMULT(Node, MultOpc, dl, NodeTy, false, true).second;
- }
-
// Get target GOT address.
case ISD::GLOBAL_OFFSET_TABLE:
return getGlobalBaseReg();
- case ISD::ConstantFP: {
- ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
- if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
- if (Subtarget.hasMips64()) {
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Mips::ZERO_64, MVT::i64);
- return CurDAG->getMachineNode(Mips::DMTC1, dl, MVT::f64, Zero);
- }
-
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Mips::ZERO, MVT::i32);
- return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero,
- Zero);
- }
- break;
- }
-
- case ISD::Constant: {
- const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
- unsigned Size = CN->getValueSizeInBits(0);
-
- if (Size == 32)
- break;
-
- MipsAnalyzeImmediate AnalyzeImm;
- int64_t Imm = CN->getSExtValue();
-
- const MipsAnalyzeImmediate::InstSeq &Seq =
- AnalyzeImm.Analyze(Imm, Size, false);
-
- MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
- DebugLoc DL = CN->getDebugLoc();
- SDNode *RegOpnd;
- SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
- MVT::i64);
-
- // The first instruction can be a LUi which is different from other
- // instructions (ADDiu, ORI and SLL) in that it does not have a register
- // operand.
- if (Inst->Opc == Mips::LUi64)
- RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
- else
- RegOpnd =
- CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
- CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
- ImmOpnd);
-
- // The remaining instructions in the sequence are handled here.
- for (++Inst; Inst != Seq.end(); ++Inst) {
- ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
- MVT::i64);
- RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
- SDValue(RegOpnd, 0), ImmOpnd);
- }
-
- return RegOpnd;
- }
-
#ifndef NDEBUG
case ISD::LOAD:
case ISD::STORE:
@@ -726,31 +122,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
"Unexpected unaligned loads/stores.");
break;
#endif
-
- case MipsISD::ThreadPointer: {
- EVT PtrVT = TLI.getPointerTy();
- unsigned RdhwrOpc, SrcReg, DestReg;
-
- if (PtrVT == MVT::i32) {
- RdhwrOpc = Mips::RDHWR;
- SrcReg = Mips::HWR29;
- DestReg = Mips::V1;
- } else {
- RdhwrOpc = Mips::RDHWR64;
- SrcReg = Mips::HWR29_64;
- DestReg = Mips::V1_64;
- }
-
- SDNode *Rdhwr =
- CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
- Node->getValueType(0),
- CurDAG->getRegister(SrcReg, PtrVT));
- SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
- SDValue(Rdhwr, 0));
- SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT);
- ReplaceUses(SDValue(Node, 0), ResNode);
- return ResNode.getNode();
- }
}
// Select the default instruction
@@ -776,5 +147,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
/// createMipsISelDag - This pass converts a legalized DAG into a
/// MIPS-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
- return new MipsDAGToDAGISel(TM);
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16ISelDag(TM);
+
+ return llvm::createMipsSEISelDag(TM);
}
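[Editor's note: the rewritten createMipsISelDag is a plain runtime factory: one public entry point, two concrete selectors chosen by subtarget mode. A stripped-down sketch of the shape; every type name here is a placeholder, not the actual LLVM class:]

```cpp
// Placeholder types standing in for FunctionPass and the two selectors.
struct Pass { virtual ~Pass() {} };
struct Mips16ISelDag : Pass {};
struct MipsSEISelDag : Pass {};
struct Machine {
  bool mips16;
  bool inMips16Mode() const { return mips16; }
};

// One entry point hides which concrete selector gets built, so callers
// never need to know about the Mips16/SE split.
Pass *createISelDag(const Machine &tm) {
  if (tm.inMips16Mode())
    return new Mips16ISelDag(); // 16-bit instruction encodings
  return new MipsSEISelDag();   // standard-encoding (SE) path
}
```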
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
new file mode 100644
index 0000000000..cf0f9c58aa
--- /dev/null
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -0,0 +1,93 @@
+//===---- MipsISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSISELDAGTODAG_H
+#define MIPSISELDAGTODAG_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace llvm {
+
+class MipsDAGToDAGISel : public SelectionDAGISel {
+public:
+ explicit MipsDAGToDAGISel(MipsTargetMachine &TM)
+ : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+protected:
+ SDNode *getGlobalBaseReg();
+
+ /// Keep a pointer to the MipsSubtarget around so that we can make the right
+ /// decision when generating code for different targets.
+ const MipsSubtarget &Subtarget;
+
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MipsGenDAGISel.inc"
+
+ // Complex Pattern.
+ /// (reg + imm).
+ virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// Fall back on this function if all else fails.
+ virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// Match integer address pattern.
+ virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias);
+
+ virtual SDNode *Select(SDNode *N);
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0;
+
+ // getImm - Return a target constant with the specified value.
+ inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
+ }
+
+ virtual void processFunctionAfterISel(MachineFunction &MF) = 0;
+
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+};
+
+/// createMipsISelDag - This pass converts a legalized DAG into a
+/// MIPS-specific DAG, ready for instruction scheduling.
+FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
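[Editor's note: with this header in place, MipsDAGToDAGISel::Select becomes a template method: the shared driver asks the pure-virtual selectNode hook first, and only runs the common cases when the hook declines. A minimal sketch of that control flow, deliberately written in the pre-C++11 style of the surrounding code; Node and the opcode constant are invented for illustration:]

```cpp
#include <utility>

struct Node { unsigned opcode; };

struct BaseISel {
  virtual ~BaseISel() {}

  // Shared driver: the subclass gets first refusal on every node.
  Node *doSelect(Node *n) {
    std::pair<bool, Node *> r = selectNode(n);
    if (r.first)
      return r.second; // subclass handled it (possibly returning null)
    // ...common handling shared by the Mips16 and SE selectors...
    return n;
  }

  // Hook: returns {true, result} when handled, {false, 0} to defer.
  virtual std::pair<bool, Node *> selectNode(Node *n) = 0;
};

struct SEISel : BaseISel {
  std::pair<bool, Node *> selectNode(Node *n) {
    if (n->opcode == 7) // pretend 7 is an SE-only pseudo opcode
      return std::make_pair(true, n);
    return std::make_pair(false, static_cast<Node *>(0));
  }
};
```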
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index c452dee8d0..4bf43f42e3 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-lower"
-#include <set>
#include "MipsISelLowering.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
@@ -42,26 +41,9 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
-EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
- cl::desc("MIPS: Enable tail calls."), cl::init(false));
-
-static cl::opt<bool>
LargeGOT("mxgot", cl::Hidden,
cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false));
-static cl::opt<bool>
-Mips16HardFloat("mips16-hard-float", cl::NotHidden,
- cl::desc("MIPS: mips16 hard float enable."),
- cl::init(false));
-
-static cl::opt<bool> DontExpandCondPseudos16(
- "mips16-dont-expand-cond-pseudo",
- cl::init(false),
- cl::desc("Dont expand conditional move related "
- "pseudos for Mips 16"),
- cl::Hidden);
-
-
static const uint16_t O32IntRegs[4] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
@@ -88,7 +70,7 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
return true;
}
-static SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) {
+SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
}
@@ -123,7 +105,8 @@ static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
}
-static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) {
+SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG,
+ bool HasMips64) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
@@ -137,7 +120,8 @@ static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) {
return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo);
}
-static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) {
+SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG,
+ unsigned Flag) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
@@ -146,8 +130,9 @@ static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) {
MachinePointerInfo::getGOT(), false, false, false, 0);
}
-static SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
- unsigned HiFlag, unsigned LoFlag) {
+SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag,
+ unsigned LoFlag) const {
DebugLoc DL = Op.getDebugLoc();
EVT Ty = Op.getValueType();
SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag));
@@ -211,110 +196,17 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-namespace {
- struct LTStr {
- bool operator()(const char *S1, const char *S2) const
- {
- return strcmp(S1, S2) < 0;
- }
- };
-
- std::set<const char*, LTStr> NoHelperNeeded;
-}
-
-void MipsTargetLowering::setMips16LibcallName
- (RTLIB::Libcall L, const char *Name) {
- setLibcallName(L, Name);
- NoHelperNeeded.insert(Name);
-}
-
-void MipsTargetLowering::setMips16HardFloatLibCalls() {
- setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3");
- setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3");
- setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3");
- setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3");
- setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3");
- setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3");
- setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3");
- setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3");
- setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2");
- setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2");
- setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi");
- setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi");
- setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf");
- setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf");
- setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf");
- setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf");
- setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2");
- setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2");
- setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2");
- setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2");
- setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2");
- setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2");
- setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2");
- setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2");
- setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2");
- setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2");
- setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2");
- setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2");
- setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2");
- setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2");
- setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2");
- setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2");
-}
-
MipsTargetLowering::
MipsTargetLowering(MipsTargetMachine &TM)
: TargetLowering(TM, new MipsTargetObjectFile()),
Subtarget(&TM.getSubtarget<MipsSubtarget>()),
HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()),
IsO32(Subtarget->isABI_O32()) {
-
// Mips does not have i1 type, so use i32 for
// setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- // Set up the register classes
- addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
-
- if (HasMips64)
- addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass);
-
- if (Subtarget->inMips16Mode()) {
- addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
- if (Mips16HardFloat)
- setMips16HardFloatLibCalls();
- }
-
- if (Subtarget->hasDSP()) {
- MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
-
- for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
- addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
-
- // Expand all builtin opcodes.
- for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
- setOperationAction(Opc, VecTys[i], Expand);
-
- setOperationAction(ISD::LOAD, VecTys[i], Legal);
- setOperationAction(ISD::STORE, VecTys[i], Legal);
- setOperationAction(ISD::BITCAST, VecTys[i], Legal);
- }
- }
-
- if (!TM.Options.UseSoftFloat) {
- addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
-
- // When dealing with single precision only, use libcalls
- if (!Subtarget->isSingleFloat()) {
- if (HasMips64)
- addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
- else
- addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
- }
- }
-
// Load extented operations for i1 types must be promoted
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
@@ -348,18 +240,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
- if (Subtarget->inMips16Mode()) {
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
- }
- else {
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
- }
- if (!Subtarget->inMips16Mode()) {
- setOperationAction(ISD::LOAD, MVT::i32, Custom);
- setOperationAction(ISD::STORE, MVT::i32, Custom);
- }
if (!TM.Options.NoNaNsFPMath) {
setOperationAction(ISD::FABS, MVT::f32, Custom);
@@ -472,21 +352,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
- if (Subtarget->inMips16Mode()) {
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
- }
-
setInsertFencesForAtomic(true);
if (!Subtarget->hasSEInReg()) {
@@ -523,7 +388,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setMinFunctionAlignment(HasMips64 ? 3 : 2);
setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP);
- computeRegisterProperties();
setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0);
setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1);
@@ -531,22 +395,11 @@ MipsTargetLowering(MipsTargetMachine &TM)
MaxStoresPerMemcpy = 16;
}
-bool
-MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
- MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
-
- if (Subtarget->inMips16Mode())
- return false;
+const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) {
+ if (TM.getSubtargetImpl()->inMips16Mode())
+ return llvm::createMips16TargetLowering(TM);
- switch (SVT) {
- case MVT::i64:
- case MVT::i32:
- if (Fast)
- *Fast = true;
- return true;
- default:
- return false;
- }
+ return llvm::createMipsSETargetLowering(TM);
}
EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
@@ -1099,347 +952,6 @@ static Mips::FPBranchCode getFPBranchCodeFromCond(Mips::CondCode CC) {
}
MachineBasicBlock *
-MipsTargetLowering::emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
- // $bb:
- // bposge32_pseudo $vr0
- // =>
- // $bb:
- // bposge32 $tbb
- // $fbb:
- // li $vr2, 0
- // b $sink
- // $tbb:
- // li $vr1, 1
- // $sink:
- // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
-
- MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
- DebugLoc DL = MI->getDebugLoc();
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, FBB);
- F->insert(It, TBB);
- F->insert(It, Sink);
-
- // Transfer the remainder of BB and its successor edges to Sink.
- Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- Sink->transferSuccessorsAndUpdatePHIs(BB);
-
- // Add successors.
- BB->addSuccessor(FBB);
- BB->addSuccessor(TBB);
- FBB->addSuccessor(Sink);
- TBB->addSuccessor(Sink);
-
- // Insert the real bposge32 instruction to $BB.
- BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
-
- // Fill $FBB.
- unsigned VR2 = RegInfo.createVirtualRegister(RC);
- BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
- .addReg(Mips::ZERO).addImm(0);
- BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
-
- // Fill $TBB.
- unsigned VR1 = RegInfo.createVirtualRegister(RC);
- BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
- .addReg(Mips::ZERO).addImm(1);
-
- // Insert phi function to $Sink.
- BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
- MI->getOperand(0).getReg())
- .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return Sink;
-}
-
-MachineBasicBlock *MipsTargetLowering::emitSel16(unsigned Opc, MachineInstr *MI,
- MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg())
- .addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- BuildMI(*BB, BB->begin(), DL,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-
-MachineBasicBlock *MipsTargetLowering::emitSelT16
- (unsigned Opc1, unsigned Opc2,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
- .addReg(MI->getOperand(4).getReg());
- BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- BuildMI(*BB, BB->begin(), DL,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-
-}
-
-
-MachineBasicBlock *MipsTargetLowering::emitSeliT16
- (unsigned Opc1, unsigned Opc2,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg())
- .addImm(MI->getOperand(4).getImm());
- BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
-
- BuildMI(*BB, BB->begin(), DL,
- TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-
-}
-
-
-MachineBasicBlock
- *MipsTargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
- MachineInstr *MI,
- MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned regX = MI->getOperand(0).getReg();
- unsigned regY = MI->getOperand(1).getReg();
- MachineBasicBlock *target = MI->getOperand(2).getMBB();
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY);
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-
-
-MachineBasicBlock *MipsTargetLowering::emitFEXT_T8I8I16_ins(
- unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned regX = MI->getOperand(0).getReg();
- int64_t imm = MI->getOperand(1).getImm();
- MachineBasicBlock *target = MI->getOperand(2).getMBB();
- unsigned CmpOpc;
- if (isUInt<8>(imm))
- CmpOpc = CmpiOpc;
- else if (isUInt<16>(imm))
- CmpOpc = CmpiXOpc;
- else
- llvm_unreachable("immediate field not usable");
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm);
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-
-
-static unsigned Mips16WhichOp8uOr16simm
- (unsigned shortOp, unsigned longOp, int64_t Imm) {
- if (isUInt<8>(Imm))
- return shortOp;
- else if (isInt<16>(Imm))
- return longOp;
- else
- llvm_unreachable("immediate field not usable");
-}
-
-MachineBasicBlock *MipsTargetLowering::emitFEXT_CCRX16_ins(
- unsigned SltOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned CC = MI->getOperand(0).getReg();
- unsigned regX = MI->getOperand(1).getReg();
- unsigned regY = MI->getOperand(2).getReg();
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(SltOpc)).addReg(regX).addReg(regY);
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
-MachineBasicBlock *MipsTargetLowering::emitFEXT_CCRXI16_ins(
- unsigned SltiOpc, unsigned SltiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB )const {
- if (DontExpandCondPseudos16)
- return BB;
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- unsigned CC = MI->getOperand(0).getReg();
- unsigned regX = MI->getOperand(1).getReg();
- int64_t Imm = MI->getOperand(2).getImm();
- unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm);
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(SltOpc)).addReg(regX).addImm(Imm);
- BuildMI(*BB, MI, MI->getDebugLoc(),
- TII->get(Mips::MoveR3216), CC).addReg(Mips::T8);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-
-}
-MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
@@ -1548,77 +1060,6 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case Mips::ATOMIC_CMP_SWAP_I64:
case Mips::ATOMIC_CMP_SWAP_I64_P8:
return emitAtomicCmpSwap(MI, BB, 8);
- case Mips::BPOSGE32_PSEUDO:
- return emitBPOSGE32(MI, BB);
- case Mips::SelBeqZ:
- return emitSel16(Mips::BeqzRxImm16, MI, BB);
- case Mips::SelBneZ:
- return emitSel16(Mips::BnezRxImm16, MI, BB);
- case Mips::SelTBteqZCmpi:
- return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::SelTBteqZSlti:
- return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB);
- case Mips::SelTBteqZSltiu:
- return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::SelTBtneZCmpi:
- return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::SelTBtneZSlti:
- return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB);
- case Mips::SelTBtneZSltiu:
- return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::SelTBteqZCmp:
- return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
- case Mips::SelTBteqZSlt:
- return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
- case Mips::SelTBteqZSltu:
- return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
- case Mips::SelTBtneZCmp:
- return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
- case Mips::SelTBtneZSlt:
- return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
- case Mips::SelTBtneZSltu:
- return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
- case Mips::BteqzT8CmpX16:
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
- case Mips::BteqzT8SltX16:
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
- case Mips::BteqzT8SltuX16:
- // TBD: figure out a way to get this or remove the instruction
- // altogether.
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
- case Mips::BtnezT8CmpX16:
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
- case Mips::BtnezT8SltX16:
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
- case Mips::BtnezT8SltuX16:
- // TBD: figure out a way to get this or remove the instruction
- // altogether.
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
- case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
- case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB);
- case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
- case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
- break;
- case Mips::SltCCRxRy16:
- return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
- break;
- case Mips::SltiCCRxImmX16:
- return emitFEXT_CCRXI16_ins
- (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB);
- case Mips::SltiuCCRxImmX16:
- return emitFEXT_CCRXI16_ins
- (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB);
- case Mips::SltuCCRxRy16:
- return emitFEXT_CCRX16_ins
- (Mips::SltuRxRy16, MI, BB);
}
}
@@ -3074,28 +2515,6 @@ static unsigned getNextIntArgReg(unsigned Reg) {
return (Reg == Mips::A0) ? Mips::A1 : Mips::A3;
}
-/// isEligibleForTailCallOptimization - Check whether the call is eligible
-/// for tail call optimization.
-bool MipsTargetLowering::
-isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const {
- if (!EnableMipsTailCalls)
- return false;
-
- // No tail call optimization for mips16.
- if (Subtarget->inMips16Mode())
- return false;
-
- // Return false if either the callee or caller has a byval argument.
- if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
- return false;
-
- // Return true if the callee's argument area is no larger than the
- // caller's.
- return NextStackOffset <= FI.getIncomingArgSize();
-}
-
SDValue
MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
SDValue Chain, SDValue Arg, DebugLoc DL,
@@ -3114,161 +2533,48 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
/*isVolatile=*/ true, false, 0);
}
-//
-// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much
-// cleaner way to do all of this but it will have to wait until the
-// traditional gcc mechanism is completed.
-//
-// For PIC, in order for Mips16 code to call Mips32 code which, according to
-// the ABI, has either arguments or returned values placed in floating point
-// registers, we use a set of helper functions. (This includes functions
-// whose return type is complex, which on Mips is returned in a pair of
-// floating point registers.)
-//
-// This is an encoding that we inherited from gcc.
-// In the traditional Mips O32 and N32 ABIs, floating point numbers are
-// passed in floating point argument registers 1 and 2 only when the first
-// and optionally the second arguments are float (sf) or double (df).
-// For Mips16 we are only concerned with the situations where floating point
-// arguments are passed in floating point registers by the ABI, because
-// Mips16 mode code cannot execute floating point instructions to load those
-// values; hence helper functions are needed.
-// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df);
-// the helper function suffixes for these are:
-// 0, 1, 5, 9, 2, 6, 10
-// This suffix can be calculated as follows:
-// for a given argument Arg:
-//   Arg1x, Arg2x = 1 : Arg is sf
-//                  2 : Arg is df
-//                  0 : Arg is neither sf nor df
-// The stub is then the string for number Arg1x + Arg2x*4.
-// Not all numbers between 0 and 10 are possible; we check anyway and
-// assert if an impossible value turns up.
-//
-
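A standalone sketch of the suffix arithmetic described above (illustrative only; stubNumber is a hypothetical helper, not part of this patch):

#include <cassert>
// Arg1x/Arg2x encode an argument as float (1), double (2), or neither (0),
// matching the comment above.
static unsigned stubNumber(unsigned Arg1x, unsigned Arg2x) {
  assert(Arg1x <= 2 && Arg2x <= 2 && "invalid argument encoding");
  // A non-FP first argument means no helper is needed (stub number 0).
  return Arg1x ? Arg1x + Arg2x * 4 : 0;
}
// e.g. (df, sf): stubNumber(2, 1) == 6, selecting "__mips16_call_stub_6"
// (or "__mips16_call_stub_sf_6" when the return type is float).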
-unsigned int MipsTargetLowering::getMips16HelperFunctionStubNumber
- (ArgListTy &Args) const {
- unsigned int resultNum = 0;
- if (Args.size() >= 1) {
- Type *t = Args[0].Ty;
- if (t->isFloatTy()) {
- resultNum = 1;
- }
- else if (t->isDoubleTy()) {
- resultNum = 2;
- }
- }
- if (resultNum) {
- if (Args.size() >=2) {
- Type *t = Args[1].Ty;
- if (t->isFloatTy()) {
- resultNum += 4;
- }
- else if (t->isDoubleTy()) {
- resultNum += 8;
- }
- }
+void MipsTargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+ // Insert node "GP copy globalreg" before call to function.
+ //
+ // R_MIPS_CALL* operators (emitted when non-internal functions are called
+ // in PIC mode) allow symbols to be resolved via lazy binding.
+ // The lazy binding stub requires GP to point to the GOT.
+ if (IsPICCall && !InternalLinkage) {
+ unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
+ EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
+ RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty)));
}
- return resultNum;
-}
-//
-// Prefixes are attached to stub numbers depending on the return type:
-//   return type: float          sf_
-//                double         df_
-//                single complex sc_
-//                double complex dc_
-//                others         NO PREFIX
-//
-// The full name of a helper function is __mips16_call_stub_ +
-// return-type-dependent prefix + stub number.
-//
-// This is something that probably should be in a different source file and
-// perhaps done differently, but my main purpose is to not waste runtime
-// on something that we can enumerate in the source. Another possibility is
-// to have a python script generate these mapping tables. This will do
-// for now. There is a whole series of helper function mapping arrays, one
-// for each return type class as outlined above. There are 11 possible
-// entries; entries that are 0 should never be selected.
-//
-// All the arrays are similar except for the one for return types that are
-// neither sf, df, sc, nor dc, where we only care about calls that have
-// sf or df as the first parameter.
-//
-#define P_ "__mips16_call_stub_"
-#define MAX_STUB_NUMBER 10
-#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10"
-#define T P "0" , T1
-#define P P_
-static char const * vMips16Helper[MAX_STUB_NUMBER+1] =
- {0, T1 };
-#undef P
-#define P P_ "sf_"
-static char const * sfMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "df_"
-static char const * dfMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "sc_"
-static char const * scMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "dc_"
-static char const * dcMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#undef P_
-
-
-const char* MipsTargetLowering::
- getMips16HelperFunction
- (Type* RetTy, ArgListTy &Args, bool &needHelper) const {
- const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args);
-#ifndef NDEBUG
- const unsigned int maxStubNum = 10;
- assert(stubNum <= maxStubNum);
- const bool validStubNum[maxStubNum+1] =
- {true, true, true, false, false, true, true, false, false, true, true};
- assert(validStubNum[stubNum]);
-#endif
- const char *result;
- if (RetTy->isFloatTy()) {
- result = sfMips16Helper[stubNum];
- }
- else if (RetTy ->isDoubleTy()) {
- result = dfMips16Helper[stubNum];
- }
- else if (RetTy->isStructTy()) {
- // check if it's complex
- if (RetTy->getNumContainedTypes() == 2) {
- if ((RetTy->getContainedType(0)->isFloatTy()) &&
- (RetTy->getContainedType(1)->isFloatTy())) {
- result = scMips16Helper[stubNum];
- }
- else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
- (RetTy->getContainedType(1)->isDoubleTy())) {
- result = dcMips16Helper[stubNum];
- }
- else {
- llvm_unreachable("Uncovered condition");
- }
- }
- else {
- llvm_unreachable("Uncovered condition");
- }
- }
- else {
- if (stubNum == 0) {
- needHelper = false;
- return "";
- }
- result = vMips16Helper[stubNum];
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+ SDValue InFlag;
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
}
- needHelper = true;
- return result;
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(CLI.DAG.getRegisterMask(Mask));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
}
/// LowerCall - function arguments are copied from virtual regs to
@@ -3287,26 +2593,6 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
- const char* Mips16HelperFunction = 0;
- bool NeedMips16Helper = false;
-
- if (Subtarget->inMips16Mode() && getTargetMachine().Options.UseSoftFloat &&
- Mips16HardFloat) {
- //
-    // Currently we don't have symbols tagged with the mips16 or mips32
-    // qualifier, so we assume that we don't know what kind it is
-    // and generate the helper.
- //
- bool LookupHelper = true;
- if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) {
- LookupHelper = false;
- }
- }
- if (LookupHelper) Mips16HelperFunction =
- getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper);
-
- }
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
@@ -3466,80 +2752,17 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
GlobalOrExternal = true;
}
- SDValue JumpTarget = Callee;
-
-  // T9 should contain the address of the callee function if
-  // -relocation-model=pic or it is an indirect call.
- if (IsPICCall || !GlobalOrExternal) {
- unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
- unsigned V0Reg = Mips::V0;
- if (NeedMips16Helper) {
- RegsToPass.push_front(std::make_pair(V0Reg, Callee));
- JumpTarget = DAG.getExternalSymbol(
- Mips16HelperFunction, getPointerTy());
- JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT);
- }
- else {
- RegsToPass.push_front(std::make_pair(T9Reg, Callee));
-
- if (!Subtarget->inMips16Mode())
- JumpTarget = SDValue();
- }
- }
-
- // Insert node "GP copy globalreg" before call to function.
- //
- // R_MIPS_CALL* operators (emitted when non-internal functions are called
- // in PIC mode) allow symbols to be resolved via lazy binding.
- // The lazy binding stub requires GP to point to the GOT.
- if (IsPICCall && !InternalLinkage) {
- unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP;
- EVT Ty = IsN64 ? MVT::i64 : MVT::i32;
- RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(DAG, Ty)));
- }
-
- // Build a sequence of copy-to-reg nodes chained together with token
- // chain and flag operands which copy the outgoing args into registers.
-  // The InFlag is necessary since all emitted instructions must be
-  // stuck together.
- SDValue InFlag;
-
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // MipsJmpLink = #chain, #target_address, #opt_in_flags...
- // = Chain, Callee, Reg#1, Reg#2, ...
- //
- // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops(1, Chain);
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- if (JumpTarget.getNode())
- Ops.push_back(JumpTarget);
-
- // Add argument registers to the end of the list so that they are
- // known live into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- // Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
-
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage,
+ CLI, Callee, Chain);
if (IsTailCall)
return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, &Ops[0], Ops.size());
Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
+ SDValue InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal,
@@ -4124,14 +3347,14 @@ static bool isF128SoftLibCall(const char *CallSym) {
const char * const *End = LibCalls + array_lengthof(LibCalls);
// Check that LibCalls is sorted alphabetically.
-#ifndef NDEBUG
- LTStr Comp;
+ MipsTargetLowering::LTStr Comp;
+#ifndef NDEBUG
for (const char * const *I = LibCalls; I < End - 1; ++I)
assert(Comp(*I, *(I + 1)));
#endif
- return std::binary_search(LibCalls, End, CallSym, LTStr());
+ return std::binary_search(LibCalls, End, CallSym, Comp);
}
/// This function returns true if Ty is fp128 or i128 which was originally a
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index de925e16ab..71977d7f1c 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -152,9 +152,9 @@ namespace llvm {
public:
explicit MipsTargetLowering(MipsTargetMachine &TM);
- virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ static const MipsTargetLowering *create(MipsTargetMachine &TM);
- virtual bool allowsUnalignedMemoryAccesses (EVT VT, bool *Fast) const;
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
virtual void LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
@@ -177,17 +177,34 @@ namespace llvm {
EVT getSetCCResultType(EVT VT) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- private:
- void setMips16LibcallName(RTLIB::Libcall, const char *Name);
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ struct LTStr {
+ bool operator()(const char *S1, const char *S2) const {
+ return strcmp(S1, S2) < 0;
+ }
+ };
+
+ protected:
+ SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
- void setMips16HardFloatLibCalls();
+ SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const;
- unsigned int
- getMips16HelperFunctionStubNumber(ArgListTy &Args) const;
+ SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const;
- const char *getMips16HelperFunction
- (Type* RetTy, ArgListTy &Args, bool &needHelper) const;
+ SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
+ unsigned HiFlag, unsigned LoFlag) const;
+
+ /// This function fills Ops, which is the list of operands that will later
+ /// be used when a function call node is created. It also generates
+ /// copyToReg nodes to set up argument registers.
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
/// ByValArgInfo - Byval argument information.
struct ByValArgInfo {
@@ -283,6 +300,7 @@ namespace llvm {
bool HasMips64, IsN64, IsO32;
+ private:
// Lower Operand helpers
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
@@ -321,9 +339,10 @@ namespace llvm {
/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
- bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const;
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const = 0;
/// copyByValArg - Copy argument registers which were used to pass a byval
/// argument to the stack. Create a stack frame object for the byval
@@ -377,10 +396,6 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
- virtual MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
-
// Inline asm support
ConstraintType getConstraintType(const std::string &Constraint) const;
@@ -419,8 +434,6 @@ namespace llvm {
virtual unsigned getJumpTableEncoding() const;
- MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
- MachineBasicBlock *BB) const;
MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand = false) const;
MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI,
@@ -430,29 +443,11 @@ namespace llvm {
MachineBasicBlock *BB, unsigned Size) const;
MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
- MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2,
- MachineInstr *MI,
- MachineBasicBlock *BB) const;
-
- MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2,
- MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc,
- MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *emitFEXT_T8I8I16_ins(
- unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const;
- MachineBasicBlock *emitFEXT_CCRX16_ins(
- unsigned SltOpc,
- MachineInstr *MI, MachineBasicBlock *BB) const;
- MachineBasicBlock *emitFEXT_CCRXI16_ins(
- unsigned SltiOpc, unsigned SltiXOpc,
- MachineInstr *MI, MachineBasicBlock *BB )const;
-
};
+
+ /// Create MipsTargetLowering objects.
+ const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM);
+ const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM);
}
#endif // MipsISELLOWERING_H
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 25b5d240be..022987f12e 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -256,6 +256,7 @@ def mem : Operand<i32> {
let MIOperandInfo = (ops CPURegs, simm16);
let EncoderMethod = "getMemEncoding";
let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
def mem64 : Operand<i64> {
@@ -263,18 +264,21 @@ def mem64 : Operand<i64> {
let MIOperandInfo = (ops CPU64Regs, simm16_64);
let EncoderMethod = "getMemEncoding";
let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
def mem_ea : Operand<i32> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops CPURegs, simm16);
let EncoderMethod = "getMemEncoding";
+ let OperandType = "OPERAND_MEMORY";
}
def mem_ea_64 : Operand<i64> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops CPU64Regs, simm16_64);
let EncoderMethod = "getMemEncoding";
+ let OperandType = "OPERAND_MEMORY";
}
// size operand of ext instruction
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 13b2a6ac17..3c210e71bb 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -58,7 +58,8 @@ public:
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index f93dd86c17..6d76e8a54a 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -18,6 +18,10 @@ def sub_lo : SubRegIndex;
def sub_hi : SubRegIndex;
}
+class Unallocatable {
+ bit isAllocatable = 0;
+}
+
// We have banks of 32 registers each.
class MipsReg<bits<16> Enc, string n> : Register<n> {
let HWEncoding = Enc;
@@ -291,9 +295,9 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add
// Callee save
S0, S1)>;
-def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>;
+def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>, Unallocatable;
-def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>;
+def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable;
// 64bit fp:
// * FGR64 - 32 64-bit registers
@@ -319,18 +323,19 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
// Condition Register for floating point operations
-def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>;
+def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable;
// Hi/Lo Registers
-def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>;
-def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>;
+def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable;
+def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable;
// Hardware registers
-def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
-def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
+def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable;
+def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable;
// Accumulator Registers
-def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>;
+def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>,
+ Unallocatable;
def CPURegsAsmOperand : AsmOperandClass {
let Name = "CPURegsAsm";
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
new file mode 100644
index 0000000000..e22c3c8b3f
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -0,0 +1,460 @@
+//===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-isel"
+#include "MipsSEISelDAGToDAG.h"
+#include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+
+bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
+ const MachineInstr& MI) {
+ unsigned DstReg = 0, ZeroReg = 0;
+
+ // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
+ if ((MI.getOpcode() == Mips::ADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO;
+ } else if ((MI.getOpcode() == Mips::DADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO_64;
+ }
+
+ if (!DstReg)
+ return false;
+
+ // Replace uses with ZeroReg.
+ for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
+ E = MRI->use_end(); U != E;) {
+ MachineOperand &MO = U.getOperand();
+ unsigned OpNo = U.getOperandNo();
+ MachineInstr *MI = MO.getParent();
+ ++U;
+
+ // Do not replace if it is a phi's operand or is tied to def operand.
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo())
+ continue;
+
+ MO.setReg(ZeroReg);
+ }
+
+ return true;
+}
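The rewrite, sketched as hand-written machine IR (not actual pass output):

//   %vr0 = ADDiu %ZERO, 0      ; %vr0 is known zero
//   %vr1 = ADDu  %vr0, %vr2
// becomes
//   %vr1 = ADDu  %ZERO, %vr2   ; PHI operands and tied defs are skipped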
+
+void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ const TargetRegisterClass *RC;
+
+ if (Subtarget.isABI_N64())
+ RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
+ else
+ RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+
+ if (Subtarget.isABI_N64()) {
+ MF.getRegInfo().addLiveIn(Mips::T9_64);
+ MBB.addLiveIn(Mips::T9_64);
+
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // daddu $v1, $v0, $t9
+ // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
+ .addReg(Mips::T9_64);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ if (MF.getTarget().getRelocationModel() == Reloc::Static) {
+ // Set global register to __gnu_local_gp.
+ //
+ // lui $v0, %hi(__gnu_local_gp)
+ // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
+ return;
+ }
+
+ MF.getRegInfo().addLiveIn(Mips::T9);
+ MBB.addLiveIn(Mips::T9);
+
+ if (Subtarget.isABI_N32()) {
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // addu $v1, $v0, $t9
+ // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ return;
+ }
+
+ assert(Subtarget.isABI_O32());
+
+ // For O32 ABI, the following instruction sequence is emitted to initialize
+ // the global base register:
+ //
+ // 0. lui $2, %hi(_gp_disp)
+ // 1. addiu $2, $2, %lo(_gp_disp)
+ // 2. addu $globalbasereg, $2, $t9
+ //
+ // We emit only the last instruction here.
+ //
+  // The GNU linker requires that the first two instructions appear at the
+  // beginning of a function and that no instructions be inserted before or
+  // between them.
+ // The two instructions are emitted during lowering to MC layer in order to
+ // avoid any reordering.
+ //
+ // Register $2 (Mips::V0) is added to the list of live-in registers to ensure
+ // the value instruction 1 (addiu) defines is valid when instruction 2 (addu)
+ // reads it.
+ MF.getRegInfo().addLiveIn(Mips::V0);
+ MBB.addLiveIn(Mips::V0);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg)
+ .addReg(Mips::V0).addReg(Mips::T9);
+}
+
+void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
+ initGlobalBaseReg(MF);
+
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
+ ++MFI)
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+ replaceUsesWithZeroReg(MRI, *I);
+}
+
+/// Select multiply instructions.
+std::pair<SDNode*, SDNode*>
+MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
+ Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi) {
+ unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
+ Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
+ }
+ return std::make_pair(Lo, Hi);
+}
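For example, an i32 multiply that needs both halves would select to something along these lines:

//   mult  $rs, $rt    ; 64-bit product written to HI/LO
//   mflo  $rdlo       ; emitted when HasLo
//   mfhi  $rdhi       ; emitted when HasHi
// The MVT::Glue values chain the moves to the mult so nothing clobbers
// HI/LO in between.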
+
+SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
+ SDValue CmpLHS, DebugLoc DL,
+ SDNode *Node) const {
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+ SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
+ EVT VT = LHS.getValueType();
+
+ SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT,
+ SDValue(Carry, 0), RHS);
+ return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
+ SDValue(AddCarry, 0));
+}
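A minimal standalone sketch of the carry arithmetic being selected (names are illustrative; LoB mirrors InFlag.getOperand(1), the second operand of the low-half add):

#include <cstdint>
// ADDE's high half: HiA + HiB + carry, where the carry is recovered by an
// unsigned compare -- the same thing the SLTu/ADDu pair above computes.
uint32_t addeHi(uint32_t HiA, uint32_t HiB, uint32_t LoSum, uint32_t LoB) {
  uint32_t Carry = LoSum < LoB ? 1 : 0; // SLTu $c, $losum, $lob
  uint32_t Tmp = Carry + HiB;           // ADDu
  return HiA + Tmp;                     // final ADDu replacing the ADDE
}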
+
+/// ComplexPattern used in MipsInstrInfo.td.
+/// Used on Mips load/store instructions.
+bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ EVT ValTy = Addr.getValueType();
+
+  // If the address is a frame index, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ Offset = CurDAG->getTargetConstant(0, ValTy);
+ return true;
+ }
+
+  // If the address is a wrapped global address (PIC code), use its
+  // base and offset operands directly.
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ if (TM.getRelocationModel() != Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<16>(CN->getSExtValue())) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0)))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ // When loading from constant pools, load the lower address part in
+    // the instruction itself. For example, instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo ||
+ Addr.getOperand(1).getOpcode() == MipsISD::GPRel) {
+ SDValue Opnd0 = Addr.getOperand(1).getOperand(0);
+ if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) ||
+ isa<JumpTableSDNode>(Opnd0)) {
+ Base = Addr.getOperand(0);
+ Offset = Opnd0;
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType());
+ return true;
+}
+
+bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ return selectAddrRegImm(Addr, Base, Offset) ||
+ selectAddrDefault(Addr, Base, Offset);
+}
+
+std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc DL = Node->getDebugLoc();
+
+  ///
+  // Instruction selection that is not handled by the auto-generated
+  // tablegen patterns should be handled here.
+  ///
+ EVT NodeTy = Node->getValueType(0);
+ SDNode *Result;
+ unsigned MultOpc;
+
+ switch(Opcode) {
+ default: break;
+
+ case ISD::SUBE: {
+ SDValue InFlag = Node->getOperand(2);
+ Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node);
+ return std::make_pair(true, Result);
+ }
+
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2);
+ Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
+ return std::make_pair(true, Result);
+ }
+
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
+
+ std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
+ true, true);
+
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
+
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
+
+ return std::make_pair(true, (SDNode*)NULL);
+ }
+
+ /// Special Muls
+ case ISD::MUL: {
+ // Mips32 has a 32-bit three operand mul instruction.
+ if (Subtarget.hasMips32() && NodeTy == MVT::i32)
+ break;
+ MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT;
+ Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first;
+ return std::make_pair(true, Result);
+ }
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
+
+ Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
+ return std::make_pair(true, Result);
+ }
+
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
+ if (Subtarget.hasMips64()) {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO_64, MVT::i64);
+ Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero);
+ } else {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO, MVT::i32);
+ Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero,
+ Zero);
+ }
+
+ return std::make_pair(true, Result);
+ }
+ break;
+ }
+
+ case ISD::Constant: {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
+ unsigned Size = CN->getValueSizeInBits(0);
+
+ if (Size == 32)
+ break;
+
+ MipsAnalyzeImmediate AnalyzeImm;
+ int64_t Imm = CN->getSExtValue();
+
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, Size, false);
+
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+ DebugLoc DL = CN->getDebugLoc();
+ SDNode *RegOpnd;
+ SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+
+ // The first instruction can be a LUi which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == Mips::LUi64)
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
+ else
+ RegOpnd =
+ CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
+ ImmOpnd);
+
+ // The remaining instructions in the sequence are handled here.
+ for (++Inst; Inst != Seq.end(); ++Inst) {
+ ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ SDValue(RegOpnd, 0), ImmOpnd);
+ }
+
+ return std::make_pair(true, RegOpnd);
+ }
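For illustration (hand-written, not actual MipsAnalyzeImmediate output), a full 64-bit constant such as 0x1234567890abcdef could expand to:

//   lui   $v0, 0x1234
//   ori   $v0, $v0, 0x5678
//   dsll  $v0, $v0, 16
//   ori   $v0, $v0, 0x90ab
//   dsll  $v0, $v0, 16
//   ori   $v0, $v0, 0xcdef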
+
+ case MipsISD::ThreadPointer: {
+ EVT PtrVT = TLI.getPointerTy();
+ unsigned RdhwrOpc, SrcReg, DestReg;
+
+ if (PtrVT == MVT::i32) {
+ RdhwrOpc = Mips::RDHWR;
+ SrcReg = Mips::HWR29;
+ DestReg = Mips::V1;
+ } else {
+ RdhwrOpc = Mips::RDHWR64;
+ SrcReg = Mips::HWR29_64;
+ DestReg = Mips::V1_64;
+ }
+
+ SDNode *Rdhwr =
+ CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
+ Node->getValueType(0),
+ CurDAG->getRegister(SrcReg, PtrVT));
+ SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
+ SDValue(Rdhwr, 0));
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
+ ReplaceUses(SDValue(Node, 0), ResNode);
+ return std::make_pair(true, ResNode.getNode());
+ }
+ }
+
+ return std::make_pair(false, (SDNode*)NULL);
+}
+
+FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) {
+ return new MipsSEDAGToDAGISel(TM);
+}
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
new file mode 100644
index 0000000000..6137ab040b
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -0,0 +1,57 @@
+//===-- MipsSEISelDAGToDAG.h - A Dag to Dag Inst Selector for MipsSE -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsDAGToDAGISel specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSSEISELDAGTODAG_H
+#define MIPSSEISELDAGTODAG_H
+
+#include "MipsISelDAGToDAG.h"
+
+namespace llvm {
+
+class MipsSEDAGToDAGISel : public MipsDAGToDAGISel {
+
+public:
+ explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
+
+private:
+ bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
+
+ std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
+ EVT Ty, bool HasLo, bool HasHi);
+
+ SDNode *selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS,
+ DebugLoc DL, SDNode *Node) const;
+
+ virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+
+ virtual void processFunctionAfterISel(MachineFunction &MF);
+
+ // Insert instructions to initialize the global base register in the
+ // first MBB of the function.
+ void initGlobalBaseReg(MachineFunction &MF);
+};
+
+FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
new file mode 100644
index 0000000000..287e2ede0e
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -0,0 +1,197 @@
+//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsSEISelLowering.h"
+#include "MipsRegisterInfo.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
+ cl::desc("MIPS: Enable tail calls."), cl::init(false));
+
+MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
+ : MipsTargetLowering(TM) {
+ // Set up the register classes
+ addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
+
+ if (HasMips64)
+ addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass);
+
+ if (Subtarget->hasDSP()) {
+ MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
+
+ for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
+ addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
+
+ // Expand all builtin opcodes.
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+ setOperationAction(Opc, VecTys[i], Expand);
+
+ setOperationAction(ISD::LOAD, VecTys[i], Legal);
+ setOperationAction(ISD::STORE, VecTys[i], Legal);
+ setOperationAction(ISD::BITCAST, VecTys[i], Legal);
+ }
+ }
+
+ if (!TM.Options.UseSoftFloat) {
+ addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
+
+    // When dealing with single precision only, f64 is handled through
+    // libcalls, so no f64 register class is added.
+ if (!Subtarget->isSingleFloat()) {
+ if (HasMips64)
+ addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
+ else
+ addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
+ }
+ }
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+
+ computeRegisterProperties();
+}
+
+const MipsTargetLowering *
+llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
+ return new MipsSETargetLowering(TM);
+}
+
+
+bool
+MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+ MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
+
+ switch (SVT) {
+ case MVT::i64:
+ case MVT::i32:
+ if (Fast)
+ *Fast = true;
+ return true;
+ default:
+ return false;
+ }
+}
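In other words (a reading of the switch above, not new behavior): unaligned i32 and i64 accesses are reported as both allowed and fast on MipsSE, while every other type returns false and is expanded by target-independent code.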
+
+MachineBasicBlock *
+MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ switch (MI->getOpcode()) {
+ default:
+ return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ case Mips::BPOSGE32_PSEUDO:
+ return emitBPOSGE32(MI, BB);
+ }
+}
+
+bool MipsSETargetLowering::
+isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const {
+ if (!EnableMipsTailCalls)
+ return false;
+
+ // Return false if either the callee or caller has a byval argument.
+ if (MipsCCInfo.hasByValArg() || FI.hasByvalArg())
+ return false;
+
+ // Return true if the callee's argument area is no larger than the
+ // caller's.
+ return NextStackOffset <= FI.getIncomingArgSize();
+}
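For example, a caller whose own incoming argument area is 16 bytes can tail-call any callee whose outgoing arguments need at most 16 bytes, since the callee reuses the caller's argument space; anything larger forces a normal call.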
+
+void MipsSETargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+  // T9 should contain the address of the callee function if
+  // -relocation-model=pic or it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
+ unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
+ RegsToPass.push_front(std::make_pair(T9Reg, Callee));
+ } else
+ Ops.push_back(Callee);
+
+ MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+ InternalLinkage, CLI, Callee, Chain);
+}
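So a PIC or indirect call ends up along the lines of:

//   move  $t9, <callee address>   ; Callee was pushed into RegsToPass
//   jalr  $t9                     ; MIPS convention: PIC calls go via $t9/$25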
+
+MachineBasicBlock *MipsSETargetLowering::
+emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const {
+ // $bb:
+ // bposge32_pseudo $vr0
+ // =>
+ // $bb:
+ // bposge32 $tbb
+ // $fbb:
+ // li $vr2, 0
+ // b $sink
+ // $tbb:
+ // li $vr1, 1
+ // $sink:
+ // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
+ DebugLoc DL = MI->getDebugLoc();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, FBB);
+ F->insert(It, TBB);
+ F->insert(It, Sink);
+
+ // Transfer the remainder of BB and its successor edges to Sink.
+ Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ Sink->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add successors.
+ BB->addSuccessor(FBB);
+ BB->addSuccessor(TBB);
+ FBB->addSuccessor(Sink);
+ TBB->addSuccessor(Sink);
+
+  // Insert the real bposge32 instruction into $BB.
+ BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
+
+ // Fill $FBB.
+ unsigned VR2 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
+ .addReg(Mips::ZERO).addImm(0);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
+
+ // Fill $TBB.
+ unsigned VR1 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
+ .addReg(Mips::ZERO).addImm(1);
+
+ // Insert phi function to $Sink.
+ BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return Sink;
+}
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
new file mode 100644
index 0000000000..04a28ce54c
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -0,0 +1,46 @@
+//===-- MipsSEISelLowering.h - MipsSE DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MipsSEISELLOWERING_H
+#define MipsSEISELLOWERING_H
+
+#include "MipsISelLowering.h"
+
+namespace llvm {
+ class MipsSETargetLowering : public MipsTargetLowering {
+ public:
+ explicit MipsSETargetLowering(MipsTargetMachine &TM);
+
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
+
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ private:
+ virtual bool
+ isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const;
+
+ virtual void
+ getOpndList(SmallVectorImpl<SDValue> &Ops,
+ std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+ bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+ CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+
+ MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ };
+}
+
+#endif // MipsSEISELLOWERING_H
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index fd930f0335..33363580ab 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -54,7 +54,7 @@ MipsTargetMachine(const Target &T, StringRef TT,
"E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")),
InstrInfo(MipsInstrInfo::create(*this)),
FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
- TLInfo(*this), TSInfo(*this), JITInfo() {
+ TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() {
}
void MipsebTargetMachine::anchor() { }
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index c4928c21eb..7e5f192264 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -34,7 +34,7 @@ class MipsTargetMachine : public LLVMTargetMachine {
const DataLayout DL; // Calculates type size & alignment
OwningPtr<const MipsInstrInfo> InstrInfo;
OwningPtr<const MipsFrameLowering> FrameLowering;
- MipsTargetLowering TLInfo;
+ OwningPtr<const MipsTargetLowering> TLInfo;
MipsSelectionDAGInfo TSInfo;
MipsJITInfo JITInfo;
@@ -63,7 +63,7 @@ public:
}
virtual const MipsTargetLowering *getTargetLowering() const {
- return &TLInfo;
+ return TLInfo.get();
}
virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 7917f7736e..709daa4e4b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -12,6 +12,8 @@
#include "llvm/MC/MCFixup.h"
+#undef PPC
+
namespace llvm {
namespace PPC {
enum Fixups {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 4a420929d0..38a7420d97 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -47,6 +47,10 @@ MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
uint8_t OSABI);
} // End llvm namespace
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
// Defines symbolic names for PowerPC registers. This defines a mapping from
// register name to register number.
//
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 972e13852e..b0680fbb8c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -17,6 +17,10 @@
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
namespace llvm {
namespace PPC {
/// Predicate - These are "(BI << 5) | BO" for various predicates.
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index b98cc489f6..81a54d7015 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -189,12 +189,23 @@ INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
/// isCompareEqualsImm - Returns true if the instruction is a compare-equals
/// instruction with an immediate operand.
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) {
- if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) {
+static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp,
+ bool &Int64Cmp) {
+ if (MI->getOpcode() == PPC::CMPWI) {
SignedCmp = true;
+ Int64Cmp = false;
return true;
- } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) {
+ } else if (MI->getOpcode() == PPC::CMPDI) {
+ SignedCmp = true;
+ Int64Cmp = true;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLWI) {
+ SignedCmp = false;
+ Int64Cmp = false;
+ return true;
+ } else if (MI->getOpcode() == PPC::CMPLDI) {
SignedCmp = false;
+ Int64Cmp = true;
return true;
}
@@ -353,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
RI != RE; ++RI) {
IV_Opnd = &RI.getOperand();
- bool SignedCmp;
+ bool SignedCmp, Int64Cmp;
MachineInstr *MI = IV_Opnd->getParent();
- if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
+ if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
MI->getOperand(0).getReg() == PredReg) {
OldInsts.push_back(MI);
@@ -380,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
assert(InitialValue->isReg() && "Expecting register for init value");
unsigned InitialValueReg = InitialValue->getReg();
- const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
+ MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
// Here we need to look for an immediate load (an li or lis/ori pair).
if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
DefInstr->getOpcode() == PPC::ORI)) {
- int64_t start = (short) DefInstr->getOperand(2).getImm();
- const MachineInstr *DefInstr2 =
- MRI->getVRegDef(DefInstr->getOperand(0).getReg());
+ int64_t start = DefInstr->getOperand(2).getImm();
+ MachineInstr *DefInstr2 =
+ MRI->getVRegDef(DefInstr->getOperand(1).getReg());
if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
DefInstr2->getOpcode() == PPC::LIS)) {
DEBUG(dbgs() << " initial constant: " << *DefInstr);
@@ -399,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
if ((count % iv_value) != 0) {
return 0;
}
- return new CountValue(count/iv_value);
+
+ OldInsts.push_back(DefInstr);
+ OldInsts.push_back(DefInstr2);
+
+ // count/iv_value, the trip count, should be positive here. If it
+ // is negative, that indicates that the counter will wrap.
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
}
} else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
DefInstr->getOpcode() == PPC::LI)) {
DEBUG(dbgs() << " initial constant: " << *DefInstr);
- int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm()));
+ int64_t count = ImmVal -
+ int64_t(short(DefInstr->getOperand(1).getImm()));
if ((count % iv_value) != 0) {
return 0;
}
- return new CountValue(count/iv_value);
+
+ OldInsts.push_back(DefInstr);
+
+ if (Int64Cmp)
+ return new CountValue(count/iv_value);
+ else
+ return new CountValue(uint32_t(count/iv_value));
} else if (iv_value == 1 || iv_value == -1) {
// We can't determine a constant starting value.
if (ImmVal == 0) {
@@ -417,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
}
// FIXME: handle non-zero end value.
}
- // FIXME: handle non-unit increments (we might not want to introduce division
- // but we can handle some 2^n cases with shifts).
+ // FIXME: handle non-unit increments (we might not want to introduce
+ // division but we can handle some 2^n cases with shifts).
}
}
@@ -489,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI,
if (MO.isReg() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (!MRI->use_nodbg_empty(Reg)) {
- // This instruction has users, but if the only user is the phi node for the
- // parent block, and the only use of that phi node is this instruction, then
- // this instruction is dead: both it (and the phi node) can be removed.
+ // This instruction has users, but if the only user is the phi node for
+ // the parent block, and the only use of that phi node is this
+ // instruction, then this instruction is dead: both it (and the phi
+ // node) can be removed.
MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
if (llvm::next(I) == MRI->use_end() &&
I.getOperand().getParent()->isPHI()) {
@@ -594,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
DEBUG(dbgs() << "failed to get trip count!\n");
return false;
}
+
+ if (TripCount->isImm()) {
+ DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");
+
+ // FIXME: We currently can't form 64-bit constants
+ // (including 32-bit unsigned constants)
+ if (!isInt<32>(TripCount->getImm()))
+ return false;
+ }
+
// Does the loop contain any invalid instructions?
if (containsInvalidInstruction(L)) {
return false;
@@ -647,7 +685,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
const TargetRegisterClass *SrcRC =
MF->getRegInfo().getRegClass(TripCount->getReg());
CountReg = MF->getRegInfo().createVirtualRegister(RC);
- unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ?
+ unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ?
(unsigned) PPC::EXTSW_32_64 :
(unsigned) TargetOpcode::COPY;
BuildMI(*Preheader, InsertPos, dl,
@@ -664,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
// Put the trip count in a register for transfer into the count register.
int64_t CountImm = TripCount->getImm();
- assert(!TripCount->isNeg() && "Constant trip count must be positive");
+ if (TripCount->isNeg())
+ CountImm = -CountImm;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
- if (CountImm > 0xFFFF) {
+ if (abs64(CountImm) > 0x7FFF) {
BuildMI(*Preheader, InsertPos, dl,
TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
- CountReg).addImm(CountImm >> 16);
+ CountReg).addImm((CountImm >> 16) & 0xFFFF);
unsigned CountReg1 = CountReg;
CountReg = MF->getRegInfo().createVirtualRegister(RC);
BuildMI(*Preheader, InsertPos, dl,
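
The LIS-based sequence above is the standard PowerPC recipe for materializing an immediate that does not fit the signed 16-bit range of LI; a hedged sketch of the split (no such helpers exist in the patch):

    // Split a 32-bit immediate for LIS (high half) + ORI (low half).
    // LIS shifts its operand left by 16 and sign-extends, hence the mask.
    unsigned hiPart(int64_t Imm) { return (Imm >> 16) & 0xFFFF; } // -> LIS
    unsigned loPart(int64_t Imm) { return Imm & 0xFFFF; }         // -> ORI
    // |Imm| <= 0x7FFF fits a single LI, which is why the branch above now
    // tests abs64(CountImm) > 0x7FFF rather than the old unsigned bound.
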
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index caeb1796f7..2a680661d3 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -136,3 +136,8 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAV
F27, F28, F29, F30, F31, CR2, CR3, CR4,
V20, V21, V22, V23, V24, V25, V26, V27,
V28, V29, V30, V31)>;
+
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
+def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>;
+
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 0a396e6693..0108f7256e 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -188,13 +188,26 @@ static bool spillsCR(const MachineFunction &MF) {
return FuncInfo->isCRSpilled();
}
+static bool hasSpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasSpills();
+}
+
+static bool hasNonRISpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasNonRISpills();
+}
+
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
-void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF,
+ bool UseEstimate) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
// Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize = MFI->getStackSize();
+ unsigned FrameSize =
+ UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
// Get the alignments provided by the target, and the maximum alignment
// (if any) of the fixed frame objects.
@@ -223,8 +236,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
&& spillsCR(MF)) &&
(!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
// No need for frame
- MFI->setStackSize(0);
- return;
+ if (UpdateMF)
+ MFI->setStackSize(0);
+ return 0;
}
// Get the maximum call frame size of all the calls.
@@ -241,7 +255,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
// Update maximum call frame size.
- MFI->setMaxCallFrameSize(maxCallFrameSize);
+ if (UpdateMF)
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
// Include call frame size in total.
FrameSize += maxCallFrameSize;
@@ -250,7 +265,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const {
FrameSize = (FrameSize + AlignMask) & ~AlignMask;
// Update frame info.
- MFI->setStackSize(FrameSize);
+ if (UpdateMF)
+ MFI->setStackSize(FrameSize);
+
+ return FrameSize;
}
// hasFP - Return true if the specified function actually has a dedicated frame
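
With the widened signature, determineFrameLayout can either commit the layout or merely report it; a minimal usage sketch (the caller object FL is hypothetical):

    // Commit mode: computes the frame size and records it in MFI.
    unsigned FrameSize = FL.determineFrameLayout(MF);
    // Query mode: estimate the eventual size without touching MFI, as
    // addScavengingSpillSlot needs before frame finalization.
    unsigned Estimate =
        FL.determineFrameLayout(MF, /*UpdateMF=*/false, /*UseEstimate=*/true);
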
@@ -281,6 +299,31 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
+void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
+ bool is31 = needsFP(MF);
+ unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
+ unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI)
+ for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
+ --MBBI;
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = MBBI->getOperand(I);
+ if (!MO.isReg())
+ continue;
+
+ switch (MO.getReg()) {
+ case PPC::FP:
+ MO.setReg(FPReg);
+ break;
+ case PPC::FP8:
+ MO.setReg(FP8Reg);
+ break;
+ }
+ }
+ }
+}
void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
@@ -311,13 +354,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MBBI = MBB.begin();
// Work out frame sizes.
- // FIXME: determineFrameLayout() may change the frame size. This should be
- // moved upper, to some hook.
- determineFrameLayout(MF);
- unsigned FrameSize = MFI->getStackSize();
-
+ unsigned FrameSize = determineFrameLayout(MF);
int NegFrameSize = -FrameSize;
+ if (MFI->isFrameAddressTaken())
+ replaceFPWithRealFP(MF);
+
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
// Get operating system
@@ -780,7 +822,7 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+ RegScavenger *) const {
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
// Save and clear the LR state.
@@ -822,30 +864,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
FI->setCRSpillFrameIndex(FrameIdx);
}
-
- // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
- // a large stack, which will require scavenging a register to materialize a
- // large offset.
- // FIXME: this doesn't actually check stack size, so is a bit pessimistic
- // FIXME: doesn't detect whether or not we need to spill vXX, which requires
- // r0 for now.
-
- if (RegInfo->requiresRegisterScavenging(MF))
- if (needsFP(MF) || spillsCR(MF)) {
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
}
-void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const {
// Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI())
+ if (!Subtarget.isSVR4ABI()) {
+ addScavengingSpillSlot(MF, RS);
return;
+ }
// Get callee saved register information.
MachineFrameInfo *FFI = MF.getFrameInfo();
@@ -853,6 +880,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// Early exit if no callee saved registers are modified!
if (CSI.empty() && !needsFP(MF)) {
+ addScavengingSpillSlot(MF, RS);
return;
}
@@ -1031,6 +1059,37 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
}
+
+ addScavengingSpillSlot(MF, RS);
+}
+
+void
+PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+
+ // We need to have a scavenger spill slot for spills if the frame size is
+ // large. In case there is no free register for large-offset addressing,
+ // this slot is used for the necessary emergency spill. Also, we need the
+ // slot for dynamic stack allocations.
+
+ // The scavenger might be invoked if the frame offset does not fit into
+ // the 16-bit immediate. We don't know the complete frame size here
+ // because we've not yet computed callee-saved register spills or the
+ // needed alignment padding.
+ unsigned StackSize = determineFrameLayout(MF, false, true);
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (MFI->hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
+ (hasSpills(MF) && !isInt<16>(StackSize))) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+ }
}
bool
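
The isInt<16> test above is the crux: D-form loads and stores carry a signed 16-bit displacement, so larger frame offsets force a scratch register. A sketch of the check, assuming llvm::isInt from MathExtras.h; the helper name is illustrative:

    #include "llvm/Support/MathExtras.h"

    // True when a frame offset cannot be folded into a single D-form
    // memory access and the scavenger may need the emergency slot.
    bool needsScratchReg(int64_t FrameOffset) {
      return !llvm::isInt<16>(FrameOffset); // e.g. true for 40000
    }
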
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index d09e47fafd..6f5f9368c6 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -32,7 +32,9 @@ public:
Subtarget(sti) {
}
- void determineFrameLayout(MachineFunction &MF) const;
+ unsigned determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF = true,
+ bool UseEstimate = false) const;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
@@ -41,10 +43,13 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool needsFP(const MachineFunction &MF) const;
+ void replaceFPWithRealFP(MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+ void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -144,6 +149,9 @@ public:
return 0;
}
+ // Note that the offsets here overlap, but this is fixed up in
+ // processFunctionBeforeFrameFinalized.
+
static const SpillSlot Offsets[] = {
// Floating-point register save area offsets.
{PPC::F31, -8},
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 17bea8a6a6..90a3a5c6a4 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -164,6 +164,12 @@ namespace {
return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
}
+ // Select an address into a single register.
+ bool SelectAddr(SDValue N, SDValue &Base) {
+ Base = N;
+ return true;
+ }
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions. It is always correct to compute the value into
/// a register. The case of adding a (possibly relocatable) constant to a
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 13cb358fc0..c7d454692e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
return new TargetLoweringObjectFileMachO();
@@ -67,6 +70,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ PPCRegInfo = TM.getRegisterInfo();
setPow2DivIsCheap();
@@ -208,6 +212,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
+ // SjLj exception handling but a light-weight setjmp/longjmp replacement to
+ // support continuations, user-level threading, etc. As a result, no other
+ // SjLj exception interfaces are implemented; please don't build your own
+ // exception handling based on them.
+ // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
@@ -564,6 +576,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
+ case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFCR: return "PPCISD::MFCR";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
@@ -1028,7 +1042,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1077,7 +1091,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
}
// Otherwise, do it the hard way, using R0 as the base register.
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
@@ -1140,7 +1154,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1180,13 +1194,15 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue Ptr;
EVT VT;
+ unsigned Alignment;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
-
+ Alignment = LD->getAlignment();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
+ Alignment = ST->getAlignment();
} else
return false;
@@ -1205,6 +1221,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
return false;
} else {
+ // LDU/STU need an address with at least 4-byte alignment.
+ if (Alignment < 4)
+ return false;
+
// reg + imm * 4.
if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
return false;
@@ -4555,6 +4575,21 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
}
+SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -5551,6 +5586,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
@@ -5744,7 +5782,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = PPCSubTarget.isPPC64();
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
@@ -5863,9 +5901,238 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
return BB;
}
+llvm::MachineBasicBlock*
+PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+ // For v = setjmp(buf), we generate
+ //
+ // thisMBB:
+ // SjLjSetup mainMBB
+ // bl mainMBB
+ // v_restore = 1
+ // b sinkMBB
+ //
+ // mainMBB:
+ // buf[LabelOffset] = LR
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Note that the structure of the jmp_buf used here is not compatible
+ // with that used by libc, and is not designed to be. Specifically, it
+ // stores only those 'reserved' registers that LLVM does not otherwise
+ // understand how to spill. Also, by convention, by the time this
+ // intrinsic is called, Clang has already stored the frame address in the
+ // first slot of the buffer and stack address in the third. Following the
+ // X86 target code, we'll store the jump address in the second slot. We also
+ // need to save the TOC pointer (R2) to handle jumps between shared
+ // libraries, and that will be stored in the fourth slot. The thread
+ // identifier (R13) is not affected.
+
+ // thisMBB:
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+ // Prepare the IP (the setjmp return label) in a register.
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
+ unsigned BufReg = MI->getOperand(1).getReg();
+
+ if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
+ .addReg(PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCL)).addMBB(mainMBB);
+ MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+
+ BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
+
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
+ .addMBB(mainMBB);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(mainMBB, /* weight */ 0);
+ thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
+
+ // mainMBB:
+ // mainDstReg = 0
+ MIB = BuildMI(mainMBB, DL,
+ TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+
+ // Store IP
+ if (PPCSubTarget.isPPC64()) {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
+ .addReg(LabelReg)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(PPC::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(thisMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
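
For orientation, the pointer-sized slot layout implied by LabelOffset/SPOffset/TOCOffset can be pictured as a struct; this is purely illustrative, not a type defined by the patch (and, as the comment above stresses, not compatible with libc's jmp_buf):

    struct PPCSjLjBuf {          // slots are 4 bytes on PPC32, 8 on PPC64
      void *FrameAddress;        // slot 0: stored by the front end
      void *JumpAddress;         // slot 1 (LabelOffset): LR saved in mainMBB
      void *StackAddress;        // slot 2 (SPOffset): stored by the front end
      void *TOCPointer;          // slot 3 (TOCOffset): R2, 64-bit SVR4 only
    };
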
+
+MachineBasicBlock *
+PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ // Since FP is only updated here but NOT referenced, it's treated as a GPR.
+ unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
+ unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+
+ MachineInstrBuilder MIB;
+
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+
+ unsigned BufReg = MI->getOperand(0).getReg();
+
+ // Reload FP (the jumped-to function may not have had a
+ // frame pointer, and if so, then its r31 will be restored
+ // as necessary).
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload IP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
+ .addImm(LabelOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload SP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
+ .addImm(SPOffset / 4)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
+ .addImm(SPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // FIXME: When we also support base pointers, that register must also be
+ // restored here.
+
+ // Reload TOC
+ if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
+ .addImm(TOCOffset / 4)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Jump
+ BuildMI(*MBB, MI, DL,
+ TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
+ BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
+ if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
+ return emitEHSjLjSetJmp(MI, BB);
+ } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
+ return emitEHSjLjLongJmp(MI, BB);
+ }
+
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
// To "insert" these instructions we actually have to insert their
@@ -6133,7 +6400,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
unsigned Ptr1Reg;
unsigned TmpReg = RegInfo.createVirtualRegister(RC);
- unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
// thisMBB:
// ...
// fallthrough --> loopMBB
@@ -6622,6 +6889,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
+ if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
+ return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
case 'r': // R0-R31
if (VT == MVT::i64 && PPCSubTarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
@@ -6806,13 +7076,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects()) &&
- MFI->getStackSize() &&
- !MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked);
- unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
- (is31 ? PPC::R31 : PPC::R1);
+
+ // Naked functions never have a frame pointer, and so we use r1. For all
+ // other functions, this decision must be delayed until during PEI.
+ unsigned FrameReg;
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
+ else
+ FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
+
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
PtrVT);
while (Depth--)
@@ -6851,6 +7124,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
}
}
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ bool *Fast) const {
+ if (DisablePPCUnaligned)
+ return false;
+
+ // PowerPC supports unaligned memory access for simple non-vector types.
+ // Although accessing unaligned addresses is not as efficient as accessing
+ // aligned addresses, it is generally more efficient than manual expansion,
+ // and generally traps to software emulation only when crossing page
+ // boundaries.
+
+ if (!VT.isSimple())
+ return false;
+
+ if (VT.getSimpleVT().isVector())
+ return false;
+
+ if (VT == MVT::ppcf128)
+ return false;
+
+ if (Fast)
+ *Fast = true;
+
+ return true;
+}
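
A short sketch of how a lowering client consumes the new hook; the surrounding context is hypothetical, but the signature matches the declaration added to PPCISelLowering.h below:

    bool Fast = false;
    if (TLI.allowsUnalignedMemoryAccesses(MVT::i32, &Fast) && Fast) {
      // Emit one (possibly misaligned) 32-bit access instead of a
      // byte-by-byte expansion; on PPC this is now the common path.
    }
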
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 3931384d89..1b0427e021 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -16,6 +16,7 @@
#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
#include "PPC.h"
+#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
@@ -119,6 +120,12 @@ namespace llvm {
/// are undefined.
MFCR,
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
/// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
/// instructions. For lack of better number, we use the opcode number
/// encoding for the OPC field to identify the compare. For example, 838
@@ -321,6 +328,7 @@ namespace llvm {
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &PPCSubTarget;
+ const PPCRegisterInfo *PPCRegInfo;
public:
explicit PPCTargetLowering(PPCTargetMachine &TM);
@@ -395,6 +403,12 @@ namespace llvm {
MachineBasicBlock *MBB,
bool is8bit, unsigned Opcode) const;
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
ConstraintType getConstraintType(const std::string &Constraint) const;
/// Examine constraint string and operand type and determine a weight value.
@@ -449,6 +463,10 @@ namespace llvm {
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const;
+ /// Is unaligned memory access allowed for the given type, and is it fast
+ /// relative to software emulation.
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
@@ -604,6 +622,9 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
};
}
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 01201304f7..d2b1d6d392 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -30,12 +30,12 @@ def symbolLo64 : Operand<i64> {
let EncoderMethod = "getLO16Encoding";
}
def tocentry : Operand<iPTR> {
- let MIOperandInfo = (ops i32imm:$imm);
+ let MIOperandInfo = (ops i64imm:$imm);
}
def memrs : Operand<iPTR> { // memri where the immediate is a symbolLo64
let PrintMethod = "printMemRegImm";
let EncoderMethod = "getMemRIXEncoding";
- let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg);
+ let MIOperandInfo = (ops symbolLo64:$off, ptr_rc_nor0:$reg);
}
def tlsreg : Operand<i64> {
let EncoderMethod = "getTLSRegEncoding";
@@ -273,6 +273,19 @@ def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
"mfcr $rT", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP64",
+ [(set GPRC:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In64BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP64",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In64BitMode]>;
+}
+
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -393,15 +406,16 @@ def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
"addic $rD, $rA, $imm", IntGeneral,
[(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>;
}
-def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, s16imm64:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
-def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm),
+ [(set G8RC:$rD, (add G8RC_NOX0:$rA, immSExt16:$imm))]>;
+def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>;
-def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm),
+ [(set G8RC:$rD, (add G8RC_NOX0:$rA, immSExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm),
"addis $rD, $rA, $imm", IntSimple,
- [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>;
+ [(set G8RC:$rD, (add G8RC_NOX0:$rA,
+ imm16ShiftedSExt:$imm))]>;
let Defs = [CARRY] in {
def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
@@ -536,7 +550,7 @@ def RLWINM8 : MForm_2<21,
[]>;
def ISEL8 : AForm_4<31, 15,
- (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond),
+ (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, pred:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
} // End FXU Operations.
@@ -555,7 +569,8 @@ def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
PPC970_DGroup_Cracked;
def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
"lwa $rD, $src", LdStLWA,
- [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64,
+ [(set G8RC:$rD,
+ (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
@@ -567,25 +582,26 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
PPC970_DGroup_Cracked;
// Update forms.
-let mayLoad = 1 in
-def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
- ptr_rc:$rA),
- "lhau $rD, $disp($rA)", LdStLHAU,
- []>, RegConstraint<"$rA = $ea_result">,
+let mayLoad = 1 in {
+def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+ (ins memri:$addr),
+ "lhau $rD, $addr", LdStLHAU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
-def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
+}
// Zero extending loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
@@ -612,30 +628,30 @@ def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
// Update forms.
let mayLoad = 1 in {
-def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
@@ -648,7 +664,7 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result),
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
"ld $rD, $src", LdStLD,
- [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+ [(set G8RC:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src),
"ld $rD, $src", LdStLD,
[]>, isPPC64;
@@ -684,12 +700,12 @@ def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
[(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
let mayLoad = 1 in
-def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr),
+def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
"ldu $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
-def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result),
+def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"ldux $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
@@ -798,82 +814,11 @@ def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
// Normal 8-byte stores.
def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
"std $rS, $dst", LdStSTD,
- [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
+ [(aligned4store G8RC:$rS, ixaddr:$dst)]>, isPPC64;
def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
"stdx $rS, $dst", LdStSTD,
[(store G8RC:$rS, xaddr:$dst)]>, isPPC64,
PPC970_DGroup_Cracked;
-}
-
-let PPC970_Unit = 2 in {
-
-def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
-def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
-def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- s16immX4:$ptroff, ptr_rc:$ptrreg),
- "stdu $rS, $ptroff($ptrreg)", LdStSTDU,
- [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">,
- isPPC64;
-
-
-def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti32 G8RC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res),
- (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stdux $rS, $ptroff, $ptrreg", LdStSTDU,
- [(set ptr_rc:$ea_res,
- (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked, isPPC64;
-
// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst),
"std $rT, $dst", LdStSTD,
@@ -884,6 +829,60 @@ def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst),
PPC970_DGroup_Cracked;
}
+// Stores with Update (pre-inc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "stbu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "sthu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst),
+ "stdu $rS, $dst", LdStSTDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
+def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stbux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "sthux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stwux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+ "stdux $rS, $dst", LdStSTDU, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked, isPPC64;
+}
+
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STBU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+def : Pat<(pre_truncsti16 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STHU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+def : Pat<(pre_truncsti32 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STWU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+def : Pat<(aligned4pre_store G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STDU G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+
+def : Pat<(pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STBUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+def : Pat<(pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STHUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+def : Pat<(pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STWUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+def : Pat<(pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STDUX G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
//===----------------------------------------------------------------------===//
@@ -979,3 +978,13 @@ def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)),
(ADDIS8 G8RC:$in, tjumptable:$g)>;
def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS8 G8RC:$in, tblockaddress:$g)>;
+
+// Patterns to match r+r indexed loads and stores for
+// addresses without at least 4-byte alignment.
+def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+ (LWAX xoaddr:$src)>;
+def : Pat<(i64 (unaligned4load xoaddr:$src)),
+ (LDX xoaddr:$src)>;
+def : Pat<(unaligned4store G8RC:$rS, xoaddr:$dst),
+ (STDX G8RC:$rS, xoaddr:$dst)>;
+
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 0cf28ae4b5..0ed7ff2cc4 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -182,6 +182,9 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
//===----------------------------------------------------------------------===//
// Instruction Definitions.
+def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">;
+let Predicates = [HasAltivec] in {
+
def DSS : DSS_Form<822, (outs),
(ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
"dss $STRM", LdStLoad /*FIXME*/, []>;
@@ -733,3 +736,6 @@ def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))),
(VRFIZ VRRC:$vA)>;
def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))),
(VRFIN VRRC:$vA)>;
+
+} // end HasAltivec
+
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index c3c171cd21..16dbee5d29 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -120,6 +120,18 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
let CR = 0;
}
+class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, BrB> {
+ bits<14> BD;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = bi;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
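
The Inst{...} assignments above use IBM bit numbering (bit 0 is the MSB); in conventional C shifts the packing looks like the sketch below, where the helper name is illustrative. For the BCL defined later, the operands are opcode 16, BO=20, BI=31, AA=0, LK=1:

    uint32_t encodeBForm2(unsigned Opcode, unsigned BO, unsigned BI,
                          int16_t BD, bool AA, bool LK) {
      return (Opcode << 26) | (BO << 21) | (BI << 16) |
             ((uint32_t(BD) & 0x3FFF) << 2) |
             (unsigned(AA) << 1) | unsigned(LK);
    }
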
// 1.7.4 D-Form
class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index a0517a80a9..cf39386c67 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -33,11 +33,6 @@
#define GET_INSTRINFO_CTOR
#include "PPCGenInstrInfo.inc"
-namespace llvm {
-extern cl::opt<bool> DisablePPC32RS;
-extern cl::opt<bool> DisablePPC64RS;
-}
-
using namespace llvm;
static cl::
@@ -444,7 +439,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill,
int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const{
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI) const{
DebugLoc DL;
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
if (SrcReg != PPC::LR) {
@@ -489,47 +485,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
- .addReg(SrcReg,
- getKillRegState(isKill)),
- FrameIdx));
- return true;
- } else {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
-
- bool is64Bit = TM.getSubtargetImpl()->isPPC64();
- // We need to store the CR in the low 4-bits of the saved value. First,
- // issue a MFCR to save all of the CRBits.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- (is64Bit ? PPC::X2 : PPC::R2) :
- (is64Bit ? PPC::X0 : PPC::R0);
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud :
- PPC::MFCRpseud), ScratchReg)
- .addReg(SrcReg, getKillRegState(isKill)));
-
- // If the saved register wasn't CR0, shift the bits left so that they are
- // in CR0's slot.
- if (SrcReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
- // rlwinm scratch, scratch, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 :
- PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(ShiftBits)
- .addImm(0).addImm(31));
- }
-
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ?
- PPC::STW8 : PPC::STW))
- .addReg(ScratchReg,
- getKillRegState(isKill)),
- FrameIdx));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
// FIXME: We use CRi here because there is no mtcrf on a bit. Since the
// backend currently only uses CR1EQ as an individual bit, this should
@@ -562,23 +522,20 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
Reg = PPC::CR7;
return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx,
- &PPC::CRRCRegClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs, NonRI);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- // We don't have indexed addressing for vector loads. Emit:
- // R0 = ADDI FI#
- // STVX VAL, 0, R0
- //
- // FIXME: We use R0 here, because it isn't available for RA.
- bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
- unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
- FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX))
- .addReg(SrcReg, getKillRegState(isKill))
- .addReg(GPR0)
- .addReg(GPR0));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -595,10 +552,15 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
- if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false;
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs, NonRI))
FuncInfo->setSpillsCR();
- }
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
@@ -616,7 +578,8 @@ bool
PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs)const{
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI) const{
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
if (DestReg != PPC::LR) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
@@ -642,37 +605,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
- if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
- get(PPC::RESTORE_CR), DestReg)
- , FrameIdx));
- return true;
- } else {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- ScratchReg), FrameIdx));
-
- // If the reloaded register isn't CR0, shift the bits right so that they are
- // in the right CR's slot.
- if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
- // rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
- .addImm(31));
- }
-
- NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ?
- PPC::MTCRF8 : PPC::MTCRF), DestReg)
- .addReg(ScratchReg));
- }
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg),
+ FrameIdx));
+ return true;
} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
unsigned Reg = 0;
@@ -702,21 +638,18 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
Reg = PPC::CR7;
return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx,
- &PPC::CRRCRegClass, NewMIs);
+ &PPC::CRRCRegClass, NewMIs, NonRI);
} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
- // We don't have indexed addressing for vector loads. Emit:
- // R0 = ADDI FI#
- // Dest = LVX 0, R0
- //
- // FIXME: We use R0 here, because it isn't available for RA.
- bool Is64Bit = TM.getSubtargetImpl()->isPPC64();
- unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
- unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0),
- FrameIdx, 0, 0));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0)
- .addReg(GPR0));
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_VRSAVE),
+ DestReg),
+ FrameIdx));
+ NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
@@ -734,10 +667,17 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
- if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) {
- PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false;
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs, NonRI))
FuncInfo->setSpillsCR();
- }
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
+
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 374213ea43..5d4ae915b4 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -71,11 +71,13 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI) const;
bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI) const;
public:
explicit PPCInstrInfo(PPCTargetMachine &TM);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 460e94342d..84cdb1f39d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -158,6 +158,14 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
@@ -278,6 +286,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{
return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
}], HI16>;
+// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
+// restricted memrix (offset/4) constants are alignment sensitive. If these
+// offsets are hidden behind TOC entries, the values of the lower-order
+// bits cannot be checked directly. As a result, we need to also incorporate
+// an alignment check into the relevant patterns.
+
+def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4pre_store : PatFrag<
+ (ops node:$val, node:$base, node:$offset),
+ (pre_store node:$val, node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
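
In C++ terms, each predicate is just an alignment test on the memory node; a minimal sketch of the DS-form/X-form decision the patterns implement (the helper name is illustrative):

    // DS-form LD/STD encode offset/4, so the low two bits of the
    // displacement must be zero: require alignment >= 4. Otherwise the
    // xoaddr patterns fall back to X-form LDX/LWAX/STDX.
    bool useDSForm(const LoadSDNode *LD) {
      return LD->getAlignment() >= 4;  // aligned4load vs. unaligned4load
    }
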
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
@@ -314,9 +354,6 @@ def s16imm : Operand<i32> {
def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
}
-def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing.
- let PrintMethod = "printS16X4ImmOperand";
-}
def directbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
@@ -344,21 +381,30 @@ def crbitm: Operand<i8> {
let EncoderMethod = "get_crbitm_encoding";
}
// Address operands
+// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
+def ptr_rc_nor0 : PointerLikeRegClass<1>;
+
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
- let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops symbolLo:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIEncoding";
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
- let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg);
+ let MIOperandInfo = (ops ptr_rc_nor0:$offreg, ptr_rc:$ptrreg);
}
def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
- let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg);
+ let MIOperandInfo = (ops symbolLo:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIXEncoding";
}
+// A single-register address. This is used with the SjLj
+// pseudo-instructions.
+def memr : Operand<iPTR> {
+ let MIOperandInfo = (ops ptr_rc:$ptrreg);
+}
+
// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
// that doesn't matter.
def pred : PredicateOperand<OtherVT, (ops imm, CRRC),
@@ -372,6 +418,10 @@ def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+// The address in a single register. This is used with the SjLj
+// pseudo-instructions.
+def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
+
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;
@@ -473,6 +523,14 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
}
}
+// The direct BCL used by the SjLj setjmp code.
+let isCall = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let Defs = [LR], Uses = [RM] in {
+ def BCL : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bcl 20, 31, $dst">;
+ }
+}
+
// Darwin ABI Calls.
let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
@@ -551,6 +609,23 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
"ba $dst", BrB,
[]>;
+let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
+ def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP32",
+ [(set GPRC:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In32BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP32",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In32BitMode]>;
+}
+
+let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
+ def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
+ "#EH_SjLj_Setup\t$dst", []>;
+}
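For orientation, a hedged source-level example of what eventually selects to these pseudos (a usage sketch, not part of the patch): the GCC-style builtins are emitted as the llvm.eh.sjlj.setjmp/longjmp intrinsics, which reach the PPCeh_sjlj_setjmp/longjmp nodes defined above.

    static void *buf[5];  // conventional five-pointer buffer for these builtins

    int roundTrip(void) {
      if (__builtin_setjmp(buf))    // lowers to PPCISD::EH_SJLJ_SETJMP
        return 1;                   // execution resumes here after the jump
      __builtin_longjmp(buf, 1);    // lowers to PPCISD::EH_SJLJ_LONGJMP
    }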
// DCB* instructions.
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
@@ -708,69 +783,69 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
// Unindexed (r+i) Loads with Update (preinc).
let mayLoad = 1 in {
-def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhau $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfsu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
+def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfdu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// Indexed (r+r) Loads with Update (preinc).
-def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result),
+def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result),
+def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfsux $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result),
+def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfdux $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.offreg = $ea_result">,
@@ -835,44 +910,39 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
}
// Unindexed (r+i) Stores with Update (preinc).
-let PPC970_Unit = 2 in {
-def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd,
- [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU,
- [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU,
- [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stbu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "sthu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+ "stwu $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst),
+ "stfsu $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst),
+ "stfdu $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
}
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STBU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+def : Pat<(pre_truncsti16 GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STHU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+def : Pat<(pre_store GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STWU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+def : Pat<(pre_store F4RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STFSU F4RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
+def : Pat<(pre_store F8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff),
+ (STFDU F8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>;
// Indexed (r+r) Stores.
-//
let PPC970_Unit = 2 in {
def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
"stbx $rS, $dst", LdStStore,
@@ -887,48 +957,6 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
[(store GPRC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
-def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti8 GPRC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_truncsti16 GPRC:$rS,
- ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res),
- (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd,
- [(set ptr_rc:$ea_res,
- (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res),
- (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU,
- [(set ptr_rc:$ea_res,
- (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
-def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res),
- (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg),
- "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU,
- [(set ptr_rc:$ea_res,
- (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>,
- RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">,
- PPC970_DGroup_Cracked;
-
def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
"sthbrx $rS, $dst", LdStStore,
[(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
@@ -950,6 +978,44 @@ def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
[(store F8RC:$frS, xaddr:$dst)]>;
}
+// Indexed (r+r) Stores with Update (preinc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "stbux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "sthux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+ "stwux $rS, $dst", LdStStoreUpd, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst),
+ "stfsux $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst),
+ "stfdux $rS, $dst", LdStSTFDU, []>,
+ RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+}
+
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STBUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+def : Pat<(pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STHUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+def : Pat<(pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STWUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+def : Pat<(pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STFSUX F4RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+def : Pat<(pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff),
+ (STFDUX F8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>;
+
def SYNC : XForm_24_sync<31, 598, (outs), (ins),
"sync", LdStSync,
[(int_ppc_sync)]>;
@@ -959,12 +1025,12 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins),
//
let PPC970_Unit = 1 in { // FXU Operations.
-def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, s16imm:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
-def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm),
+ [(set GPRC:$rD, (add GPRC_NOR0:$rA, immSExt16:$imm))]>;
+def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>;
+ [(set GPRC:$rD, (add GPRC_NOR0:$rA, immSExt16:$imm))]>;
let Defs = [CARRY] in {
def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addic $rD, $rA, $imm", IntGeneral,
@@ -974,12 +1040,13 @@ def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"addic. $rD, $rA, $imm", IntGeneral,
[]>;
}
-def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm),
+def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm),
"addis $rD, $rA, $imm", IntSimple,
- [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>;
-def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym),
+ [(set GPRC:$rD, (add GPRC_NOR0:$rA,
+ imm16ShiftedSExt:$imm))]>;
+def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym),
"la $rD, $sym($rA)", IntGeneral,
- [(set GPRC:$rD, (add GPRC:$rA,
+ [(set GPRC:$rD, (add GPRC_NOR0:$rA,
(PPClo tglobaladdr:$sym, 0)))]>;
def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
"mulli $rD, $rA, $imm", IntMulLI,
@@ -1213,6 +1280,29 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
+let isCodeGenOnly = 1 in {
+ def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
+ (outs VRSAVERC:$reg), (ins GPRC:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+ def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT),
+ (ins VRSAVERC:$reg),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
+// so we'll need to scavenge a register for it.
+let mayStore = 1 in
+def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
+ "#SPILL_VRSAVE", []>;
+
+// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
+ "#RESTORE_VRSAVE", []>;
+
def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -1435,7 +1525,7 @@ let Uses = [RM] in {
let PPC970_Unit = 1 in { // FXU Operations.
def ISEL : AForm_4<31, 15,
- (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond),
+ (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, pred:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
}
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 045b375dd8..b1636a20b8 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -37,6 +37,13 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// PEI.
bool MustSaveLR;
+  /// Does this function have any stack spills?
+ bool HasSpills;
+
+ /// Does this function spill using instructions with only r+r (not r+i)
+  /// forms?
+ bool HasNonRISpills;
+
/// SpillsCR - Indicates whether CR is spilled in the current function.
bool SpillsCR;
@@ -78,6 +85,8 @@ public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
ReturnAddrSaveIndex(0),
+ HasSpills(false),
+ HasNonRISpills(false),
SpillsCR(false),
LRStoreRequired(false),
MinReservedArea(0),
@@ -109,6 +118,12 @@ public:
void setMustSaveLR(bool U) { MustSaveLR = U; }
bool mustSaveLR() const { return MustSaveLR; }
+ void setHasSpills() { HasSpills = true; }
+ bool hasSpills() const { return HasSpills; }
+
+ void setHasNonRISpills() { HasNonRISpills = true; }
+ bool hasNonRISpills() const { return HasNonRISpills; }
+
void setSpillsCR() { SpillsCR = true; }
bool isCRSpilled() const { return SpillsCR; }
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index df245cc655..75e20e4a26 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -46,26 +46,8 @@
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
-namespace llvm {
-cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger",
- cl::init(false),
- cl::desc("Disable PPC32 register scavenger"),
- cl::Hidden);
-cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger",
- cl::init(false),
- cl::desc("Disable PPC64 register scavenger"),
- cl::Hidden);
-}
-
using namespace llvm;
-// FIXME (64-bit): Should be inlined.
-bool
-PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
- return ((!DisablePPC32RS && !Subtarget.isPPC64()) ||
- (!DisablePPC64RS && Subtarget.isPPC64()));
-}
-
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
const TargetInstrInfo &tii)
: PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
@@ -89,17 +71,17 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32;
}
-bool
-PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- return requiresRegisterScavenging(MF);
-}
-
-
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
const {
+ if (Kind == 1) {
+ if (Subtarget.isPPC64())
+ return &PPC::G8RC_NOX0RegClass;
+ return &PPC::GPRC_NOR0RegClass;
+ }
+
if (Subtarget.isPPC64())
return &PPC::G8RCRegClass;
return &PPC::GPRCRegClass;
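A small usage sketch (the caller here is hypothetical, assuming the usual LLVM headers): operands declared with ptr_rc_nor0, i.e. PointerLikeRegClass<1>, arrive at this hook with Kind == 1 and therefore can never be allocated r0/x0, whose encoding in a base-register field would be read as the constant 0.

    // Hypothetical helper illustrating the Kind parameter.
    static const TargetRegisterClass *
    pickBaseRegClass(const PPCRegisterInfo &TRI, const MachineFunction &MF,
                     bool ExcludeR0) {
      // Kind 1 selects the GPRC_NOR0/G8RC_NOX0 subclass defined in this patch.
      return TRI.getPointerRegClass(MF, /*Kind=*/ExcludeR0 ? 1 : 0);
    }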
@@ -123,11 +105,33 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask;
}
+const uint32_t*
+PPCRegisterInfo::getNoPreservedMask() const {
+  // The naming here is inverted: the CSR_NoRegs_Altivec mask has the
+  // Altivec registers masked out so that they're not saved and restored
+  // around instructions with this preserved mask.
+
+ if (!Subtarget.hasAltivec())
+ return CSR_NoRegs_Altivec_RegMask;
+
+ return CSR_NoRegs_RegMask;
+}
+
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const PPCFrameLowering *PPCFI =
static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+ // The ZERO register is not really a register, but the representation of r0
+ // when used in instructions that treat r0 as the constant 0.
+ Reserved.set(PPC::ZERO);
+ Reserved.set(PPC::ZERO8);
+
+ // The FP register is also not really a register, but is the representation
+ // of the frame pointer register used by ISD::FRAMEADDR.
+ Reserved.set(PPC::FP);
+ Reserved.set(PPC::FP8);
+
Reserved.set(PPC::R0);
Reserved.set(PPC::R1);
Reserved.set(PPC::LR);
@@ -139,35 +143,22 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R2); // System-reserved register
Reserved.set(PPC::R13); // Small Data Area pointer register
}
- // Reserve R2 on Darwin to hack around the problem of save/restore of CR
- // when the stack frame is too big to address directly; we need two regs.
- // This is a hack.
- if (Subtarget.isDarwinABI()) {
- Reserved.set(PPC::R2);
- }
// On PPC64, r13 is the thread pointer. Never allocate this register.
- // Note that this is over conservative, as it also prevents allocation of R31
- // when the FP is not needed.
if (Subtarget.isPPC64()) {
Reserved.set(PPC::R13);
- Reserved.set(PPC::R31);
Reserved.set(PPC::X0);
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
- Reserved.set(PPC::X31);
+
+ if (PPCFI->needsFP(MF))
+ Reserved.set(PPC::X31);
// The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
}
- // Reserve X2 on Darwin to hack around the problem of save/restore of CR
- // when the stack frame is too big to address directly; we need two regs.
- // This is a hack.
- if (Subtarget.isDarwinABI()) {
- Reserved.set(PPC::X2);
- }
}
if (PPCFI->needsFP(MF))
@@ -185,6 +176,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
switch (RC->getID()) {
default:
return 0;
+ case PPC::G8RC_NOX0RegClassID:
+ case PPC::GPRC_NOR0RegClassID:
case PPC::G8RCRegClassID:
case PPC::GPRCRegClassID: {
unsigned FP = TFI->hasFP(MF) ? 1 : 0;
@@ -199,38 +192,10 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-bool
-PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
- switch (RC->getID()) {
- case PPC::G8RCRegClassID:
- case PPC::GPRCRegClassID:
- case PPC::F8RCRegClassID:
- case PPC::F4RCRegClassID:
- case PPC::VRRCRegClassID:
- return true;
- default:
- return false;
- }
-}
-
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered
-/// register first and then a spilled callee-saved register if that fails.
-static
-unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS,
- const TargetRegisterClass *RC, int SPAdj) {
- assert(RS && "Register scavenging must be on");
- unsigned Reg = RS->FindUnusedReg(RC);
- // FIXME: move ARM callee-saved reg scan to target independent code, then
- // search for already spilled CS register here.
- if (Reg == 0)
- Reg = RS->scavengeRegister(RC, II, SPAdj);
- return Reg;
-}
-
/// lowerDynamicAlloc - Generate the code for allocating an object in the
/// current frame. The sequence of code will be in the general form
///
@@ -271,28 +236,16 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Fortunately, a frame greater than 32K is rare.
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *RC = LP64 ? G8RC : GPRC;
-
- // FIXME (64-bit): Use "findScratchRegister"
- unsigned Reg;
- if (requiresRegisterScavenging(MF))
- Reg = findScratchRegister(II, RS, RC, SPAdj);
- else
- Reg = PPC::R0;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
.addReg(PPC::R31)
.addImm(FrameSize);
} else if (LP64) {
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
- .addImm(0)
- .addReg(PPC::X1);
- else
- BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0)
- .addImm(0)
- .addReg(PPC::X1);
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
} else {
BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
.addImm(0)
@@ -302,17 +255,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Grow the stack and update the stack pointer link, then determine the
// address of new allocated space.
if (LP64) {
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(Reg, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
- else
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(PPC::X0, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
-
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
.addReg(PPC::X1)
@@ -369,8 +315,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
(void) RS;
bool LP64 = Subtarget.isPPC64();
- unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
- (LP64 ? PPC::X0 : PPC::R0);
+ unsigned Reg = LP64 ? PPC::X0 : PPC::R0;
unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
@@ -412,8 +357,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
(void) RS;
bool LP64 = Subtarget.isPPC64();
- unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
- (LP64 ? PPC::X0 : PPC::R0);
+ unsigned Reg = LP64 ? PPC::X0 : PPC::R0;
unsigned DestReg = MI.getOperand(0).getReg();
assert(MI.definesRegister(DestReg) &&
"RESTORE_CR does not define its destination");
@@ -438,6 +382,65 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
+void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex, int SPAdj,
+ RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // FIXME: Once LLVM supports creating virtual registers here, or the register
+ // scavenger can return multiple registers, stop using reserved registers
+ // here.
+ (void) SPAdj;
+ (void) RS;
+
+ unsigned Reg = PPC::R0;
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+ .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex, int SPAdj,
+ RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // FIXME: Once LLVM supports creating virtual registers here, or the register
+ // scavenger can return multiple registers, stop using reserved registers
+ // here.
+ (void) SPAdj;
+ (void) RS;
+
+ unsigned Reg = PPC::R0;
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_VRSAVE does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ),
+ Reg), FrameIndex);
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg)
+ .addReg(Reg);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
bool
PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
@@ -498,15 +501,19 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
- // Special case for pseudo-ops SPILL_CR and RESTORE_CR.
- if (requiresRegisterScavenging(MF)) {
- if (OpC == PPC::SPILL_CR) {
- lowerCRSpilling(II, FrameIndex, SPAdj, RS);
- return;
- } else if (OpC == PPC::RESTORE_CR) {
- lowerCRRestore(II, FrameIndex, SPAdj, RS);
- return;
- }
+ // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc.
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex, SPAdj, RS);
+ return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex, SPAdj, RS);
+ return;
+ } else if (OpC == PPC::SPILL_VRSAVE) {
+ lowerVRSAVESpilling(II, FrameIndex, SPAdj, RS);
+ return;
+ } else if (OpC == PPC::RESTORE_VRSAVE) {
+ lowerVRSAVERestore(II, FrameIndex, SPAdj, RS);
+ return;
}
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
@@ -529,7 +536,25 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
isIXAddr = true;
break;
}
-
+
+ bool noImmForm = false;
+ switch (OpC) {
+ case PPC::LVEBX:
+ case PPC::LVEHX:
+ case PPC::LVEWX:
+ case PPC::LVX:
+ case PPC::LVXL:
+ case PPC::LVSL:
+ case PPC::LVSR:
+ case PPC::STVEBX:
+ case PPC::STVEHX:
+ case PPC::STVEWX:
+ case PPC::STVX:
+ case PPC::STVXL:
+ noImmForm = true;
+ break;
+ }
+
// Now add the frame object offset to the offset from r1.
int Offset = MFI->getObjectOffset(FrameIndex);
if (!isIXAddr)
@@ -553,7 +578,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// only "std" to a stack slot that is at least 4-byte aligned, but it can
// happen in invalid code.
if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
- (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
+ (!noImmForm &&
+ isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@@ -563,13 +589,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- unsigned SReg;
- if (requiresRegisterScavenging(MF)) {
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj);
- } else
- SReg = is64Bit ? PPC::X0 : PPC::R0;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned SReg = MF.getRegInfo().createVirtualRegister(is64Bit ? G8RC : GPRC);
// Insert a set of rA with the full offset value before the ld, st, or add
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg)
@@ -584,7 +606,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0
unsigned OperandBase;
- if (OpC != TargetOpcode::INLINEASM) {
+ if (noImmForm)
+ OperandBase = 1;
+ else if (OpC != TargetOpcode::INLINEASM) {
assert(ImmToIdxMap.count(OpC) &&
"No indexed form of load or store available!");
unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 9840666242..ba0fb48b85 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -44,16 +44,22 @@ public:
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
+ const uint32_t *getNoPreservedMask() const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+ /// We require the register scavenger.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
- /// requiresRegisterScavenging - We require a register scavenger.
- /// FIXME (64-bit): Should be inlined.
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ return true;
+ }
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ return true;
+ }
void lowerDynamicAlloc(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS) const;
@@ -61,6 +67,11 @@ public:
int SPAdj, RegScavenger *RS) const;
void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
int SPAdj, RegScavenger *RS) const;
+ void lowerVRSAVESpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
+ int SPAdj, RegScavenger *RS) const;
+ void lowerVRSAVERestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
+ int SPAdj, RegScavenger *RS) const;
+
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 8ee9b1ec9f..55d36bbfc9 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -86,6 +86,14 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
+// The representation of r0 when treated as the constant 0.
+def ZERO : GPR<0, "r0">;
+def ZERO8 : GP8<ZERO, "r0">;
+
+// Representations of the frame pointer used by ISD::FRAMEADDR.
+def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">;
+def FP8 : GP8<FP, "**FRAME POINTER**">;
+
// Condition register bits
def CR0LT : CRBIT< 0, "0">;
def CR0GT : CRBIT< 1, "1">;
@@ -164,11 +172,17 @@ def RM: SPR<512, "**ROUNDING MODE**">;
// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
(sequence "R%u", 30, 13),
- R31, R0, R1, LR)>;
+ R31, R0, R1, LR, FP)>;
def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
(sequence "X%u", 30, 14),
- X31, X13, X0, X1, LR8)>;
+ X31, X13, X0, X1, LR8, FP8)>;
+
+// For some instructions r0 is special (representing the value 0 instead of
+// the value in the r0 register), and we use these register subclasses to
+// prevent r0 from being allocated for use by those instructions.
+def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>;
+def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>;
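Illustrative semantics (plain C++, not LLVM code) of why r0 has to be excluded: in D-form instructions like addi and lwz, an RA field of 0 selects the literal constant 0 rather than the contents of r0, which is also how li rD, imm is expressed as addi rD, 0, imm.

    #include <cstdint>

    // Models the RA-field rule for addi: field value 0 means "the constant 0".
    static uint32_t addiSemantics(unsigned RAField, const uint32_t GPR[32],
                                  int16_t Imm) {
      uint32_t Base = (RAField == 0) ? 0 : GPR[RAField];
      return Base + (uint32_t)(int32_t)Imm;  // "addi rD, 0, imm" == "li rD, imm"
    }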
// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
// ABI the size of the Floating-point register save area is determined by the
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index a59c775272..e740348717 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -202,8 +202,8 @@ class Vector2_Build <ValueType vecType, RegisterClass vectorClass,
(vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1)
>;
-class Vector_Build <ValueType vecType, RegisterClass vectorClass,
- ValueType elemType, RegisterClass elemClass> : Pat <
+class Vector4_Build <ValueType vecType, RegisterClass vectorClass,
+ ValueType elemType, RegisterClass elemClass> : Pat <
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
(elemType elemClass:$z), (elemType elemClass:$w))),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index 0c7880d232..fa8f62de9c 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -365,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
SDValue Operand = Ops[OperandIdx[i] - 1];
switch (Operand.getOpcode()) {
case AMDGPUISD::CONST_ADDRESS: {
- if (i == 2)
- break;
SDValue CstOffset;
- if (!Operand.getValueType().isVector() &&
- SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
- Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
- Ops[SelIdx[i] - 1] = CstOffset;
- return true;
+ if (Operand.getValueType().isVector() ||
+ !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
+ break;
+
+      // Gather the other constant values used by this instruction
+ std::vector<unsigned> Consts;
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = OperandIdx[j];
+ if (SrcIdx < 0)
+ break;
+ if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+ if (Reg->getReg() == AMDGPU::ALU_CONST) {
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+ Consts.push_back(Cst->getZExtValue());
+ }
+ }
}
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+ Consts.push_back(Cst->getZExtValue());
+ if (!TII->fitsConstReadLimitations(Consts))
+ break;
+
+ Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ Ops[SelIdx[i] - 1] = CstOffset;
+ return true;
}
- break;
case ISD::FNEG:
if (NegIdx[i] < 0)
break;
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index be3318a0b4..08650980fd 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
(TargetFlags & R600_InstFlag::OP3));
}
+bool
+R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
+ const {
+  assert (Consts.size() <= 12 && "Too many operands in instruction group");
+ unsigned Pair1 = 0, Pair2 = 0;
+ for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+ unsigned ReadConstHalf = Consts[i] & 2;
+ unsigned ReadConstIndex = Consts[i] & (~3);
+ unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
+ if (!Pair1) {
+ Pair1 = ReadHalfConst;
+ continue;
+ }
+ if (Pair1 == ReadHalfConst)
+ continue;
+ if (!Pair2) {
+ Pair2 = ReadHalfConst;
+ continue;
+ }
+ if (Pair2 != ReadHalfConst)
+ return false;
+ }
+ return true;
+}
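Restated as standalone C++ (an assumption-labeled sketch, not the LLVM member function): each constant selector is reduced to a key of (index & ~3) | (half bit), and an ALU instruction group may use at most two distinct keys.

    #include <vector>

    static bool fitsTwoPairs(const std::vector<unsigned> &Consts) {
      unsigned Pair1 = 0, Pair2 = 0;          // 0 doubles as "slot unused"
      for (unsigned C : Consts) {
        unsigned Key = (C & ~3u) | (C & 2u);
        if (!Pair1 || Pair1 == Key) { Pair1 = Key; continue; }
        if (!Pair2 || Pair2 == Key) { Pair2 = Key; continue; }
        return false;                         // a third distinct key: reject
      }
      return true;
    }

    // Example: selectors {4, 6, 5} reduce to keys {4, 6, 4} -- two distinct
    // keys, so the group fits; adding selector 8 (key 8) would make it fail.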
+
+bool
+R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
+ std::vector<unsigned> Consts;
+ for (unsigned i = 0, n = MIs.size(); i < n; i++) {
+ const MachineInstr *MI = MIs[i];
+
+ const R600Operands::Ops OpTable[3][2] = {
+ {R600Operands::SRC0, R600Operands::SRC0_SEL},
+ {R600Operands::SRC1, R600Operands::SRC1_SEL},
+ {R600Operands::SRC2, R600Operands::SRC2_SEL},
+ };
+
+ if (!isALUInstr(MI->getOpcode()))
+ continue;
+
+ for (unsigned j = 0; j < 3; j++) {
+ int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+ if (SrcIdx < 0)
+ break;
+ if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+ unsigned Const = MI->getOperand(
+ getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+ Consts.push_back(Const);
+ }
+ }
+ }
+ return fitsConstReadLimitations(Consts);
+}
+
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
const ScheduleDAG *DAG) const {
const InstrItineraryData *II = TM->getInstrItineraryData();
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index efe721c00c..bf9569e659 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -53,6 +53,9 @@ namespace llvm {
/// \returns true if this \p Opcode represents an ALU instruction.
bool isALUInstr(unsigned Opcode) const;
+ bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
+ bool canBundle(const std::vector<MachineInstr *> &) const;
+
  /// \brief Vector instructions are instructions that must fill all
/// instruction slots within an instruction group.
bool isVector(const MachineInstr &MI) const;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index c5fa3347dc..8c50d54cf5 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1979,8 +1979,8 @@ def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
-def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
-def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
+def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
+def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
// bitconvert patterns
diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp
index 40aec833ea..b07a585f08 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.cpp
+++ b/lib/Target/R600/R600MachineFunctionInfo.cpp
@@ -14,5 +14,4 @@ using namespace llvm;
R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
: MachineFunctionInfo() {
- memset(Outputs, 0, sizeof(Outputs));
}
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
index 4b901f4bbc..13a46b8eec 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -26,7 +26,6 @@ public:
R600MachineFunctionInfo(const MachineFunction &MF);
SmallVector<unsigned, 4> LiveOuts;
std::vector<unsigned> IndirectRegs;
- SDNode *Outputs[16];
};
} // End llvm namespace
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index 19baef94c7..9074364bb3 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -37,7 +37,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
CurInstKind = IDOther;
CurEmitted = 0;
OccupedSlotsMask = 15;
- memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
InstKindLimit[IDAlu] = 120; // 120 minus 8 for security
@@ -288,79 +287,19 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
}
}
-class ConstPairs {
-private:
- unsigned XYPair;
- unsigned ZWPair;
-public:
- ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) {
- for (unsigned i = 0; i < 3; i++) {
- unsigned ReadConstChan = ReadConst[i] & 3;
- unsigned ReadConstIndex = ReadConst[i] & (~3);
- if (ReadConstChan < 2) {
- if (!XYPair) {
- XYPair = ReadConstIndex;
- }
- } else {
- if (!ZWPair) {
- ZWPair = ReadConstIndex;
- }
- }
- }
- }
-
- bool isCompatibleWith(const ConstPairs& CP) const {
- return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) &&
- (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair);
- }
-};
-
-static
-const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) {
- unsigned ReadConsts[3] = {0, 0, 0};
- R600Operands::Ops OpTable[3][2] = {
- {R600Operands::SRC0, R600Operands::SRC0_SEL},
- {R600Operands::SRC1, R600Operands::SRC1_SEL},
- {R600Operands::SRC2, R600Operands::SRC2_SEL},
- };
-
- if (!TII->isALUInstr(MI.getOpcode()))
- return ConstPairs(ReadConsts);
-
- for (unsigned i = 0; i < 3; i++) {
- int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]);
- if (SrcIdx < 0)
- break;
- if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST)
- ReadConsts[i] =MI.getOperand(
- TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm();
- }
- return ConstPairs(ReadConsts);
-}
-
-bool
-R600SchedStrategy::isBundleable(const MachineInstr& MI) {
- const ConstPairs &MIPair = getPairs(TII, MI);
- for (unsigned i = 0; i < 4; i++) {
- if (!InstructionsGroupCandidate[i])
- continue;
- const ConstPairs &IGPair = getPairs(TII,
- *InstructionsGroupCandidate[i]->getInstr());
- if (!IGPair.isCompatibleWith(MIPair))
- return false;
- }
- return true;
-}
-
SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
if (Q.empty())
return NULL;
for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end();
It != E; ++It) {
SUnit *SU = *It;
- if (isBundleable(*SU->getInstr())) {
+ InstructionsGroupCandidate.push_back(SU->getInstr());
+ if (TII->canBundle(InstructionsGroupCandidate)) {
+ InstructionsGroupCandidate.pop_back();
Q.erase(It);
return SU;
+ } else {
+ InstructionsGroupCandidate.pop_back();
}
}
return NULL;
@@ -381,7 +320,7 @@ void R600SchedStrategy::PrepareNextSlot() {
DEBUG(dbgs() << "New Slot\n");
assert (OccupedSlotsMask && "Slot wasn't filled");
OccupedSlotsMask = 0;
- memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
+ InstructionsGroupCandidate.clear();
LoadAlu();
}
@@ -462,7 +401,7 @@ SUnit* R600SchedStrategy::pickAlu() {
SUnit *SU = AttemptFillSlot(Chan);
if (SU) {
OccupedSlotsMask |= (1 << Chan);
- InstructionsGroupCandidate[Chan] = SU;
+ InstructionsGroupCandidate.push_back(SU->getInstr());
return SU;
}
}
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
index d74ff1e076..3d0367fd8e 100644
--- a/lib/Target/R600/R600MachineScheduler.h
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -98,7 +98,7 @@ public:
virtual void releaseBottomNode(SUnit *SU);
private:
- SUnit *InstructionsGroupCandidate[4];
+ std::vector<MachineInstr *> InstructionsGroupCandidate;
int getInstKind(SUnit *SU);
bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
@@ -112,7 +112,6 @@ private:
void AssignSlot(MachineInstr *MI, unsigned Slot);
SUnit* pickAlu();
SUnit* pickOther(int QID);
- bool isBundleable(const MachineInstr& MI);
void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
};
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 063f5faa63..93f8c38a3a 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -58,6 +58,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
computeRegisterProperties();
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
+
setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::ADD, MVT::i32, Legal);
@@ -229,6 +234,10 @@ EVT SITargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i1;
}
+MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
+ return MVT::i32;
+}
+
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 0411565ee3..d65622526f 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -48,6 +48,7 @@ public:
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
MachineBasicBlock * BB) const;
virtual EVT getSetCCResultType(EVT VT) const;
+ virtual MVT getScalarShiftAmountTy(EVT VT) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 67fbdf7be1..98bd3dbb66 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -302,21 +302,8 @@ static void increaseCounters(Counters &Dst, const Counters &Src) {
Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
}
-bool SIInsertWaits::unorderedDefines(MachineInstr &MI) {
-
- uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
- if (TSFlags & SIInstrFlags::LGKM_CNT)
- return true;
-
- if (TSFlags & SIInstrFlags::EXP_CNT)
- return ExpInstrTypesSeen == 3;
-
- return false;
-}
-
Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
- bool UnorderedDefines = unorderedDefines(MI);
Counters Result = ZeroCounts;
// For each register affected by this
@@ -329,8 +316,7 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
if (Op.isDef()) {
increaseCounters(Result, UsedRegs[j]);
- if (UnorderedDefines)
- increaseCounters(Result, DefinedRegs[j]);
+ increaseCounters(Result, DefinedRegs[j]);
}
if (Op.isUse())
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 260c651dd4..2f10c388ad 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -115,16 +115,17 @@ class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK <
opName#" $dst, $src0", pattern
>;
-multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> {
+multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
+ RegisterClass dstClass> {
def _IMM : SMRD <
op, 1, (outs dstClass:$dst),
- (ins SReg_64:$sbase, i32imm:$offset),
+ (ins baseClass:$sbase, i32imm:$offset),
asm#" $dst, $sbase, $offset", []
>;
def _SGPR : SMRD <
op, 0, (outs dstClass:$dst),
- (ins SReg_64:$sbase, SReg_32:$soff),
+ (ins baseClass:$sbase, SReg_32:$soff),
asm#" $dst, $sbase, $soff", []
>;
}
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 0ab9e4ec0c..05b04a92b3 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -403,9 +403,9 @@ def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT
//def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>;
//def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>;
//def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>;
-//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>;
-//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>;
-//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>;
+def BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>;
+def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>;
+def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
@@ -458,17 +458,31 @@ def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORM
let mayLoad = 1 in {
-defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32>;
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
-//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>;
-//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>;
-//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>;
-//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>;
-//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>;
-//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>;
-//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>;
+defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
+ 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32
+>;
+
+defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
+ 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64
+>;
+
+defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
+ 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128
+>;
+
+defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
+ 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256
+>;
+
+defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
+ 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
+>;
} // mayLoad = 1
@@ -840,7 +854,9 @@ defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>;
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>;
-defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
+ [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+>;
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>;
let isCommutable = 1 in {
@@ -1133,6 +1149,31 @@ def SI_KILL : InstSI <
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
// Uses = [EXEC], Defs = [EXEC]
+let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
+
+def SI_INDIRECT_SRC : InstSI <
+ (outs VReg_32:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
+ "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off",
+ []
+>;
+
+class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
+ (outs rc:$dst, SReg_64:$temp),
+ (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val),
+ "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val",
+ []
+> {
+ let Constraints = "$src = $dst";
+}
+
+def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
+def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
+def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
+def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
+
+} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0]
+
} // end IsCodeGenOnly, isPseudo
def : Pat<
@@ -1241,22 +1282,83 @@ defm : SamplePatterns<VReg_128, v4i32>;
defm : SamplePatterns<VReg_256, v8i32>;
defm : SamplePatterns<VReg_512, v16i32>;
-def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>;
-def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>;
-def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>;
-def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>;
+/********** ============================================ **********/
+/********** Extraction, Insertion, Building and Casting **********/
+/********** ============================================ **********/
+
+foreach Index = 0-2 in {
+ def Extract_Element_v2i32_#Index : Extract_Element <
+ i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v2i32_#Index : Insert_Element <
+ i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v2f32_#Index : Extract_Element <
+ f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v2f32_#Index : Insert_Element <
+ f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-3 in {
+ def Extract_Element_v4i32_#Index : Extract_Element <
+ i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4i32_#Index : Insert_Element <
+ i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v4f32_#Index : Extract_Element <
+ f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v4f32_#Index : Insert_Element <
+ f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-7 in {
+ def Extract_Element_v8i32_#Index : Extract_Element <
+ i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8i32_#Index : Insert_Element <
+ i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>;
-def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>;
+ def Extract_Element_v8f32_#Index : Extract_Element <
+ f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v8f32_#Index : Insert_Element <
+ f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
+
+foreach Index = 0-15 in {
+ def Extract_Element_v16i32_#Index : Extract_Element <
+ i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16i32_#Index : Insert_Element <
+ i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+
+ def Extract_Element_v16f32_#Index : Extract_Element <
+ f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+ def Insert_Element_v16f32_#Index : Insert_Element <
+ f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ >;
+}
def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>;
def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>;
-def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
-def : Vector_Build <v4i32, VReg_128, i32, VReg_32>;
+def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>;
+def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>;
+def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>;
def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>;
+def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>;
def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>;
+def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>;
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <i32, f32, VReg_32>;
@@ -1389,6 +1491,24 @@ def : Pat <
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0)
>;
+// 1. Offset as an 8-bit DWORD immediate
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset)
+>;
+
+// 2. Offset loaded into a 32-bit SGPR
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, imm:$offset),
+ (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset))
+>;
+
+// 3. Offset in a 32-bit VGPR
+def : Pat <
+ (int_SI_load_const SReg_128:$sbase, VReg_32:$voff),
+ (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
+>;
+
/********** ================== **********/
/********** VOP3 Patterns **********/
/********** ================== **********/
@@ -1426,4 +1546,48 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
+/********** ====================== **********/
+/********** Indirect addressing **********/
+/********** ====================== **********/
+
+multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt,
+ SI_INDIRECT_DST IndDst> {
+ // 1. Extract with offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off))
+ >;
+
+ // 2. Extract without offset
+ def : Pat<
+ (vector_extract (vt rc:$vec),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0))
+ >;
+
+ // 3. Insert with offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val))
+ >;
+
+ // 4. Insert without offset
+ def : Pat<
+ (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
+ (i64 (zext (i32 VReg_32:$idx)))
+ ),
+ (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val))
+ >;
+}
+
+defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
+
} // End isSI predicate
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index 7c23d1706b..33bb8157b4 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -16,8 +16,7 @@ let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
- /* XXX: We may need a seperate intrinsic here for loading integer values */
- def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
+ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>;
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ;
def int_SI_wqm : Intrinsic <[], [], []>;
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index b215aa22db..9a027e77eb 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -66,6 +66,7 @@ private:
static const unsigned SkipThreshold = 12;
static char ID;
+ const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
@@ -84,9 +85,14 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
+ void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
+ void IndirectSrc(MachineInstr &MI);
+ void IndirectDst(MachineInstr &MI);
+
public:
SILowerControlFlowPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
+ MachineFunctionPass(ID), TRI(tm.getRegisterInfo()),
+ TII(tm.getInstrInfo()) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -302,6 +308,104 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MI.eraseFromParent();
}
+void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator I = MI;
+
+ unsigned Save = MI.getOperand(1).getReg();
+ unsigned Idx = MI.getOperand(3).getReg();
+
+ if (AMDGPU::SReg_32RegClass.contains(Idx)) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(Idx);
+ MBB.insert(I, MovRel);
+ MI.eraseFromParent();
+ return;
+ }
+
+ assert(AMDGPU::SReg_64RegClass.contains(Save));
+ assert(AMDGPU::VReg_32RegClass.contains(Idx));
+
+ // Save the EXEC mask
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
+ .addReg(AMDGPU::EXEC);
+
+ // Read the next variant into VCC (lower 32 bits) <- also loop target
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC)
+ .addReg(Idx);
+
+ // Move index from VCC into M0
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(AMDGPU::VCC);
+
+ // Compare the just-read M0 value against all possible Idx values
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
+ .addReg(AMDGPU::M0)
+ .addReg(Idx);
+
+ // Update EXEC, save the original EXEC value to VCC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
+
+ // Do the actual move
+ MBB.insert(I, MovRel);
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::VCC);
+
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(-7)
+ .addReg(AMDGPU::EXEC);
+
+ // Restore EXEC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(Save);
+
+ MI.eraseFromParent();
+}
+
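
The sequence LoadM0 emits is a waterfall loop over divergent lanes. A scalar
C++ model of its control flow, for illustration only (the 64-bit mask mirrors
EXEC; the function name and fixed lane count are assumptions of the sketch):

#include <cstdint>

// Pick the index of the first active lane (V_READFIRSTLANE_B32), enable
// exactly the lanes sharing that index (V_CMP_EQ_U32 + S_AND_SAVEEXEC),
// run the MovRel for them, then drop them from EXEC and loop while any
// lane remains (S_XOR_B64 + S_CBRANCH_EXECNZ).
static void waterfall(uint64_t &Exec, const unsigned Idx[64]) {
  uint64_t Saved = Exec;                      // S_MOV_B64 Save, EXEC
  while (Exec) {                              // S_CBRANCH_EXECNZ target
    unsigned M0 = Idx[__builtin_ctzll(Exec)]; // read first active lane
    uint64_t Match = 0;
    for (unsigned L = 0; L != 64; ++L)
      if ((Exec >> L & 1) && Idx[L] == M0)
        Match |= 1ull << L;
    // ... the actual MovRel executes here for the lanes in Match ...
    Exec &= ~Match;                           // retire the covered lanes
  }
  Exec = Saved;                               // S_MOV_B64 EXEC, Save
}
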
+void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Vec = MI.getOperand(2).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
+ .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Vec, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
+void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Off = MI.getOperand(4).getImm();
+ unsigned Val = MI.getOperand(5).getReg();
+
+ MachineInstr *MovRel =
+ BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
+ .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
+ .addReg(Val)
+ .addReg(AMDGPU::M0, RegState::Implicit)
+ .addReg(Dst, RegState::Implicit);
+
+ LoadM0(MI, MovRel);
+}
+
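
Both IndirectSrc and IndirectDst pick the element register by adding the
immediate offset to the encoding of the vector's first sub-register. A minimal
sketch of the assumption that makes this valid (the register numbers below are
hypothetical, not real AMDGPU encodings):

// getSubReg(Vec, sub0) + Off only names the right register because the
// sub-registers of a VReg_* tuple are assumed consecutively numbered.
unsigned Sub0 = 300;          // hypothetical: encoding of Vec's sub0
unsigned Off  = 2;            // immediate element offset from the operand
unsigned Elt  = Sub0 + Off;   // hypothetical: register holding element 2
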
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
bool HaveKill = false;
@@ -363,6 +467,17 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::S_BRANCH:
Branch(MI);
break;
+
+ case AMDGPU::SI_INDIRECT_SRC:
+ IndirectSrc(MI);
+ break;
+
+ case AMDGPU::SI_INDIRECT_DST_V2:
+ case AMDGPU::SI_INDIRECT_DST_V4:
+ case AMDGPU::SI_INDIRECT_DST_V8:
+ case AMDGPU::SI_INDIRECT_DST_V16:
+ IndirectDst(MI);
+ break;
}
}
}
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 3dcad506d2..4f14931a9c 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -158,15 +158,15 @@ def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>;
// Register class for all vector registers (VGPRs + Interpolation Registers)
-def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>;
+def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
-def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>;
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
-def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>;
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
-def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>;
+def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
-def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
//===----------------------------------------------------------------------===//
// [SV]Src_* register classes, can have either an immediate or a register
@@ -174,9 +174,9 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>;
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
-def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>;
+def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>;
def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
-def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>;
+def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 357879bf6c..b53a1ed095 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -40,7 +40,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index c6cafe59eb..ee88ce77c0 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -610,6 +610,9 @@ struct StringComparator {
// Provided for compatibility with MSVC's debug mode.
bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; }
bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; }
+ bool operator()(const char *LHS, const char *RHS) const {
+ return std::strcmp(LHS, RHS) < 0;
+ }
};
}
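
The new const char* overload exists because MSVC's debug-mode STL also
compares two table entries against each other while searching. A minimal usage
sketch (the table contents here are made up; only StringComparator is from the
patch):

#include <algorithm>
#include "llvm/ADT/StringRef.h"

// Binary-search a sorted table of C-string names with a StringRef key.
// (StringRef, const char*) handles key-vs-entry probes; the added
// (const char*, const char*) overload satisfies the debug self-checks.
static const char *const Names[] = {"cos", "exp", "sin"}; // sorted
static bool hasName(llvm::StringRef Key) {
  return std::binary_search(Names, Names + 3, Key, StringComparator());
}
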
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 7d8b49cdf2..e7282519d5 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -61,6 +63,30 @@ TargetMachine::~TargetMachine() {
delete AsmInfo;
}
+/// \brief Reset the target options based on the function's attributes.
+void TargetMachine::resetTargetOptions(const MachineFunction *MF) const {
+ const Function *F = MF->getFunction();
+ TargetOptions &TO = MF->getTarget().Options;
+
+#define RESET_OPTION(X, Y) \
+ do { \
+ if (F->hasFnAttribute(Y)) \
+ TO.X = \
+ (F->getAttributes(). \
+ getAttribute(AttributeSet::FunctionIndex, \
+ Y).getValueAsString() == "true"); \
+ } while (0)
+
+ RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim");
+ RESET_OPTION(NoFramePointerElimNonLeaf, "no-frame-pointer-elim-non-leaf");
+ RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad");
+ RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
+ RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
+ RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
+ RESET_OPTION(UseSoftFloat, "use-soft-float");
+ RESET_OPTION(DisableTailCalls, "disable-tail-calls");
+}
+
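
For reference, a hand-expansion of one RESET_OPTION invocation (this is what
the macro above produces; it is not additional code in the patch):

// RESET_OPTION(UnsafeFPMath, "unsafe-fp-math") expands to roughly:
if (F->hasFnAttribute("unsafe-fp-math"))
  TO.UnsafeFPMath =
      (F->getAttributes()
           .getAttribute(AttributeSet::FunctionIndex, "unsafe-fp-math")
           .getValueAsString() == "true");
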
/// getRelocationModel - Returns the code generation relocation model. The
/// choices are static, PIC, and dynamic-no-pic, and target default.
Reloc::Model TargetMachine::getRelocationModel() const {
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index ee5c2b2bfd..d5568e08d3 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -62,6 +62,9 @@ private:
X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size);
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+ X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End,
+ SMLoc SizeDirLoc, unsigned Size);
+
bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
SmallString<64> &Err);
@@ -170,30 +173,33 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc OffsetOfLoc;
bool AddressOf;
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegOp {
+ unsigned RegNo;
+ };
+
+ struct ImmOp {
+ const MCExpr *Val;
+ };
+
+ struct MemOp {
+ unsigned SegReg;
+ const MCExpr *Disp;
+ unsigned BaseReg;
+ unsigned IndexReg;
+ unsigned Scale;
+ unsigned Size;
+ };
+
union {
- struct {
- const char *Data;
- unsigned Length;
- } Tok;
-
- struct {
- unsigned RegNo;
- } Reg;
-
- struct {
- const MCExpr *Val;
- bool NeedAsmRewrite;
- } Imm;
-
- struct {
- unsigned SegReg;
- const MCExpr *Disp;
- unsigned BaseReg;
- unsigned IndexReg;
- unsigned Scale;
- unsigned Size;
- bool NeedSizeDir;
- } Mem;
+ struct TokOp Tok;
+ struct RegOp Reg;
+ struct ImmOp Imm;
+ struct MemOp Mem;
};
X86Operand(KindTy K, SMLoc Start, SMLoc End)
@@ -231,11 +237,6 @@ struct X86Operand : public MCParsedAsmOperand {
return Imm.Val;
}
- bool needAsmRewrite() const {
- assert(Kind == Immediate && "Invalid access!");
- return Imm.NeedAsmRewrite;
- }
-
const MCExpr *getMemDisp() const {
assert(Kind == Memory && "Invalid access!");
return Mem.Disp;
@@ -332,11 +333,6 @@ struct X86Operand : public MCParsedAsmOperand {
return isImmSExti64i32Value(CE->getValue());
}
- unsigned getMemSize() const {
- assert(Kind == Memory && "Invalid access!");
- return Mem.Size;
- }
-
bool isOffsetOf() const {
return OffsetOfLoc.getPointer();
}
@@ -345,11 +341,6 @@ struct X86Operand : public MCParsedAsmOperand {
return AddressOf;
}
- bool needSizeDirective() const {
- assert(Kind == Memory && "Invalid access!");
- return Mem.NeedSizeDir;
- }
-
bool isMem() const { return Kind == Memory; }
bool isMem8() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 8);
@@ -485,17 +476,15 @@ struct X86Operand : public MCParsedAsmOperand {
return Res;
}
- static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc,
- bool NeedRewrite = true){
+ static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
Res->Imm.Val = Val;
- Res->Imm.NeedAsmRewrite = NeedRewrite;
return Res;
}
/// Create an absolute memory operand.
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, bool NeedSizeDir = false) {
+ unsigned Size = 0) {
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -503,7 +492,6 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = 0;
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
- Res->Mem.NeedSizeDir = NeedSizeDir;
Res->AddressOf = false;
return Res;
}
@@ -512,7 +500,7 @@ struct X86Operand : public MCParsedAsmOperand {
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, bool NeedSizeDir = false) {
+ unsigned Size = 0) {
// We should never just have a displacement; that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -527,7 +515,6 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = IndexReg;
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
- Res->Mem.NeedSizeDir = NeedSizeDir;
Res->AddressOf = false;
return Res;
}
@@ -890,6 +877,45 @@ public:
}
};
+X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start,
+ SMLoc End, SMLoc SizeDirLoc,
+ unsigned Size) {
+ bool NeedSizeDir = false;
+ bool IsVarDecl = false;
+ if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
+ const MCSymbol &Sym = SymRef->getSymbol();
+ // FIXME: The SemaLookup will fail if the name is anything other than an
+ // identifier.
+ // FIXME: Pass a valid SMLoc.
+ unsigned tLength, tSize, tType;
+ SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength,
+ tSize, tType, IsVarDecl);
+ if (!Size) {
+ Size = tType * 8; // Size is in terms of bits in this context.
+ NeedSizeDir = Size > 0;
+ }
+ }
+
+ // If this is not a VarDecl then assume it is a FuncDecl or some other label
+ // reference. We need an 'r' constraint here, so we need to create a register
+ // operand to ensure proper matching. Just pick a GPR based on the size of
+ // a pointer.
+ if (!IsVarDecl) {
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
+ }
+
+ if (NeedSizeDir)
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc,
+ /*Len*/0, Size));
+
+ // When parsing inline assembly we set the base register to a non-zero value
+ // as we don't know the actual value at this time. This is necessary to
+ // get the matching correct in some cases.
+ return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
+ /*Scale*/1, Start, End, Size);
+}
+
X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
unsigned Size) {
const AsmToken &Tok = Parser.getTok();
@@ -914,7 +940,12 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
// Adjust the EndLoc due to the ']'.
End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1);
Parser.Lex();
- return X86Operand::CreateMem(Disp, Start, End, Size);
+ if (!isParsingInlineAsm())
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+
+ // We want the size directive before the '['.
+ SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1);
+ return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size);
}
}
@@ -1036,40 +1067,9 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
if (getParser().parseExpression(Disp, End))
return 0;
- bool NeedSizeDir = false;
- bool IsVarDecl = false;
- if (isParsingInlineAsm()) {
- if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
- const MCSymbol &Sym = SymRef->getSymbol();
- // FIXME: The SemaLookup will fail if the name is anything other then an
- // identifier.
- // FIXME: Pass a valid SMLoc.
- unsigned tLength, tSize, tType;
- SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength,
- tSize, tType, IsVarDecl);
- if (!Size)
- Size = tType * 8; // Size is in terms of bits in this context.
- NeedSizeDir = Size > 0;
- }
- }
if (!isParsingInlineAsm())
return X86Operand::CreateMem(Disp, Start, End, Size);
- else {
- // If this is not a VarDecl then assume it is a FuncDecl or some other label
- // reference. We need an 'r' constraint here, so we need to create register
- // operand to ensure proper matching. Just pick a GPR based on the size of
- // a pointer.
- if (!IsVarDecl) {
- unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
- return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
- }
-
- // When parsing inline assembly we set the base register to a non-zero value
- // as we don't know the actual value at this time. This is necessary to
- // get the matching correct in some cases.
- return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
- /*Scale*/1, Start, End, Size, NeedSizeDir);
- }
+ return CreateMemForInlineAsm(Disp, Start, End, Start, Size);
}
/// Parse the '.' operator.
@@ -1197,7 +1197,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
- return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false);
+ return X86Operand::CreateImm(Imm, Start, End);
}
X86Operand *X86AsmParser::ParseIntelOperand() {
@@ -1221,6 +1221,8 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
getLexer().is(AsmToken::Minus)) {
const MCExpr *Val;
if (!getParser().parseExpression(Val, End)) {
+ if (isParsingInlineAsm())
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
return X86Operand::CreateImm(Val, Start, End);
}
}
@@ -1734,242 +1736,74 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
return false;
}
-bool X86AsmParser::
-processInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
- switch (Inst.getOpcode()) {
- default: return false;
- case X86::AND16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::AND32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::AND64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::AND64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::XOR64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::XOR64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::OR64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::OR64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::CMP64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
+ bool isCmp) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode);
+ if (!isCmp)
+ TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(MCOperand::CreateReg(Reg));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::CMP64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::ADD64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::ADD64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB16i16: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB16ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB32i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
+static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
+ bool isCmp = false) {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB32ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
- case X86::SUB64i32: {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
+ return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
+}
- MCInst TmpInst;
- TmpInst.setOpcode(X86::SUB64ri8);
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
- }
+bool X86AsmParser::
+processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
+ switch (Inst.getOpcode()) {
+ default: return false;
+ case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
+ case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
+ case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
+ case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
+ case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
+ case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
+ case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
+ case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
+ case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
+ case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
+ case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
+ case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
+ case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
+ case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
+ case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
+ case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
+ case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
+ case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
+ case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
+ case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
+ case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
+ case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
+ case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
+ case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
}
}
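
A short sketch of the rewrite the new helpers perform (operand values are made
up for illustration):

// "and ax, 0x12" is first matched as AND16i16 with a lone immediate; the
// immediate fits a sign-extended 8-bit field, so processInstruction
// switches it to the shorter AND16ri8 encoding with AX made explicit.
MCInst Inst;
Inst.setOpcode(X86::AND16i16);
Inst.addOperand(MCOperand::CreateImm(0x12));
bool Shortened = convert16i16to16ri8(Inst, X86::AND16ri8);
// Shortened == true; Inst is now AND16ri8 with operands AX, AX, 0x12.
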
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 122204ae75..5fbefaec5e 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -446,6 +446,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
raw_ostream &OS) const {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
// VEX_R: opcode extension equivalent to REX.R in
// 1's complement (inverted) form
@@ -650,12 +651,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// dst(ModR/M), src1(ModR/M)
// dst(ModR/M), src1(ModR/M), imm8
//
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M)
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
CurOp++;
if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ CurOp++;
+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
CurOp++;
@@ -666,9 +674,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// MRMDestReg instructions forms:
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
break;
case X86II::MRM0r: case X86II::MRM1r:
@@ -1038,9 +1052,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRMDestReg:
EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + 1;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ ++SrcRegNum;
+
EmitRegModRMByte(MI.getOperand(CurOp),
- GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
- CurOp += 2;
+ GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
+ CurOp = SrcRegNum + 1;
break;
case X86II::MRMDestMem:
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index ece38aa346..2518e02e2a 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -816,6 +816,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
const MCInstrDesc *Desc) const {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
// VEX_R: opcode extension equivalent to REX.R in
// 1's complement (inverted) form
@@ -1032,6 +1033,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ CurOp++;
+
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
CurOp++;
@@ -1042,9 +1047,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
// MRMDestReg instructions forms:
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M)
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
break;
case X86II::MRM0r: case X86II::MRM1r:
@@ -1279,9 +1290,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::MRMDestReg: {
MCE.emitByte(BaseOpcode);
+
+ unsigned SrcRegNum = CurOp+1;
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
emitRegModRMByte(MI.getOperand(CurOp).getReg(),
- getX86RegNum(MI.getOperand(CurOp+1).getReg()));
- CurOp += 2;
+ getX86RegNum(MI.getOperand(SrcRegNum).getReg()));
+ CurOp = SrcRegNum + 1;
break;
}
case X86II::MRMDestMem: {
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index b5c3270065..85155f55e0 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1526,6 +1526,9 @@ bool X86FastISel::FastLowerArguments() {
if (!FuncInfo.CanLowerReturn)
return false;
+ if (Subtarget->isTargetWindows())
+ return false;
+
const Function *F = FuncInfo.Fn;
if (F->isVarArg())
return false;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 960870dc60..23cfd6d72f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1053,23 +1053,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
setOperationAction(ISD::SRA, MVT::v16i8, Custom);
- if (Subtarget->hasInt256()) {
- setOperationAction(ISD::SRL, MVT::v2i64, Legal);
- setOperationAction(ISD::SRL, MVT::v4i32, Legal);
-
- setOperationAction(ISD::SHL, MVT::v2i64, Legal);
- setOperationAction(ISD::SHL, MVT::v4i32, Legal);
+ // In the customized shift lowering, the legal cases in AVX2 will be
+ // recognized.
+ setOperationAction(ISD::SRL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Custom);
- setOperationAction(ISD::SRA, MVT::v4i32, Legal);
- } else {
- setOperationAction(ISD::SRL, MVT::v2i64, Custom);
- setOperationAction(ISD::SRL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
- setOperationAction(ISD::SHL, MVT::v2i64, Custom);
- setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v4i32, Custom);
- setOperationAction(ISD::SRA, MVT::v4i32, Custom);
- }
setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
}
@@ -1186,14 +1179,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
- setOperationAction(ISD::SRL, MVT::v4i64, Legal);
- setOperationAction(ISD::SRL, MVT::v8i32, Legal);
-
- setOperationAction(ISD::SHL, MVT::v4i64, Legal);
- setOperationAction(ISD::SHL, MVT::v8i32, Legal);
-
- setOperationAction(ISD::SRA, MVT::v8i32, Legal);
-
setOperationAction(ISD::SDIV, MVT::v8i32, Custom);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
@@ -1210,15 +1195,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v8i32, Custom);
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
// Don't lower v32i8 because there is no 128-bit byte mul
+ }
- setOperationAction(ISD::SRL, MVT::v4i64, Custom);
- setOperationAction(ISD::SRL, MVT::v8i32, Custom);
+ // In the customized shift lowering, the legal cases in AVX2 will be
+ // recognized.
+ setOperationAction(ISD::SRL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i32, Custom);
- setOperationAction(ISD::SHL, MVT::v4i64, Custom);
- setOperationAction(ISD::SHL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i32, Custom);
- setOperationAction(ISD::SRA, MVT::v8i32, Custom);
- }
+ setOperationAction(ISD::SRA, MVT::v8i32, Custom);
// Custom lower several nodes for 256-bit types.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
@@ -7834,7 +7821,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
Chain.getValue(1));
}
- if (Subtarget->isTargetWindows()) {
+ if (Subtarget->isTargetWindows() || Subtarget->isTargetMingw()) {
// Just use the implicit TLS architecture
// Need to generate something similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
@@ -7854,18 +7841,19 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = DAG.getEntryNode();
// Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
- // %gs:0x58 (64-bit).
+ // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so we use
+ // its literal value of 0x2C directly.
Value *Ptr = Constant::getNullValue(Subtarget->is64Bit()
? Type::getInt8PtrTy(*DAG.getContext(),
256)
: Type::getInt32PtrTy(*DAG.getContext(),
257));
- SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain,
- Subtarget->is64Bit()
- ? DAG.getIntPtrConstant(0x58)
- : DAG.getExternalSymbol("_tls_array",
- getPointerTy()),
+ SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58) :
+ (Subtarget->isTargetMingw() ? DAG.getIntPtrConstant(0x2C) :
+ DAG.getExternalSymbol("_tls_array", getPointerTy()));
+
+ SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, TlsArray,
MachinePointerInfo(Ptr),
false, false, false, 0);
@@ -11490,16 +11478,13 @@ SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
-
+static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
- if (!Subtarget->hasSSE2())
- return SDValue();
-
// Optimize shl/srl/sra with constant shift amount.
if (isSplatVector(Amt.getNode())) {
SDValue SclrAmt = Amt->getOperand(0);
@@ -11610,6 +11595,224 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+ if (!Subtarget->is64Bit() &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ Amt.getOpcode() == ISD::BITCAST &&
+ Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ Amt = Amt.getOperand(0);
+ unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+ VT.getVectorNumElements();
+ unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
+ uint64_t ShiftAmt = 0;
+ for (unsigned i = 0; i != Ratio; ++i) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
+ if (C == 0)
+ return SDValue();
+ // 6 == Log2(64)
+ ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
+ }
+ // Check remaining shift amounts.
+ for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ uint64_t ShAmt = 0;
+ for (unsigned j = 0; j != Ratio; ++j) {
+ ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
+ if (C == 0)
+ return SDValue();
+ // 6 == Log2(64)
+ ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
+ }
+ if (ShAmt != ShiftAmt)
+ return SDValue();
+ }
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ case ISD::SRL:
+ return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ case ISD::SRA:
+ return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+ }
+
+ return SDValue();
+}
+
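
A worked example of the shift-amount reconstruction above, assuming a v2i64
shift by 5 in 32-bit mode (the values are illustrative):

// The amount reaches us as (bitcast (v4i32 build_vector 5, 0, 5, 0)), so
// Ratio = 4 / 2 = 2 pieces per 64-bit lane and RatioInLog2 = 1; piece i
// lands at bit i * (1 << (6 - 1)) = i * 32 of the rebuilt amount.
uint64_t Pieces[2] = {5, 0};          // low and high i32 of lane 0
uint64_t ShiftAmt = 0;
for (unsigned i = 0; i != 2; ++i)
  ShiftAmt |= Pieces[i] << (i * 32);  // ShiftAmt == 5
// Lane 1 must decompose to the same pieces or the pattern is rejected.
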
+static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget* Subtarget) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+
+ if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) ||
+ VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ (Subtarget->hasInt256() &&
+ ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
+ VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ SDValue BaseShAmt;
+ EVT EltVT = VT.getVectorElementType();
+
+ if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned i, j;
+ for (i = 0; i != NumElts; ++i) {
+ if (Amt.getOperand(i).getOpcode() == ISD::UNDEF)
+ continue;
+ break;
+ }
+ for (j = i; j != NumElts; ++j) {
+ SDValue Arg = Amt.getOperand(j);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ if (Arg != Amt.getOperand(i))
+ break;
+ }
+ if (i != NumElts && j == NumElts)
+ BaseShAmt = Amt.getOperand(i);
+ } else {
+ if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ Amt = Amt.getOperand(0);
+ if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ cast<ShuffleVectorSDNode>(Amt)->isSplat()) {
+ SDValue InVec = Amt.getOperand(0);
+ if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = InVec.getValueType().getVectorNumElements();
+ unsigned i = 0;
+ for (; i != NumElts; ++i) {
+ SDValue Arg = InVec.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) continue;
+ BaseShAmt = Arg;
+ break;
+ }
+ } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned SplatIdx =
+ cast<ShuffleVectorSDNode>(Amt)->getSplatIndex();
+ if (C->getZExtValue() == SplatIdx)
+ BaseShAmt = InVec.getOperand(1);
+ }
+ }
+ if (BaseShAmt.getNode() == 0)
+ BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt,
+ DAG.getIntPtrConstant(0));
+ }
+ }
+
+ if (BaseShAmt.getNode()) {
+ if (EltVT.bitsGT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt);
+ else if (EltVT.bitsLT(MVT::i32))
+ BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
+
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
+ }
+ case ISD::SRA:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
+ }
+ case ISD::SRL:
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
+ }
+ }
+ }
+ }
+
+ // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+ if (!Subtarget->is64Bit() &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ Amt.getOpcode() == ISD::BITCAST &&
+ Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ Amt = Amt.getOperand(0);
+ unsigned Ratio = Amt.getValueType().getVectorNumElements() /
+ VT.getVectorNumElements();
+ std::vector<SDValue> Vals(Ratio);
+ for (unsigned i = 0; i != Ratio; ++i)
+ Vals[i] = Amt.getOperand(i);
+ for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ for (unsigned j = 0; j != Ratio; ++j)
+ if (Vals[j] != Amt.getOperand(i + j))
+ return SDValue();
+ }
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1));
+ case ISD::SRL:
+ return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1));
+ case ISD::SRA:
+ return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+ SDValue V;
+
+ if (!Subtarget->hasSSE2())
+ return SDValue();
+
+ V = LowerScalarImmediateShift(Op, DAG, Subtarget);
+ if (V.getNode())
+ return V;
+
+ V = LowerScalarVariableShift(Op, DAG, Subtarget);
+ if (V.getNode())
+ return V;
+
+ // AVX2 has VPSLLV/VPSRAV/VPSRLV.
+ if (Subtarget->hasInt256()) {
+ if (Op.getOpcode() == ISD::SRL &&
+ (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+ VT == MVT::v4i64 || VT == MVT::v8i32))
+ return Op;
+ if (Op.getOpcode() == ISD::SHL &&
+ (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+ VT == MVT::v4i64 || VT == MVT::v8i32))
+ return Op;
+ if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32))
+ return Op;
+ }
+
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
@@ -11826,8 +12029,23 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
// fall through
case MVT::v4i32:
case MVT::v8i16: {
- SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT,
- Op.getOperand(0), ShAmt, DAG);
+ // (sext (vzext x)) -> (vsext x)
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op00 = Op0.getOperand(0);
+ SDValue Tmp1;
+ // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
+ if (Op0.getOpcode() == ISD::BITCAST &&
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
+ Tmp1 = LowerVectorIntExtend(Op00, DAG);
+ if (Tmp1.getNode()) {
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+ "This optimization is invalid without a VZEXT.");
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ }
+
+ // If the above didn't work, then just use Shift-Left + Shift-Right.
+ Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG);
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
}
}
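
A scalar analogue of the (sext (vzext x)) -> (vsext x) fold, sketching why it
is sound:

#include <cstdint>

// Zero-extending i8 -> i32 and then sign-extending in-register from bit 7
// (the shl+sra pair emitted below) equals sign-extending the i8 directly
// (the VSEXT path taken above).
int32_t viaShiftPair(uint8_t X) {
  uint32_t Z = X;                       // the vzext
  return (int32_t)(Z << 24) >> 24;      // VSHLI by 24, then VSRAI by 24
}
int32_t viaSext(uint8_t X) { return (int8_t)X; }  // the vsext
// viaShiftPair(X) == viaSext(X) for every X.
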
@@ -12262,7 +12480,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::UINT_TO_FP: {
- if (N->getOperand(0).getValueType() != MVT::v2i32 &&
+ assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+ if (N->getOperand(0).getValueType() != MVT::v2i32 ||
N->getValueType(0) != MVT::v2f32)
return;
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64,
@@ -15918,124 +16137,12 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SHL) {
SDValue V = PerformSHLCombine(N, DAG);
if (V.getNode()) return V;
}
- // On X86 with SSE2 support, we can transform this to a vector shift if
- // all elements are shifted by the same amount. We can't do this in legalize
- // because the a constant vector is typically transformed to a constant pool
- // so we have no knowledge of the shift amount.
- if (!Subtarget->hasSSE2())
- return SDValue();
-
- if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
- (!Subtarget->hasInt256() ||
- (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
- return SDValue();
-
- SDValue ShAmtOp = N->getOperand(1);
- EVT EltVT = VT.getVectorElementType();
- DebugLoc DL = N->getDebugLoc();
- SDValue BaseShAmt = SDValue();
- if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElts = VT.getVectorNumElements();
- unsigned i = 0;
- for (; i != NumElts; ++i) {
- SDValue Arg = ShAmtOp.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- BaseShAmt = Arg;
- break;
- }
- // Handle the case where the build_vector is all undef
- // FIXME: Should DAG allow this?
- if (i == NumElts)
- return SDValue();
-
- for (; i != NumElts; ++i) {
- SDValue Arg = ShAmtOp.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- if (Arg != BaseShAmt) {
- return SDValue();
- }
- }
- } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
- cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
- SDValue InVec = ShAmtOp.getOperand(0);
- if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElts = InVec.getValueType().getVectorNumElements();
- unsigned i = 0;
- for (; i != NumElts; ++i) {
- SDValue Arg = InVec.getOperand(i);
- if (Arg.getOpcode() == ISD::UNDEF) continue;
- BaseShAmt = Arg;
- break;
- }
- } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
- unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
- if (C->getZExtValue() == SplatIdx)
- BaseShAmt = InVec.getOperand(1);
- }
- }
- if (BaseShAmt.getNode() == 0) {
- // Don't create instructions with illegal types after legalize
- // types has run.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) &&
- !DCI.isBeforeLegalize())
- return SDValue();
-
- BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
- DAG.getIntPtrConstant(0));
- }
- } else
- return SDValue();
-
- // The shift amount is an i32.
- if (EltVT.bitsGT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
- else if (EltVT.bitsLT(MVT::i32))
- BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt);
-
- // The shift amount is identical so we can do a vector shift.
- SDValue ValOp = N->getOperand(0);
- switch (N->getOpcode()) {
- default:
- llvm_unreachable("Unknown shift opcode!");
- case ISD::SHL:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v4i64:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- case ISD::SRA:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- case ISD::SRL:
- switch (VT.getSimpleVT().SimpleTy) {
- default: return SDValue();
- case MVT::v2i64:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v4i64:
- case MVT::v8i32:
- case MVT::v16i16:
- return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG);
- }
- }
+ return SDValue();
}
// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..))
@@ -16346,13 +16453,19 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
// Validate that the Mask operand is a vector sra node.
// FIXME: what to do for bytes, since there is a psignb/pblendvb, but
// there is no psrai.b
- if (Mask.getOpcode() != X86ISD::VSRAI)
- return SDValue();
-
- // Check that the SRA is all signbits.
- SDValue SraC = Mask.getOperand(1);
- unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ unsigned SraAmt = ~0;
+ if (Mask.getOpcode() == ISD::SRA) {
+ SDValue Amt = Mask.getOperand(1);
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt))
+ SraAmt = C->getZExtValue();
+ }
+ } else if (Mask.getOpcode() == X86ISD::VSRAI) {
+ SDValue SraC = Mask.getOperand(1);
+ SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ }
if ((SraAmt + 1) != EltBits)
return SDValue();
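
The SraAmt + 1 == EltBits test ensures the mask is a pure sign-bit splat. A
scalar sketch of the invariant for 32-bit elements:

// Only a shift by EltBits - 1 turns an element into all-ones or
// all-zeros, which the psign/blend rewrite requires of its mask.
int32_t Elt  = 0x40000000;  // positive, bit 30 set
int32_t Good = Elt >> 31;   // 0: every bit equals the sign bit
int32_t Bad  = Elt >> 30;   // 1: neither all-ones nor all-zeros
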
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index d86a4065a7..19bdb96f05 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -14,7 +14,7 @@
//===----------------------------------------------------------------------===//
// LEA - Load Effective Address
-
+let SchedRW = [WriteLEA] in {
let neverHasSideEffects = 1 in
def LEA16r : I<0x8D, MRMSrcMem,
(outs GR16:$dst), (ins i32mem:$src),
@@ -36,41 +36,52 @@ let isReMaterializable = 1 in
def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
"lea{q}\t{$src|$dst}, {$dst|$src}",
[(set GR64:$dst, lea64addr:$src)], IIC_LEA>;
-
-
+} // SchedRW
//===----------------------------------------------------------------------===//
// Fixed-Register Multiplication and Division Instructions.
//
+// SchedModel info for instructions that load one value and get the second
+// (and possibly third) value from a register.
+// This is used for instructions that put the memory operand before the
+// other register uses.
+class SchedLoadReg<SchedWrite SW> : Sched<[SW,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Register reads (implicit or explicit).
+ ReadAfterLd, ReadAfterLd]>;
+
// Extra precision multiplication
// AL is really implied by AX, but the registers in Defs must match the
// SDNode results (i8, i32).
+// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
// FIXME: Used for 8-bit mul, ignore result upper 8 bits.
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8
-
+ (implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
"mul{w}\t$src",
- [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16
-
+ [], IIC_MUL16_REG>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
- "mul{l}\t$src", // EAX,EDX = EAX*GR32
+ "mul{l}\t$src",
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/],
- IIC_MUL32_REG>;
+ IIC_MUL32_REG>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
- "mul{q}\t$src", // RAX,RDX = RAX*GR64
+ "mul{q}\t$src",
[/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/],
- IIC_MUL64>;
-
+ IIC_MUL64>, Sched<[WriteIMul]>;
+// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
"mul{b}\t$src",
@@ -78,51 +89,60 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8]
-
+ (implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
let mayLoad = 1, neverHasSideEffects = 1 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
"mul{w}\t$src",
- [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16]
-
+ [], IIC_MUL16_MEM>, OpSize, SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
"mul{l}\t$src",
- [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32]
+ [], IIC_MUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64]
+ "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>;
}
let neverHasSideEffects = 1 in {
+// AL,AH = AL*GR8
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [],
- IIC_IMUL8>; // AL,AH = AL*GR8
+ IIC_IMUL8>, Sched<[WriteIMul]>;
+// AX,DX = AX*GR16
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [],
- IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16
+ IIC_IMUL16_RR>, OpSize, Sched<[WriteIMul]>;
+// EAX,EDX = EAX*GR32
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [],
- IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32
+ IIC_IMUL32_RR>, Sched<[WriteIMul]>;
+// RAX,RDX = RAX*GR64
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [],
- IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64
+ IIC_IMUL64_RR>, Sched<[WriteIMul]>;
let mayLoad = 1 in {
+// AL,AH = AL*[mem8]
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8]
+ "imul{b}\t$src", [], IIC_IMUL8>, SchedLoadReg<WriteIMulLd>;
+// AX,DX = AX*[mem16]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize;
- // AX,DX = AX*[mem16]
+ "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize,
+ SchedLoadReg<WriteIMulLd>;
+// EAX,EDX = EAX*[mem32]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32]
+ "imul{l}\t$src", [], IIC_IMUL32_MEM>, SchedLoadReg<WriteIMulLd>;
+// RAX,RDX = RAX*[mem64]
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64]
+ "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>;
}
} // neverHasSideEffects
@@ -130,7 +150,8 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
+let isCommutable = 1, SchedRW = [WriteIMul] in {
+// X = IMUL Y, Z --> X = IMUL Z, Y
// Register-Register Signed Integer Multiply
def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
@@ -148,9 +169,10 @@ def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>,
TB;
-}
+} // isCommutable, SchedRW
// Register-Memory Signed Integer Multiply
+let SchedRW = [WriteIMulLd, ReadAfterLd] in {
def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
@@ -172,12 +194,14 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(X86smul_flag GR64:$src1, (load addr:$src2)))],
IIC_IMUL64_RM>,
TB;
+} // SchedRW
} // Constraints = "$src1 = $dst"
} // Defs = [EFLAGS]
// Surprisingly enough, these are not two address instructions!
let Defs = [EFLAGS] in {
+let SchedRW = [WriteIMul] in {
// Register-Integer Signed Integer Multiply
def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
@@ -216,9 +240,10 @@ def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, i64immSExt8:$src2))],
IIC_IMUL64_RRI>;
-
+} // SchedRW
// Memory-Integer Signed Integer Multiply
+let SchedRW = [WriteIMulLd] in {
def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
(outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -260,6 +285,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(X86smul_flag (load addr:$src1),
i64immSExt8:$src2))],
IIC_IMUL64_RMI>;
+} // SchedRW
} // Defs = [EFLAGS]
@@ -267,6 +293,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
// unsigned division/remainder
let hasSideEffects = 1 in { // so that we don't speculatively execute
+let SchedRW = [WriteIDiv] in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"div{b}\t$src", [], IIC_DIV8_REG>;
@@ -280,24 +307,30 @@ def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
"div{q}\t$src", [], IIC_DIV64>;
+} // SchedRW
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", [], IIC_DIV8_MEM>;
+ "div{b}\t$src", [], IIC_DIV8_MEM>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", [], IIC_DIV16>, OpSize;
+ "div{w}\t$src", [], IIC_DIV16>, OpSize,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", [], IIC_DIV32>;
+ "div{l}\t$src", [], IIC_DIV32>,
+ SchedLoadReg<WriteIDivLd>;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", [], IIC_DIV64>;
+ "div{q}\t$src", [], IIC_DIV64>,
+ SchedLoadReg<WriteIDivLd>;
}
// Signed division/remainder.
+let SchedRW = [WriteIDiv] in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"idiv{b}\t$src", [], IIC_IDIV8>;
@@ -311,20 +344,25 @@ def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
"idiv{q}\t$src", [], IIC_IDIV64>;
+} // SchedRW
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", [], IIC_IDIV8>;
+ "idiv{b}\t$src", [], IIC_IDIV8>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", [], IIC_IDIV16>, OpSize;
+ "idiv{w}\t$src", [], IIC_IDIV16>, OpSize,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- "idiv{l}\t$src", [], IIC_IDIV32>;
+ "idiv{l}\t$src", [], IIC_IDIV32>,
+ SchedLoadReg<WriteIDivLd>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", [], IIC_IDIV64>;
+ "idiv{q}\t$src", [], IIC_IDIV64>,
+ SchedLoadReg<WriteIDivLd>;
}
} // hasSideEffects = 1
@@ -335,7 +373,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
// unary instructions
let CodeSize = 2 in {
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
"neg{b}\t$dst",
[(set GR8:$dst, (ineg GR8:$src1)),
@@ -351,8 +389,10 @@ def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
[(set GR64:$dst, (ineg GR64:$src1)),
(implicit EFLAGS)], IIC_UNARY_REG>;
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
+// Read-modify-write negate.
+let SchedRW = [WriteALULd, WriteRMW] in {
def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
"neg{b}\t$dst",
[(store (ineg (loadi8 addr:$dst)), addr:$dst),
@@ -368,12 +408,13 @@ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
[(store (ineg (loadi64 addr:$dst)), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
+} // SchedRW
} // Defs = [EFLAGS]
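A minimal sketch of the read-modify-write convention this patch uses for memory destinations (EXNEG32m is hypothetical): WriteALULd models the folded load feeding the ALU, and WriteRMW models the store of the result back to the same address.

// Hypothetical def; the real NEG*m defs are above.
let SchedRW = [WriteALULd,   // load the operand and run it through the ALU
               WriteRMW] in  // write the result back to the same address
def EXNEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
                 "exneg{l}\t$dst", [], IIC_UNARY_MEM>;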
// Note: NOT does not set EFLAGS!
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
// Match xor -1 to not. Favors these over a move imm + xor to save code size.
let AddedComplexity = 15 in {
def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
@@ -388,8 +429,9 @@ def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
[(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>;
}
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
+let SchedRW = [WriteALULd, WriteRMW] in {
def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
"not{b}\t$dst",
[(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
@@ -402,11 +444,12 @@ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
[(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
[(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
+} // SchedRW
} // CodeSize
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
@@ -454,9 +497,9 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2 in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
[(store (add (loadi8 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -491,9 +534,9 @@ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>,
Requires<[In64BitMode]>;
-} // CodeSize = 2
+} // CodeSize = 2, SchedRW
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
@@ -514,10 +557,10 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
[(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))],
IIC_UNARY_REG>;
} // CodeSize = 2
-} // Constraints = "$src1 = $dst"
+} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2 in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
[(store (add (loadi8 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -532,7 +575,7 @@ let CodeSize = 2 in {
def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
[(store (add (loadi64 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
-} // CodeSize = 2
+} // CodeSize = 2, SchedRW
} // Defs = [EFLAGS]
@@ -646,7 +689,8 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f = MRMDestReg>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
+ Sched<[WriteALU]>;
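The patch attaches scheduling information in two equivalent ways, sketched below with hypothetical defs: a trailing Sched<[...]> mixin, which simply initializes the SchedRW field of one class or def, and a let SchedRW = [...] region that every enclosed def inherits.

// Hypothetical defs illustrating the two spellings.
def EXOP32rr : I<0x01, MRMDestReg, (outs GR32:$dst),
                 (ins GR32:$src1, GR32:$src2),
                 "exop{l}\t{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>,
               Sched<[WriteALU]>;   // per-def mixin
let SchedRW = [WriteALU] in         // region form, inherited by the def below
def EXOP32rr_2 : I<0x01, MRMDestReg, (outs GR32:$dst),
                   (ins GR32:$src1, GR32:$src2),
                   "exop2{l}\t{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>;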
// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
// just a regclass (no eflags) as a result.
@@ -689,7 +733,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo,
(outs typeinfo.RegClass:$dst),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
let hasSideEffects = 0;
@@ -699,7 +744,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo, (outs),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
let hasSideEffects = 0;
@@ -710,7 +756,8 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
dag outlist, list<dag> pattern>
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALULd, ReadAfterLd]>;
// BinOpRM_R - Instructions like "add reg, reg, [mem]".
class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -746,7 +793,8 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -783,7 +831,8 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ Sched<[WriteALU]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -821,7 +870,8 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
list<dag> pattern>
: ITy<opcode, MRMDestMem, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>;
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]>;
// BinOpMR_RMW - Instructions like "add [mem], reg".
class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -849,7 +899,8 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern, bits<8> opcode = 0x80>
: ITy<opcode, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -881,7 +932,8 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern>
: ITy<0x82, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ Sched<[WriteALULd, WriteRMW]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -913,7 +965,7 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Register areg, string operands>
: ITy<opcode, RawFrm, typeinfo,
(outs), (ins typeinfo.ImmOperand:$src),
- mnemonic, operands, []> {
+ mnemonic, operands, []>, Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
let Uses = [areg];
let Defs = [areg];
@@ -1199,7 +1251,7 @@ let isCompare = 1, Defs = [EFLAGS] in {
// register class is constrained to GR8_NOREX.
let isPseudo = 1 in
def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
- "", [], IIC_BIN_NONMEM>;
+ "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
}
//===----------------------------------------------------------------------===//
@@ -1210,11 +1262,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))],
- IIC_BIN_NONMEM>;
+ IIC_BIN_NONMEM>, Sched<[WriteALU]>;
def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
- (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>;
+ (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>,
+ Sched<[WriteALULd, ReadAfterLd]>;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
@@ -1261,6 +1314,7 @@ let Predicates = [HasBMI2] in {
// ADCX Instruction
//
let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+ let SchedRW = [WriteALU] in {
def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"adcx{l}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_NONMEM>, T8, OpSize;
@@ -1268,8 +1322,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"adcx{q}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>;
+ } // SchedRW
- let mayLoad = 1 in {
+ let mayLoad = 1, SchedRW = [WriteALULd] in {
def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"adcx{l}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_MEM>, T8, OpSize;
@@ -1284,6 +1339,7 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
// ADOX Instruction
//
let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
+ let SchedRW = [WriteALU] in {
def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"adox{l}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_NONMEM>, T8XS;
@@ -1291,8 +1347,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in {
def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"adox{q}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>;
+ } // SchedRW
- let mayLoad = 1 in {
+ let mayLoad = 1, SchedRW = [WriteALULd] in {
def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"adox{l}\t{$src, $dst|$dst, $src}",
[], IIC_BIN_MEM>, T8XS;
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
index 8f2d0a1aae..a967a4da5c 100644
--- a/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -16,7 +16,7 @@
// SetCC instructions.
multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
- isCommutable = 1 in {
+ isCommutable = 1, SchedRW = [WriteALU] in {
def NAME#16rr
: I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
@@ -37,7 +37,8 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
IIC_CMOV32_RR>, TB;
}
- let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
+ let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+ SchedRW = [WriteALULd, ReadAfterLd] in {
def NAME#16rm
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
@@ -83,11 +84,11 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
def r : I<opc, MRM0r, (outs GR8:$dst), (ins),
!strconcat(Mnemonic, "\t$dst"),
[(set GR8:$dst, (X86setcc OpNode, EFLAGS))],
- IIC_SET_R>, TB;
+ IIC_SET_R>, TB, Sched<[WriteALU]>;
def m : I<opc, MRM0m, (outs), (ins i8mem:$dst),
!strconcat(Mnemonic, "\t$dst"),
[(store (X86setcc OpNode, EFLAGS), addr:$dst)],
- IIC_SET_M>, TB;
+ IIC_SET_M>, TB, Sched<[WriteALU, WriteStore]>;
} // Uses = [EFLAGS]
}
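Note the contrast between the two memory-form conventions (EXSETEQm below is a hypothetical sketch): a SETcc store only computes a flag-derived value and writes it out, so it pairs WriteALU with WriteStore, whereas the load-modify-store instructions elsewhere in this patch pair WriteALULd with WriteRMW.

// Hypothetical compute-and-store def mirroring the multiclass above.
def EXSETEQm : I<0x94, MRM0m, (outs), (ins i8mem:$dst), "exsete\t$dst",
                 [], IIC_SET_M>, TB, Sched<[WriteALU, WriteStore]>;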
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 734e5982b2..2b27bc5bc5 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -153,7 +153,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR32:$addr)], IIC_RET>;
+ [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
}
@@ -161,7 +161,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR64:$addr)], IIC_RET>;
+ [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>;
}
@@ -220,7 +220,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in {
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
- [(set GR8:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
// encoding and avoids a partial-register update sometimes, but doing so
@@ -229,11 +229,12 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
// to an MCInst.
def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
"",
- [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize;
+ [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize,
+ Sched<[WriteZero]>;
// FIXME: Set encoding to pseudo.
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
}
// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
@@ -245,7 +246,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
let Defs = [EFLAGS], isCodeGenOnly=1,
AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, 0)], IIC_ALU_NONMEM>;
+ [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
@@ -254,10 +255,10 @@ let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in
def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
"", [(set GR64:$dst, i64immZExt32:$src)],
- IIC_ALU_NONMEM>;
+ IIC_ALU_NONMEM>, Sched<[WriteALU]>;
// Use sbb to materialize carry bit.
-let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1 in {
+let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in {
// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
// However, Pat<> can't replicate the destination reg into the inputs of the
// result.
@@ -990,9 +991,6 @@ def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
// This corresponds to add $foo@tpoff, %rax
def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
-// This corresponds to mov foo@tpoff(%rbx), %eax
-def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
- (MOV64rm tglobaltlsaddr :$dst)>;
// Direct PC relative function call for small code model. 32-bit displacement
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 2eb454ded2..5ef0c3c13b 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -42,48 +42,54 @@ let neverHasSideEffects = 1 in {
let neverHasSideEffects = 1 in {
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALULd]>;
} // neverHasSideEffects = 1
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALULd]>;
def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
+ [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
let neverHasSideEffects = 1 in {
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>,
- TB, OpSize;
+ TB, OpSize, Sched<[WriteALULd]>;
} // neverHasSideEffects = 1
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALULd]>;
def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
+ [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
@@ -92,12 +98,12 @@ let neverHasSideEffects = 1, isCodeGenOnly = 1 in {
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [], IIC_MOVZX>, TB;
+ [], IIC_MOVZX>, TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [], IIC_MOVZX>, TB;
+ [], IIC_MOVZX>, TB, Sched<[WriteALULd]>;
}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
@@ -106,24 +112,28 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
// were generalized, this would require a special register class.
def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
+ [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
+ [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB,
+ Sched<[WriteALU]>;
def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>,
- TB;
+ TB, Sched<[WriteALULd]>;
def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>;
+ [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>,
+ Sched<[WriteALU]>;
def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>;
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>,
+ Sched<[WriteALULd]>;
// movzbq and movzwq encodings for the disassembler
def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
@@ -145,17 +155,19 @@ let isCodeGenOnly = 1 in {
// Use movzbl instead of movzbq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
- "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
+ "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>,
- TB;
+ TB, Sched<[WriteALULd]>;
// Use movzwl instead of movzwq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
+ "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB,
+ Sched<[WriteALU]>;
def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"", [(set GR64:$dst, (zextloadi64i16 addr:$src))],
- IIC_MOVZX>, TB;
+ IIC_MOVZX>, TB, Sched<[WriteALULd]>;
// There's no movzlq instruction, but movl can be used for this purpose, using
// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
@@ -165,9 +177,10 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
// necessarily all zero. In such cases, we fall back to these explicit zext
// instructions.
def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
- "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>;
+ "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>,
+ Sched<[WriteALU]>;
def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"", [(set GR64:$dst, (zextloadi64i32 addr:$src))],
- IIC_MOVZX>;
+ IIC_MOVZX>, Sched<[WriteALULd]>;
}
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index d989ec7bb0..39165e24a8 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -786,7 +786,7 @@ def LEAVE64 : I<0xC9, RawFrm,
//
let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [],
IIC_POP_REG16>, OpSize;
def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
@@ -803,9 +803,9 @@ def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [],
def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize;
def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>,
Requires<[In32BitMode]>;
-}
+} // mayLoad, SchedRW
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
IIC_PUSH_REG>, OpSize;
def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
@@ -832,29 +832,30 @@ def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>,
def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>,
Requires<[In32BitMode]>;
-}
+} // mayStore, SchedRW
}
let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
-let mayLoad = 1 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
def POP64r : I<0x58, AddRegFrm,
(outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>;
def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [],
IIC_POP_REG>;
def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [],
IIC_POP_MEM>;
-}
-let mayStore = 1 in {
+} // mayLoad, SchedRW
+let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH64r : I<0x50, AddRegFrm,
(outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>;
def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>;
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [],
IIC_PUSH_MEM>;
-}
+} // mayStore, SchedRW
}
-let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1,
+ SchedRW = [WriteStore] in {
def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
"push{q}\t$imm", [], IIC_PUSH_IMM>;
def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
@@ -871,17 +872,18 @@ def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>,
Requires<[In64BitMode]>;
let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
- mayLoad=1, neverHasSideEffects=1 in {
+ mayLoad = 1, neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>,
Requires<[In32BitMode]>;
}
let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
- mayStore=1, neverHasSideEffects=1 in {
+ mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>,
Requires<[In32BitMode]>;
}
-let Constraints = "$src = $dst" in { // GR32 = bswap GR32
+let Constraints = "$src = $dst", SchedRW = [WriteALU] in {
+// GR32 = bswap GR32
def BSWAP32r : I<0xC8, AddRegFrm,
(outs GR32:$dst), (ins GR32:$src),
"bswap{l}\t$dst",
@@ -890,7 +892,7 @@ def BSWAP32r : I<0xC8, AddRegFrm,
def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
"bswap{q}\t$dst",
[(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB;
-} // Constraints = "$src = $dst"
+} // Constraints = "$src = $dst", SchedRW
// Bit scan instructions.
let Defs = [EFLAGS] in {
@@ -976,7 +978,7 @@ def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>;
//===----------------------------------------------------------------------===//
// Move Instructions.
//
-
+let SchedRW = [WriteMove] in {
let neverHasSideEffects = 1 in {
def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
@@ -987,6 +989,7 @@ def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
}
+
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
@@ -1004,7 +1007,9 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>;
}
+} // SchedRW
+let SchedRW = [WriteStore] in {
def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>;
@@ -1017,9 +1022,11 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>;
+} // SchedRW
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a 32-bit
/// absolute address (an offset from the segment base, not PC-relative).
/// These are only valid in x86-32 mode.
+let SchedRW = [WriteALU] in {
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
"mov{b}\t{$src, %al|AL, $src}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
@@ -1038,6 +1045,7 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
"mov{l}\t{%eax, $dst|$dst, EAX}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
+}
// FIXME: These definitions are utterly broken
// Just leave them commented out for now because they're useless outside
@@ -1055,7 +1063,7 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
*/
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
"mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
@@ -1066,7 +1074,7 @@ def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>;
@@ -1081,6 +1089,7 @@ def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
[(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>;
}
+let SchedRW = [WriteStore] in {
def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(store GR8:$src, addr:$dst)], IIC_MOV_MEM>;
@@ -1093,6 +1102,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store GR64:$src, addr:$dst)], IIC_MOV_MEM>;
+} // SchedRW
// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
// that they can be used for copying and storing h registers, which can't be
@@ -1101,34 +1111,37 @@ let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV8rr_NOREX : I<0x88, MRMDestReg,
(outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
- "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>;
+ "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>,
+ Sched<[WriteMove]>;
let mayStore = 1 in
def MOV8mr_NOREX : I<0x88, MRMDestMem,
(outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
- IIC_MOV_MEM>;
+ IIC_MOV_MEM>, Sched<[WriteStore]>;
let mayLoad = 1, neverHasSideEffects = 1,
canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
(outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
- IIC_MOV_MEM>;
+ IIC_MOV_MEM>, Sched<[WriteLoad]>;
}
// Condition code ops, incl. set if equal/not equal/...
+let SchedRW = [WriteALU] in {
let Defs = [EFLAGS], Uses = [AH] in
def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
[(set EFLAGS, (X86sahf AH))], IIC_AHF>;
let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [],
IIC_AHF>; // AH = flags
-
+} // SchedRW
//===----------------------------------------------------------------------===//
// Bit tests instructions: BT, BTS, BTR, BTC.
let Defs = [EFLAGS] in {
+let SchedRW = [WriteALU] in {
def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>,
@@ -1139,13 +1152,14 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB;
+} // SchedRW
// Unlike with the register+register form, the memory+register form of the
// bt instruction does not ignore the high bits of the index. From ISel's
// perspective, this is pretty bizarre. Make these instructions disassembly
// only for now.
-let mayLoad = 1, hasSideEffects = 0 in {
+let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
// [(X86bt (loadi16 addr:$src1), GR16:$src2),
@@ -1166,6 +1180,7 @@ let mayLoad = 1, hasSideEffects = 0 in {
>, TB;
}
+let SchedRW = [WriteALU] in {
def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))],
@@ -1178,10 +1193,12 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))],
IIC_BT_RI>, TB;
+} // SchedRW
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
+let SchedRW = [WriteALU] in {
def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2))
@@ -1194,8 +1211,10 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi64 addr:$src1),
i64immSExt8:$src2))], IIC_BT_MI>, TB;
+} // SchedRW
let hasSideEffects = 0 in {
+let SchedRW = [WriteALU] in {
def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1203,8 +1222,9 @@ def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd] in {
def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1214,6 +1234,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
}
+let SchedRW = [WriteALU] in {
def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1221,8 +1242,9 @@ def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1232,6 +1254,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
}
+let SchedRW = [WriteALU] in {
def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1239,6 +1262,7 @@ def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // SchedRW
let mayLoad = 1, mayStore = 1 in {
def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
@@ -1250,6 +1274,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
}
+let SchedRW = [WriteALU] in {
def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1257,8 +1282,9 @@ def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1268,6 +1294,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB;
}
+let SchedRW = [WriteALU] in {
def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>,
OpSize, TB;
@@ -1275,8 +1302,9 @@ def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>,
OpSize, TB;
@@ -1286,6 +1314,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB;
}
+let SchedRW = [WriteALU] in {
def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>,
OpSize, TB;
@@ -1293,8 +1322,9 @@ def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
"bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB;
+} // SchedRW
-let mayLoad = 1, mayStore = 1 in {
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in {
def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>,
OpSize, TB;
@@ -1483,6 +1513,7 @@ def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB;
// Table lookup instructions
def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>;
+let SchedRW = [WriteMicrocoded] in {
// ASCII Adjust After Addition
// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>,
@@ -1512,7 +1543,9 @@ def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>,
// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>,
Requires<[In32BitMode]>;
+} // SchedRW
+let SchedRW = [WriteSystem] in {
// Check Array Index Against Bounds
def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize,
@@ -1528,11 +1561,13 @@ def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>,
Requires<[In32BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// MOVBE Instructions
//
let Predicates = [HasMOVBE] in {
+ let SchedRW = [WriteALULd] in {
def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>,
@@ -1545,6 +1580,8 @@ let Predicates = [HasMOVBE] in {
"movbe{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>,
T8;
+ }
+ let SchedRW = [WriteStore] in {
def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"movbe{w}\t{$src, $dst|$dst, $src}",
[(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>,
@@ -1557,6 +1594,7 @@ let Predicates = [HasMOVBE] in {
"movbe{q}\t{$src, $dst|$dst, $src}",
[(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>,
T8;
+ }
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 0979752757..4d43ee1f15 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -16,6 +16,8 @@
class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
InstrItinClass rr = arg_rr;
InstrItinClass rm = arg_rm;
+ // InstrSchedModel info.
+ X86FoldableSchedWrite Sched = WriteFAdd;
}
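A sketch of how the new field is meant to be used (names as in X86Schedule.td): every itinerary bundle now also carries a SchedWrite, defaulting to WriteFAdd, and the let-overrides below retarget whole groups of bundles so the multiclasses can forward itins.Sched into Sched<[...]> lists.

// Hypothetical bundle showing the override pattern.
let Sched = WriteFMul in
def EX_MUL_ITINS : OpndItins<
  IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM
>;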
class SizeItins<OpndItins arg_s, OpndItins arg_d> {
@@ -33,6 +35,7 @@ class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
// scalar
+let Sched = WriteFAdd in {
def SSE_ALU_F32S : OpndItins<
IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
>;
@@ -40,11 +43,13 @@ def SSE_ALU_F32S : OpndItins<
def SSE_ALU_F64S : OpndItins<
IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
>;
+}
def SSE_ALU_ITINS_S : SizeItins<
SSE_ALU_F32S, SSE_ALU_F64S
>;
+let Sched = WriteFMul in {
def SSE_MUL_F32S : OpndItins<
IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM
>;
@@ -52,11 +57,13 @@ def SSE_MUL_F32S : OpndItins<
def SSE_MUL_F64S : OpndItins<
IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
>;
+}
def SSE_MUL_ITINS_S : SizeItins<
SSE_MUL_F32S, SSE_MUL_F64S
>;
+let Sched = WriteFDiv in {
def SSE_DIV_F32S : OpndItins<
IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F32S_RM
>;
@@ -64,12 +71,14 @@ def SSE_DIV_F32S : OpndItins<
def SSE_DIV_F64S : OpndItins<
IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
>;
+}
def SSE_DIV_ITINS_S : SizeItins<
SSE_DIV_F32S, SSE_DIV_F64S
>;
// parallel
+let Sched = WriteFAdd in {
def SSE_ALU_F32P : OpndItins<
IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
>;
@@ -77,11 +86,13 @@ def SSE_ALU_F32P : OpndItins<
def SSE_ALU_F64P : OpndItins<
IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
>;
+}
def SSE_ALU_ITINS_P : SizeItins<
SSE_ALU_F32P, SSE_ALU_F64P
>;
+let Sched = WriteFMul in {
def SSE_MUL_F32P : OpndItins<
IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F32P_RM
>;
@@ -89,11 +100,13 @@ def SSE_MUL_F32P : OpndItins<
def SSE_MUL_F64P : OpndItins<
IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
>;
+}
def SSE_MUL_ITINS_P : SizeItins<
SSE_MUL_F32P, SSE_MUL_F64P
>;
+let Sched = WriteFDiv in {
def SSE_DIV_F32P : OpndItins<
IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F32P_RM
>;
@@ -101,6 +114,7 @@ def SSE_DIV_F32P : OpndItins<
def SSE_DIV_F64P : OpndItins<
IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
>;
+}
def SSE_DIV_ITINS_P : SizeItins<
SSE_DIV_F32P, SSE_DIV_F64P
@@ -110,6 +124,7 @@ def SSE_BIT_ITINS_P : OpndItins<
IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;
+let Sched = WriteVecALU in {
def SSE_INTALU_ITINS_P : OpndItins<
IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;
@@ -117,7 +132,9 @@ def SSE_INTALU_ITINS_P : OpndItins<
def SSE_INTALUQ_ITINS_P : OpndItins<
IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
>;
+}
+let Sched = WriteVecIMul in
def SSE_INTMUL_ITINS_P : OpndItins<
IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
>;
@@ -148,13 +165,15 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
}
def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>;
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
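For a concrete reading of the multiclass above, a hypothetical expansion for a scalar add: the register form uses itins.Sched directly (WriteFAdd for SSE_ALU_F32S), while the memory form uses its folded-load twin (itins.Sched.Folded, i.e. WriteFAddLd) plus ReadAfterLd, since the register source need not be ready until the load completes.

// Hypothetical expansion; the real defs come from sse12_fp_scalar.
def EXADDSSrr : SI<0x58, MRMSrcReg, (outs FR32:$dst),
                   (ins FR32:$src1, FR32:$src2),
                   "addss\t{$src2, $dst|$dst, $src2}", [],
                   IIC_SSE_ALU_F32S_RR>, Sched<[WriteFAdd]>;
def EXADDSSrm : SI<0x58, MRMSrcMem, (outs FR32:$dst),
                   (ins FR32:$src1, f32mem:$src2),
                   "addss\t{$src2, $dst|$dst, $src2}", [],
                   IIC_SSE_ALU_F32S_RM>,
                Sched<[WriteFAddLd, ReadAfterLd]>;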
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
@@ -169,14 +188,16 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))], itins.rr>;
+ RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, mem_cpat:$src2))], itins.rm>;
+ RC:$src1, mem_cpat:$src2))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// sse12_fp_packed - SSE 1 & 2 packed instructions class
@@ -189,14 +210,16 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
+ Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
- itins.rm, d>;
+ itins.rm, d>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
@@ -209,12 +232,14 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rr, IIC_DEFAULT, d>;
+ pat_rr, IIC_DEFAULT, d>,
+ Sched<[WriteVecLogic]>;
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rm, IIC_DEFAULT, d>;
+ pat_rm, IIC_DEFAULT, d>,
+ Sched<[WriteVecLogicLd, ReadAfterLd]>;
}
//===----------------------------------------------------------------------===//
@@ -345,7 +370,7 @@ let Predicates = [HasAVX] in {
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
[(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
@@ -362,7 +387,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
@@ -379,7 +404,7 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [HasAVX] in {
+ isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8f32 immAllZerosV))]>;
}
@@ -417,7 +442,7 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1 in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
let Predicates = [HasAVX2] in
@@ -444,14 +469,14 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
!strconcat(base_opc, asm_opr),
[(set VR128:$dst, (vt (OpNode VR128:$src1,
(scalar_to_vector RC:$src2))))],
- IIC_SSE_MOV_S_RR>;
+ IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
// For the disassembler
let isCodeGenOnly = 1, hasSideEffects = 0 in
def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, RC:$src2),
!strconcat(base_opc, asm_opr),
- [], IIC_SSE_MOV_S_RR>;
+ [], IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>;
}
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
@@ -464,7 +489,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
- VEX, VEX_LIG;
+ VEX, VEX_LIG, Sched<[WriteStore]>;
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
@@ -473,7 +498,8 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ Sched<[WriteStore]>;
}
// Loading from memory automatically zeroes the upper bits.
@@ -482,11 +508,11 @@ multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>, VEX, VEX_LIG;
+ IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>;
def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM>;
+ IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>;
}
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS;
@@ -745,11 +771,13 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
bit IsReMaterializable = 1> {
let neverHasSideEffects = 1 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>;
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>,
+ Sched<[WriteMove]>;
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>;
+ [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>,
+ Sched<[WriteLoad]>;
}
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
@@ -790,6 +818,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
TB, OpSize;
+let SchedRW = [WriteStore] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -822,9 +851,10 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)],
IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
+} // SchedRW
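// Editor's note: the block form used above, `let SchedRW = [...] in { ... }`,
// assigns the SchedRW field to every def it encloses. It is interchangeable
// with appending Sched<[...]> to each def, since the Sched class simply sets
// that same field; the block form is preferred here because every def in the
// group is a plain store. A sketch (EXAMPLE_STmr is hypothetical):
let SchedRW = [WriteStore] in
def EXAMPLE_STmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                       "movaps\t{$src, $dst|$dst, $src}", [],
                       IIC_SSE_MOVA_P_MR>;
// ...is equivalent to:
// def EXAMPLE_STmr : PSI<...>, Sched<[WriteStore]>;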
// For the disassembler
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
@@ -880,6 +910,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
(VMOVUPDYmr addr:$dst, VR256:$src)>;
+let SchedRW = [WriteStore] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
@@ -896,9 +927,10 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)],
IIC_SSE_MOVU_P_MR>;
+} // SchedRW
// For the disassembler
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>;
@@ -1009,7 +1041,7 @@ let Predicates = [HasAVX] in {
(VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
def : Pat<(store (v8i16 (extract_subvector
(v16i16 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
def : Pat<(store (v16i8 (extract_subvector
(v32i8 VR256:$src), (iPTR 0))), addr:$dst),
(VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
@@ -1044,7 +1076,7 @@ let Predicates = [UseSSE1] in {
// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>, VEX;
@@ -1061,7 +1093,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
let isCodeGenOnly = 1 in {
def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
"movaps\t{$src, $dst|$dst, $src}",
@@ -1095,14 +1127,16 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
[(set VR128:$dst,
(psnode VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
- itin, SSEPackedSingle>, TB;
+ itin, SSEPackedSingle>, TB,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
def PDrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "d", asm_opr),
[(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
- itin, SSEPackedDouble>, TB, OpSize;
+ itin, SSEPackedDouble>, TB, OpSize,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
@@ -1123,6 +1157,7 @@ let AddedComplexity = 20 in {
IIC_SSE_MOV_LH>;
}
+let SchedRW = [WriteStore] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -1143,6 +1178,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (vector_extract (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>;
+} // SchedRW
let Predicates = [HasAVX] in {
// Shuffle with VMOVLPS
@@ -1222,6 +1258,7 @@ let AddedComplexity = 20 in {
IIC_SSE_MOV_LH>;
}
+let SchedRW = [WriteStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -1246,6 +1283,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (vector_extract
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
+} // SchedRW
let Predicates = [HasAVX] in {
// VMOVHPS patterns
@@ -1296,14 +1334,14 @@ let AddedComplexity = 20 in {
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V;
+ VEX_4V, Sched<[WriteShuffle]>;
def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V;
+ VEX_4V, Sched<[WriteShuffle]>;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
@@ -1311,13 +1349,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
}
let Predicates = [HasAVX] in {
@@ -1352,22 +1390,27 @@ def SSE_CVT_PD : OpndItins<
IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
>;
+let Sched = WriteCvtI2F in
def SSE_CVT_PS : OpndItins<
IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
>;
+let Sched = WriteCvtI2F in
def SSE_CVT_Scalar : OpndItins<
IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_32 : OpndItins<
IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SS2SI_64 : OpndItins<
IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
>;
+let Sched = WriteCvtF2I in
def SSE_CVT_SD2SI : OpndItins<
IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
>;
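// Editor's note: the `let Sched = ... in` wrappers above work because
// OpndItins carries a default scheduling class alongside its itineraries,
// which the multiclasses then read back via Sched<[itins.Sched]> and
// Sched<[itins.Sched.Folded]>. A sketch of the assumed class shape; the real
// OpndItins is defined near the top of this file, and ExampleOpndItins is a
// hypothetical stand-in:
class ExampleOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
  // Default write type, overridden per group with `let Sched = ... in`.
  X86FoldableSchedWrite Sched = WriteFAdd;
}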
@@ -1377,10 +1420,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (OpNode SrcRC:$src))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -1388,10 +1431,10 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
OpndItins itins> {
let neverHasSideEffects = 1 in {
def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [], itins.rr, d>;
+ [], itins.rr, d>, Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [], itins.rm, d>;
+ [], itins.rm, d>, Sched<[itins.Sched.Folded]>;
}
}
@@ -1399,11 +1442,13 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
let neverHasSideEffects = 1 in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
- !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ Sched<[WriteCvtI2F]>;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
- !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ Sched<[WriteCvtI2FLd, ReadAfterLd]>;
} // neverHasSideEffects = 1
}
@@ -1534,10 +1579,12 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>;
+ [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -1549,14 +1596,14 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
@@ -1701,13 +1748,15 @@ let neverHasSideEffects = 1 in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG;
+ IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR64:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
- XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
+ XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
@@ -1716,26 +1765,28 @@ def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))],
- IIC_SSE_CVT_Scalar_RR>;
+ IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround (loadf64 addr:$src)))],
IIC_SSE_CVT_Scalar_RM>,
XD,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
@@ -1743,13 +1794,15 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2F]>;
def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
// Convert scalar single to scalar double
@@ -1759,13 +1812,15 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RR>,
- XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
+ XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
+ Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR32:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
- XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
+ XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
def : Pat<(f64 (fextend FR32:$src)),
@@ -1784,12 +1839,12 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fextend FR32:$src))],
IIC_SSE_CVT_Scalar_RR>, XS,
- Requires<[UseSSE2]>;
+ Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (extloadf32 addr:$src))],
IIC_SSE_CVT_Scalar_RM>, XS,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
// extload f32 -> f64. This matches load+fextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
@@ -1806,57 +1861,61 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2F]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>;
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2F]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>;
+ IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
+ IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
+ IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
// Convert Packed Double FP to Packed DW Integers
@@ -1867,7 +1926,7 @@ let Predicates = [HasAVX] in {
def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
- VEX;
+ VEX, Sched<[WriteCvtF2I]>;
// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
@@ -1875,18 +1934,20 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX;
+ (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX,
+ Sched<[WriteCvtF2ILd]>;
// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L;
+ (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L,
+ Sched<[WriteCvtF2I]>;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>,
- VEX, VEX_L;
+ VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
(VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
}
@@ -1895,11 +1956,11 @@ def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>;
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
// Convert packed single/double fp to doubleword with truncation
// SSE2 packed instructions with XS prefix
@@ -1907,32 +1968,33 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX;
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
(memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX;
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
(memopv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
+ Sched<[WriteCvtF2ILd]>;
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>;
+ IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>;
+ IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
@@ -1982,7 +2044,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>, VEX;
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true for memory operands.
@@ -1995,19 +2057,19 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX;
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
- IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
@@ -2021,12 +2083,13 @@ let Predicates = [HasAVX] in {
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>,
+ Sched<[WriteCvtF2ILd]>;
// Convert packed single to packed double
let Predicates = [HasAVX] in {
@@ -2034,32 +2097,32 @@ let Predicates = [HasAVX] in {
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB, VEX;
+ IIC_SSE_CVT_PD_RR>, TB, VEX, Sched<[WriteCvtF2F]>;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB, VEX;
+ IIC_SSE_CVT_PD_RM>, TB, VEX, Sched<[WriteCvtF2FLd]>;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
}
let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
- IIC_SSE_CVT_PD_RR>, TB;
+ IIC_SSE_CVT_PD_RR>, TB, Sched<[WriteCvtF2F]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, TB;
+ IIC_SSE_CVT_PD_RM>, TB, Sched<[WriteCvtF2FLd]>;
}
// Convert Packed DW Integers to Packed Double FP
@@ -2067,30 +2130,33 @@ let Predicates = [HasAVX] in {
let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
- []>, VEX;
+ []>, VEX, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX;
+ (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
+ Sched<[WriteCvtI2F]>;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvtdq2_pd_256
- (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L;
+ (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L,
+ Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L;
+ (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
+ Sched<[WriteCvtI2F]>;
}
let neverHasSideEffects = 1, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2F]>;
// AVX 256-bit register conversion intrinsics
let Predicates = [HasAVX] in {
@@ -2107,7 +2173,7 @@ let Predicates = [HasAVX] in {
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>, VEX;
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>;
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
@@ -2116,31 +2182,31 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2psx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX;
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
- IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
- IIC_SSE_CVT_PD_RR>;
+ IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>;
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>;
// AVX 256-bit register conversion intrinsics
@@ -2193,22 +2259,24 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
- IIC_SSE_ALU_F32S_RR>;
+ IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
- IIC_SSE_ALU_F32S_RM>;
+ IIC_SSE_ALU_F32S_RM>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -2241,12 +2309,14 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
(ins VR128:$src1, VR128:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
VR128:$src, imm:$cc))],
- itins.rr>;
+ itins.rr>,
+ Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
(load addr:$src), imm:$cc))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
// Aliases to match intrinsics which expect XMM operand(s).
@@ -2276,12 +2346,14 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
- IIC_SSE_COMIS_RR, d>;
+ IIC_SSE_COMIS_RR, d>,
+ Sched<[WriteFAdd]>;
def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
(ld_frag addr:$src2)))],
- IIC_SSE_COMIS_RM, d>;
+ IIC_SSE_COMIS_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
}
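// Editor's note: a usage sketch of the ordered-compare multiclass above. FP
// compares are mapped onto the FP-add write types (WriteFAdd for the register
// form, WriteFAddLd plus ReadAfterLd for the load form) rather than onto a
// dedicated compare class. EXAMPLE_COMISS is hypothetical; the real COMISS
// instantiation elsewhere in this file has the same shape.
defm EXAMPLE_COMISS : sse12_ord_cmp<0x2F, FR32, X86comi, f32, f32mem, loadf32,
                                    "comiss", SSEPackedSingle>, TB;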
let Defs = [EFLAGS] in {
@@ -2338,20 +2410,23 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
- IIC_SSE_CMPP_RR, d>;
+ IIC_SSE_CMPP_RR, d>,
+ Sched<[WriteFAdd]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
- IIC_SSE_CMPP_RM, d>;
+ IIC_SSE_CMPP_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rri_alt : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RR, d>;
+ asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>;
def rmi_alt : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RM, d>;
+ asm_alt, [], IIC_SSE_CMPP_RM, d>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
}
}
@@ -2427,12 +2502,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
- (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
- (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
+ Sched<[WriteShuffle]>;
}
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2516,13 +2593,14 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1, RC:$src2)))],
- IIC_SSE_UNPCK, d>;
+ IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>;
def rm : PI<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1,
(mem_frag addr:$src2))))],
- IIC_SSE_UNPCK, d>;
+ IIC_SSE_UNPCK, d>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
@@ -2613,10 +2691,11 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
Domain d> {
def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>;
+ [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
+ Sched<[WriteVecLogic]>;
def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [],
- IIC_SSE_MOVMSK, d>, REX_W;
+ IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>;
}
let Predicates = [HasAVX] in {
@@ -2644,18 +2723,18 @@ let Predicates = [HasAVX] in {
// Assembler Only
def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX;
+ SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>;
def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
SSEPackedDouble>, TB,
- OpSize, VEX;
+ OpSize, VEX, Sched<[WriteVecLogic]>;
def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX, VEX_L;
+ SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>;
def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
SSEPackedDouble>, TB,
- OpSize, VEX, VEX_L;
+ OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>;
}
defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
@@ -2693,7 +2772,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>;
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
@@ -2701,7 +2781,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1,
(bitconvert (memop_frag addr:$src2)))))],
- itins.rm>;
+ itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt
@@ -2967,6 +3048,7 @@ let isCodeGenOnly = 1 in {
///
/// And, we have a special variant form for a full-vector intrinsic form.
+let Sched = WriteFSqrt in {
def SSE_SQRTP : OpndItins<
IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
>;
@@ -2974,7 +3056,9 @@ def SSE_SQRTP : OpndItins<
def SSE_SQRTS : OpndItins<
IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
>;
+}
+let Sched = WriteFRcp in {
def SSE_RCPP : OpndItins<
IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
>;
@@ -2982,6 +3066,7 @@ def SSE_RCPP : OpndItins<
def SSE_RCPS : OpndItins<
IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
>;
+}
/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
@@ -2991,24 +3076,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
(ins FR32:$src1, FR32:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
let mayLoad = 1 in {
def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1,f32mem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>;
+ [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
// For scalar unary operations, fold a load into the operation
// only in OptForSize mode. It eliminates an instruction, but it also
// eliminates a whole-register clobber (the load), so it introduces a
@@ -3016,13 +3103,15 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>;
+ Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
+ [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>;
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand.
@@ -3033,24 +3122,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
(ins FR32:$src1, FR32:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
let mayLoad = 1 in {
def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1,f32mem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
!strconcat("v", OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]>;
+ [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>;
// For scalar unary operations, fold a load into the operation
// only in OptForSize mode. It eliminates an instruction, but it also
// eliminates a whole-register clobber (the load), so it introduces a
@@ -3058,17 +3149,17 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
- Requires<[UseSSE1, OptForSize]>;
+ Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
let Constraints = "$src1 = $dst" in {
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rr>;
+ [], itins.rr>, Sched<[itins.Sched]>;
let mayLoad = 1, hasSideEffects = 0 in
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [], itins.rm>;
+ [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -3080,30 +3171,32 @@ let Predicates = [HasAVX] in {
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
- itins.rr>, VEX;
+ itins.rr>, VEX, Sched<[itins.Sched]>;
def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))],
- itins.rm>, VEX;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
- itins.rr>, VEX, VEX_L;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
- itins.rm>, VEX, VEX_L;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>,
+ Sched<[itins.Sched]>;
def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>;
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
@@ -3115,33 +3208,33 @@ let Predicates = [HasAVX] in {
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int VR128:$src))],
- itins.rr>, VEX;
+ itins.rr>, VEX, Sched<[itins.Sched]>;
def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
- itins.rm>, VEX;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (V8F32Int VR256:$src))],
- itins.rr>, VEX, VEX_L;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst),
(ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))],
- itins.rm>, VEX, VEX_L;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int VR128:$src))],
- itins.rr>;
+ itins.rr>, Sched<[itins.Sched]>;
def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded]>;
}
/// sse2_fp_unop_s - SSE2 unops in scalar form.
@@ -3152,35 +3245,40 @@ let Predicates = [HasAVX], hasSideEffects = 0 in {
(ins FR64:$src1, FR64:$src2),
!strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
let mayLoad = 1 in {
def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1,f64mem:$src2),
!strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2),
!strconcat("v", OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, VEX_4V, VEX_LIG;
+ []>, VEX_4V, VEX_LIG,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>;
+ [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
// See the comments in sse1_fp_unop_s for why this is OptForSize.
def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
[(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
- Requires<[UseSSE2, OptForSize]>;
+ Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
+ [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>;
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
/// sse2_fp_unop_p - SSE2 unops in vector forms.
@@ -3191,30 +3289,32 @@ let Predicates = [HasAVX] in {
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
- itins.rr>, VEX;
+ itins.rr>, VEX, Sched<[itins.Sched]>;
def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))],
- itins.rm>, VEX;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
- itins.rr>, VEX, VEX_L;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
- itins.rm>, VEX, VEX_L;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>;
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>,
+ Sched<[itins.Sched]>;
def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>;
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>,
+ Sched<[itins.Sched.Folded]>;
}
// Square root.
@@ -3305,52 +3405,48 @@ let Predicates = [UseSSE1] in {
//===----------------------------------------------------------------------===//
let AddedComplexity = 400 in { // Prefer non-temporal versions
- def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
- def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
-
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2i64 VR128:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
-
- def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
-
- def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
- def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4i64 VR256:$src),
- addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
-}
+let SchedRW = [WriteStore] in {
+def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2i64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
+
+def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
+let ExeDomain = SSEPackedInt in
+def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4i64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX, VEX_L;
-let AddedComplexity = 400 in { // Prefer non-temporal versions
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
@@ -3366,9 +3462,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
IIC_SSE_MOVNT>;
-def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
- (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
-
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti{l}\t{$src, $dst|$dst, $src}",
@@ -3380,14 +3473,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
[(nontemporalstore (i64 GR64:$src), addr:$dst)],
IIC_SSE_MOVNT>,
TB, Requires<[HasSSE2]>;
-}
+} // SchedRW = [WriteStore]
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>;
+} // AddedComplexity
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
-let Predicates = [HasSSE1] in {
+let Predicates = [HasSSE1], SchedRW = [WriteLoad] in {
def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
IIC_SSE_PREFETCH>, TB;
@@ -3402,6 +3502,8 @@ def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
IIC_SSE_PREFETCH>, TB;
}
+// FIXME: How should these memory instructions be modeled?
+let SchedRW = [WriteLoad] in {
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
@@ -3421,6 +3523,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
TB, Requires<[HasSSE2]>;
+} // SchedRW
def : Pat<(X86SFence), (SFENCE)>;
def : Pat<(X86LFence), (LFENCE)>;
@@ -3450,7 +3553,7 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
let ExeDomain = SSEPackedInt in { // SSE integer instructions
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
VEX;
@@ -3466,7 +3569,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
}
// For the disassembler
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>,
@@ -3484,7 +3587,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
}
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- neverHasSideEffects = 1 in {
+ neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
VEX;
@@ -3501,7 +3604,7 @@ let Predicates = [HasAVX] in {
}
}
-let mayStore = 1, neverHasSideEffects = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
@@ -3520,6 +3623,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
}
}
+let SchedRW = [WriteMove] in {
let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;
@@ -3538,9 +3642,10 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
}
+} // SchedRW
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- neverHasSideEffects = 1 in {
+ neverHasSideEffects = 1, SchedRW = [WriteLoad] in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
@@ -3552,7 +3657,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
XS, Requires<[UseSSE2]>;
}
-let mayStore = 1 in {
+let mayStore = 1, SchedRW = [WriteStore] in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
@@ -3580,6 +3685,7 @@ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
+let Sched = WriteVecIMul in
def SSE_PMADD : OpndItins<
IIC_SSE_PMADD, IIC_SSE_PMADD
>;
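The `let Sched = ... in` wrappers used here work because OpndItins (defined earlier in this file) now carries a SchedWrite field that the binop multiclasses read back as `itins.Sched` and `itins.Sched.Folded`. A minimal sketch of that hook, under the assumption that the default value is WriteFAdd (the rest matches the existing OpndItins shape):

class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
  InstrItinClass rr = arg_rr;    // itinerary for the register-register form
  InstrItinClass rm = arg_rm;    // itinerary for the register-memory form
  SchedWrite Sched = WriteFAdd;  // overridden by `let Sched = ...` wrappers
}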
@@ -3598,14 +3704,15 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
- itins.rm>;
+ itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
@@ -3639,20 +3746,22 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
- itins.rr>;
+ itins.rr>, Sched<[WriteVecShift]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode RC:$src1,
- (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>;
+ (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>,
+ Sched<[WriteVecShiftLd, ReadAfterLd]>;
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
(ins RC:$src1, i32i8imm:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>;
+ [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>,
+ Sched<[WriteVecShift]>;
}
/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
@@ -3667,14 +3776,16 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>;
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
+ Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))]>;
+ (bitconvert (memop_frag addr:$src2)))))]>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt
@@ -3779,7 +3890,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 128-bit logical shifts.
def VPSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -3825,7 +3936,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR256, v8i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 256-bit logical shifts.
def VPSLLDQYri : PDIi8<0x73, MRM7r,
(outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
@@ -3871,7 +3982,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32,
SSE_INTSHIFT_ITINS_P>;
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
// 128-bit logical shifts.
def PSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
@@ -3966,14 +4077,15 @@ let Predicates = [HasAVX] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, VEX;
+ IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, i8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX;
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX2] in {
@@ -3983,14 +4095,15 @@ let Predicates = [HasAVX2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, VEX, VEX_L;
+ IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, i8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L;
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [UseSSE2] in {
@@ -4000,14 +4113,15 @@ let Predicates = [UseSSE2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>;
+ IIC_SSE_PSHUF>, Sched<[WriteShuffle]>;
def mi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>;
+ (i8 imm:$src2))))], IIC_SSE_PSHUF>,
+ Sched<[WriteShuffleLd]>;
}
}
} // ExeDomain = SSEPackedInt
@@ -4043,7 +4157,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
- IIC_SSE_UNPCK>;
+ IIC_SSE_UNPCK>, Sched<[WriteShuffle]>;
def rm : PDI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -4052,7 +4166,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
[(set VR128:$dst, (OpNode VR128:$src1,
(bc_frag (memopv2i64
addr:$src2))))],
- IIC_SSE_UNPCK>;
+ IIC_SSE_UNPCK>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
@@ -4060,12 +4175,14 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
def Yrr : PDI<opc, MRMSrcReg,
(outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>;
+ [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>,
+ Sched<[WriteShuffle]>;
def Yrm : PDI<opc, MRMSrcMem,
(outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (OpNode VR256:$src1,
- (bc_frag (memopv4i64 addr:$src2))))]>;
+ (bc_frag (memopv4i64 addr:$src2))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
@@ -4142,7 +4259,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>;
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>,
+ Sched<[WriteShuffle]>;
def rmi : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, i32i8imm:$src3),
@@ -4151,7 +4269,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
- imm:$src3))], IIC_SSE_PINSRW>;
+ imm:$src3))], IIC_SSE_PINSRW>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
}
// Extract
@@ -4160,12 +4279,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))]>, TB, OpSize, VEX;
+ imm:$src2))]>, TB, OpSize, VEX,
+ Sched<[WriteShuffle]>;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))], IIC_SSE_PEXTRW>;
+ imm:$src2))], IIC_SSE_PEXTRW>,
+                   Sched<[WriteShuffle]>;
// Insert
let Predicates = [HasAVX] in {
@@ -4173,7 +4294,7 @@ let Predicates = [HasAVX] in {
def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, TB, OpSize, VEX_4V;
+ []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>;
}
let Constraints = "$src1 = $dst" in
@@ -4185,7 +4306,7 @@ let Constraints = "$src1 = $dst" in
// SSE2 - Packed Mask Creation
//===---------------------------------------------------------------------===//
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
@@ -4213,7 +4334,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
// SSE2 - Conditional Store
//===---------------------------------------------------------------------===//
-let ExeDomain = SSEPackedInt in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
let Uses = [EDI] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
@@ -4252,41 +4373,42 @@ def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteMove]>;
def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>;
+ (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
+ Sched<[WriteMove]>;
def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
@@ -4294,22 +4416,22 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
IIC_SSE_MOVDQ>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
@@ -4317,26 +4439,29 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX,
+ Sched<[WriteMove]>;
def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
- VEX;
+                                     VEX, Sched<[WriteStore]>;
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))], IIC_SSE_MOVD_ToGP>;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
+ Sched<[WriteMove]>;
def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)],
- IIC_SSE_MOVDQ>;
+                  IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
+let SchedRW = [WriteMove] in {
def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"vmov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
@@ -4349,6 +4474,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>;
+} //SchedRW
//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
@@ -4357,28 +4483,28 @@ let Predicates = [HasAVX] in
def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
- VEX;
+ VEX, Sched<[WriteLoad]>;
def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVD_ToGP>;
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
@@ -4386,23 +4512,24 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))],
- IIC_SSE_MOVD_ToGP>, VEX;
+ IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>;
def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))],
- IIC_SSE_MOVD_ToGP>;
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// Patterns and instructions to describe movd/movq to XMM register zero-extends
//
+let SchedRW = [WriteMove] in {
let AddedComplexity = 15 in {
def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -4428,8 +4555,9 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
(v2i64 (scalar_to_vector GR64:$src)))))],
IIC_SSE_MOVDQ>;
}
+} // SchedRW
-let AddedComplexity = 20 in {
+let AddedComplexity = 20, SchedRW = [WriteLoad] in {
def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4442,7 +4570,7 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
(v4i32 (X86vzmovl (v4i32 (scalar_to_vector
(loadi32 addr:$src))))))],
IIC_SSE_MOVDQ>;
-}
+} // AddedComplexity, SchedRW
let Predicates = [HasAVX] in {
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
@@ -4491,6 +4619,8 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
//
+
+let SchedRW = [WriteLoad] in {
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4502,10 +4632,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (scalar_to_vector (loadi64 addr:$src))))],
IIC_SSE_MOVDQ>, XS,
Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
+} // SchedRW
//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//
+let SchedRW = [WriteStore] in {
def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -4516,17 +4648,19 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(store (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>;
+} // SchedRW
//===---------------------------------------------------------------------===//
// Store / copy the lower 64 bits of an XMM register.
//
def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX,
+ Sched<[WriteStore]>;
def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)],
- IIC_SSE_MOVDQ>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
let AddedComplexity = 20 in
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
@@ -4535,7 +4669,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
- XS, VEX, Requires<[HasAVX]>;
+ XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>;
let AddedComplexity = 20 in
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
@@ -4544,7 +4678,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
- XS, Requires<[UseSSE2]>;
+ XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
let Predicates = [HasAVX], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
@@ -4574,6 +4708,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)),
// Moving from XMM to XMM, clearing the upper 64 bits. Note: there is a bug in
// the IA32 documentation; movq xmm1, xmm2 does clear the high bits.
//
+let SchedRW = [WriteVecLogic] in {
let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -4586,7 +4721,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
IIC_SSE_MOVQ_RR>,
XS, Requires<[UseSSE2]>;
+} // SchedRW
+let SchedRW = [WriteVecLogicLd] in {
let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -4602,6 +4739,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
IIC_SSE_MOVDQ>,
XS, Requires<[UseSSE2]>;
}
+} // SchedRW
let AddedComplexity = 20 in {
let Predicates = [HasAVX] in {
@@ -4619,6 +4757,7 @@ let AddedComplexity = 20 in {
}
// Instructions to match in the assembler
+let SchedRW = [WriteMove] in {
def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVDQ>, VEX, VEX_W;
@@ -4629,16 +4768,19 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVDQ>, VEX, VEX_W;
+} // SchedRW
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
+let SchedRW = [WriteMove] in {
let Predicates = [HasAVX] in
def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
+} // SchedRW
//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
@@ -4649,11 +4791,11 @@ multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (vt (OpNode RC:$src)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (OpNode (mem_frag addr:$src)))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX] in {
@@ -4709,25 +4851,27 @@ multiclass sse3_replicate_dfp<string OpcodeStr> {
let neverHasSideEffects = 1 in
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [], IIC_SSE_MOV_LH>;
+ [], IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(v2f64 (X86Movddup
(scalar_to_vector (loadf64 addr:$src)))))],
- IIC_SSE_MOV_LH>;
+ IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>;
}
// FIXME: Merge with the class above once there are patterns for the ymm version
multiclass sse3_replicate_dfp_y<string OpcodeStr> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>;
+ [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
+ Sched<[WriteShuffle]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(v4f64 (X86Movddup
- (scalar_to_vector (loadf64 addr:$src)))))]>;
+ (scalar_to_vector (loadf64 addr:$src)))))]>,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX] in {
@@ -4775,6 +4919,7 @@ let Predicates = [UseSSE3] in {
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//
+let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
@@ -4788,6 +4933,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
IIC_SSE_LDDQU>;
+}
//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
@@ -4801,13 +4947,15 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>;
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
def rm : I<0xD0, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>;
+       [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
@@ -4844,14 +4992,15 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
+ Sched<[WriteFAdd]>;
def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
- IIC_SSE_HADDSUB_RM>;
+ IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
@@ -4859,14 +5008,15 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>,
+ Sched<[WriteFAdd]>;
def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
- IIC_SSE_HADDSUB_RM>;
+ IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>;
}
let Predicates = [HasAVX] in {
@@ -4915,7 +5065,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>,
- OpSize;
+ OpSize, Sched<[WriteVecALU]>;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src),
@@ -4923,7 +5073,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
[(set VR128:$dst,
(IntId128
(bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>,
- OpSize;
+ OpSize, Sched<[WriteVecALULd]>;
}
/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
@@ -4933,14 +5083,15 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
(ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (IntId256 VR256:$src))]>,
- OpSize;
+ OpSize, Sched<[WriteVecALU]>;
def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(IntId256
- (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src))))]>, OpSize,
+ Sched<[WriteVecALULd]>;
}
let Predicates = [HasAVX] in {
@@ -4972,6 +5123,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//
+let Sched = WriteVecALU in {
def SSE_PHADDSUBD : OpndItins<
IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
>;
@@ -4981,12 +5133,16 @@ def SSE_PHADDSUBSW : OpndItins<
def SSE_PHADDSUBW : OpndItins<
IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
>;
+}
+let Sched = WriteShuffle in
def SSE_PSHUFB : OpndItins<
IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
>;
+let Sched = WriteVecALU in
def SSE_PSIGN : OpndItins<
IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM
>;
+let Sched = WriteVecIMul in
def SSE_PMULHRSW : OpndItins<
IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW
>;
@@ -5003,7 +5159,7 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
- OpSize;
+ OpSize, Sched<[itins.Sched]>;
def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
@@ -5011,7 +5167,8 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OpVT (OpNode RC:$src1,
- (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize;
+ (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
@@ -5025,7 +5182,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize;
+ OpSize, Sched<[itins.Sched]>;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -5033,7 +5190,8 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
@@ -5175,7 +5333,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize;
+ [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
@@ -5183,7 +5341,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize;
+ [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
}
@@ -5193,13 +5351,13 @@ multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+ []>, OpSize, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2, i8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+ []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
}
@@ -5247,6 +5405,7 @@ def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
// SSE3 - Thread synchronization
//===---------------------------------------------------------------------===//
+let SchedRW = [WriteSystem] in {
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
[(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
@@ -5260,6 +5419,7 @@ let Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
[(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
TB, Requires<[HasSSE3]>;
+} // SchedRW
def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 3caa1b538c..053417ccde 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+let SchedRW = [WriteSystem] in {
let Defs = [RAX, RDX] in
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>,
TB;
@@ -35,6 +36,7 @@ let Uses = [EFLAGS] in
def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
[(int_x86_int (i8 3))], IIC_INT3>;
+} // SchedRW
def : Pat<(debugtrap),
(INT3)>;
@@ -43,6 +45,7 @@ def : Pat<(debugtrap),
// FIXME: This doesn't work because InstAlias can't match immediate constants.
//def : InstAlias<"int\t$3", (INT3)>;
+let SchedRW = [WriteSystem] in {
def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)], IIC_INT>;
@@ -65,11 +68,13 @@ def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>;
def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>,
Requires<[In64BitMode]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Input/Output Instructions.
//
+let SchedRW = [WriteSystem] in {
let Defs = [AL], Uses = [DX] in
def IN8rr : I<0xEC, RawFrm, (outs), (ins),
"in{b}\t{%dx, %al|AL, DX}", [], IIC_IN_RR>;
@@ -113,10 +118,12 @@ def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", [], IIC_INS>;
def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", [], IIC_INS>, OpSize;
def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", [], IIC_INS>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from debug registers
+let SchedRW = [WriteSystem] in {
def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB;
def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
@@ -126,10 +133,12 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from control registers
+let SchedRW = [WriteSystem] in {
def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB;
def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
@@ -139,6 +148,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Segment override instruction prefixes
@@ -155,6 +165,7 @@ def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
// Moves to and from segment registers.
//
+let SchedRW = [WriteMove] in {
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize;
def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
@@ -182,10 +193,12 @@ def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Segmentation support instructions.
+let SchedRW = [WriteSystem] in {
def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB;
def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
@@ -347,10 +360,12 @@ def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
"verw\t$seg", [], IIC_VERW_MEM>, TB;
def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
"verw\t$seg", [], IIC_VERW_REG>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Descriptor-table support instructions
+let SchedRW = [WriteSystem] in {
def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
"sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>;
def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins),
@@ -385,9 +400,11 @@ def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
"lldt{w}\t$src", [], IIC_LLDT_REG>, TB;
def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
"lldt{w}\t$src", [], IIC_LLDT_MEM>, TB;
-
+} // SchedRW
+
//===----------------------------------------------------------------------===//
// Specialized register support
+let SchedRW = [WriteSystem] in {
def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB;
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB;
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB;
@@ -410,14 +427,18 @@ def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
"lmsw{w}\t$src", [], IIC_LMSW_REG>, TB;
def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// Cache instructions
+let SchedRW = [WriteSystem] in {
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB;
def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB;
+} // SchedRW
//===----------------------------------------------------------------------===//
// XSAVE instructions
+let SchedRW = [WriteSystem] in {
let Defs = [RDX, RAX], Uses = [RCX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
@@ -438,6 +459,7 @@ let Uses = [RDX, RAX] in {
def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
"xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
}
+} // SchedRW
//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 3af1b3e06b..a8a9fd8acc 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -407,6 +407,57 @@ ReSimplify:
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
+  // Commute operands to get a smaller encoding by using VEX.R instead of
+  // VEX.B if one of the registers is extended but the other isn't.
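+  // For example (illustrative encodings): in "vmovaps %xmm8, %xmm0", the
+  // normal MRMSrcReg form puts xmm8 in ModRM.rm, which needs VEX.B and
+  // therefore the 3-byte prefix (C4 C1 78 28 C0); the reversed MRMDestReg
+  // form puts xmm8 in ModRM.reg, where VEX.R suffices, so the 2-byte prefix
+  // applies (C5 78 29 C0), saving one byte.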
+ case X86::VMOVAPDrr:
+ case X86::VMOVAPDYrr:
+ case X86::VMOVAPSrr:
+ case X86::VMOVAPSYrr:
+ case X86::VMOVDQArr:
+ case X86::VMOVDQAYrr:
+ case X86::VMOVDQUrr:
+ case X86::VMOVDQUYrr:
+ case X86::VMOVUPDrr:
+ case X86::VMOVUPDYrr:
+ case X86::VMOVUPSrr:
+ case X86::VMOVUPSYrr: {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ }
+ break;
+ }
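+
+  // VMOVSD/VMOVSS carry an extra pass-through source operand, so the
+  // register tested for this trick is operand 2 (the scalar source).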
+ case X86::VMOVSDrr:
+ case X86::VMOVSSrr: {
+ if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
+ X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
+ unsigned NewOpc;
+ switch (OutMI.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
+ case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
+ }
+ OutMI.setOpcode(NewOpc);
+ }
+ break;
+ }
+
// TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
// inputs modeled as normal uses instead of implicit uses. As such, truncate
// off all but the first operand (the callee). FIXME: Change isel.
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index d99d085298..bcdd0eb56d 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -7,6 +7,88 @@
//
//===----------------------------------------------------------------------===//
+// InstrSchedModel annotations for out-of-order CPUs.
+//
+// These annotations are independent of the itinerary classes defined below.
+
+// Instructions with folded loads need to read the memory operand immediately,
+// but other register operands don't have to be read until the load is ready.
+// These operands are marked with ReadAfterLd.
+def ReadAfterLd : SchedRead;
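+// For example, in a folded-load add like "addps (%rax), %xmm1", the address
+// must be ready at issue time, but %xmm1 is not needed until the load
+// returns; marking that read with ReadAfterLd lets the scheduler model the
+// slack.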
+
+// Instructions with both a load and a store folded are modeled as a folded
+// load + WriteRMW.
+def WriteRMW : SchedWrite;
+
+// Most instructions can fold loads, so almost every SchedWrite comes in two
+// variants: With and without a folded load.
+// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
+// with a folded load.
+class X86FoldableSchedWrite : SchedWrite {
+ // The SchedWrite to use when a load is folded into the instruction.
+ SchedWrite Folded;
+}
+
+// Multiclass that produces a linked pair of SchedWrites.
+multiclass X86SchedWritePair {
+ // Register-Memory operation.
+ def Ld : SchedWrite;
+ // Register-Register operation.
+ def NAME : X86FoldableSchedWrite {
+ let Folded = !cast<SchedWrite>(NAME#"Ld");
+ }
+}
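+// (So "defm WriteALU : X86SchedWritePair;" below creates both WriteALU and
+// WriteALULd, with WriteALU.Folded pointing at WriteALULd.)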
+
+// Arithmetic.
+defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
+defm WriteIMul : X86SchedWritePair; // Integer multiplication.
+defm WriteIDiv : X86SchedWritePair; // Integer division.
+def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
+
+// Integer shifts and rotates.
+defm WriteShift : X86SchedWritePair;
+
+// Loads, stores, and moves, not folded with other operations.
+def WriteLoad : SchedWrite;
+def WriteStore : SchedWrite;
+def WriteMove : SchedWrite;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+def WriteZero : SchedWrite;
+
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+defm WriteJump : X86SchedWritePair;
+
+// Floating point. This covers both scalar and vector operations.
+defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare.
+defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
+defm WriteFDiv : X86SchedWritePair; // Floating point division.
+defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
+defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal.
+
+// Vector integer operations.
+defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
+defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
+defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply.
+
+// Vector bitwise operations.
+// These are often used on both floating point and integer vectors.
+defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor.
+defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends.
+
+// Conversion between integer and float.
+defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
+defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
+defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
+
+// Catch-all for expensive system instructions.
+def WriteSystem : SchedWrite;
+
+// Old microcoded instructions that nobody uses.
+def WriteMicrocoded : SchedWrite;
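
These classes are deliberately abstract: each out-of-order subtarget binds them to its own issue ports and latencies in its SchedMachineModel. A hedged sketch of such a binding, with SandyBridge-style names (SandyBridgeModel and the SBPort* resources are illustrative here, not defined by this patch):

let SchedModel = SandyBridgeModel in {
  // Loads resolve on the load ports with ~5 cycle latency.
  def : WriteRes<WriteLoad,  [SBPort23]> { let Latency = 5; }
  // Stores need an address port and the store-data port.
  def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
  // Vector integer ALU ops can issue on port 1 or port 5.
  def : WriteRes<WriteVecALU,   [SBPort15]>;
  def : WriteRes<WriteVecALULd, [SBPort23, SBPort15]> { let Latency = 6; }
  // An operand tagged ReadAfterLd may be produced up to 5 cycles after
  // issue without stalling the folded-load instruction.
  def : ReadAdvance<ReadAfterLd, 5>;
}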
+
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for X86
def IIC_DEFAULT : InstrItinClass;
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index be2a997b8e..3e3b86edbb 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -169,6 +169,29 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ static const CostTblEntry<MVT> AVX2CostTable[] = {
+    // Shifts on v4i64/v8i32 are legal on AVX2, even though we declared them
+    // Custom so we can detect the cases where the shift amount is a scalar.
+ { ISD::SHL, MVT::v4i32, 1 },
+ { ISD::SRL, MVT::v4i32, 1 },
+ { ISD::SRA, MVT::v4i32, 1 },
+ { ISD::SHL, MVT::v8i32, 1 },
+ { ISD::SRL, MVT::v8i32, 1 },
+ { ISD::SRA, MVT::v8i32, 1 },
+ { ISD::SHL, MVT::v2i64, 1 },
+ { ISD::SRL, MVT::v2i64, 1 },
+ { ISD::SHL, MVT::v4i64, 1 },
+ { ISD::SRL, MVT::v4i64, 1 },
+ };
+
+ // Look for AVX2 lowering tricks.
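+  // (LT is the type-legalization cost pair computed earlier in this function;
+  // LT.first counts the legal-type pieces the op splits into and scales the
+  // per-piece cost from the table.)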
+ if (ST->hasAVX2()) {
+ int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable),
+ ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * AVX2CostTable[Idx].Cost;
+ }
+
static const CostTblEntry<MVT> AVX1CostTable[] = {
// We don't have to scalarize unsupported ops. We can issue two half-sized
// operations and we only need to extract the upper YMM half.
@@ -257,8 +280,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
};