diff options
author | Alexander Kornienko <alexfh@google.com> | 2013-03-26 02:28:59 +0000 |
---|---|---|
committer | Alexander Kornienko <alexfh@google.com> | 2013-03-26 02:28:59 +0000 |
commit | d934545ae6a00aa8a8179a93d11cbd93a5240849 (patch) | |
tree | ab44db08aa63a8f94a3e09d6491c4156c624af96 /lib/Target/PowerPC | |
parent | 868d4470cdfa9472353ea2a49a6c456ddae9c95b (diff) | |
parent | c204410d6bc435e7cb8ea768759a54135e8e92b5 (diff) |
Updating branches/google/testing to r177703 (testing)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/testing@177985 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/PowerPC')
20 files changed, 1079 insertions, 509 deletions
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 7917f7736e..709daa4e4b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -12,6 +12,8 @@ #include "llvm/MC/MCFixup.h" +#undef PPC + namespace llvm { namespace PPC { enum Fixups { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 4a420929d0..38a7420d97 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -47,6 +47,10 @@ MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, uint8_t OSABI); } // End llvm namespace +// Generated files will use "namespace PPC". To avoid symbol clash, +// undefine PPC here. PPC may be predefined on some hosts. +#undef PPC + // Defines symbolic names for PowerPC registers. This defines a mapping from // register name to register number. // diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index 972e13852e..b0680fbb8c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -17,6 +17,10 @@ // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC +// Generated files will use "namespace PPC". To avoid symbol clash, +// undefine PPC here. PPC may be predefined on some hosts. +#undef PPC + namespace llvm { namespace PPC { /// Predicate - These are "(BI << 5) | BO" for various predicates. 
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index b98cc489f6..81a54d7015 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -189,12 +189,23 @@ INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", /// isCompareEquals - Returns true if the instruction is a compare equals /// instruction with an immediate operand. -static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) { - if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) { +static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp, + bool &Int64Cmp) { + if (MI->getOpcode() == PPC::CMPWI) { SignedCmp = true; + Int64Cmp = false; return true; - } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) { + } else if (MI->getOpcode() == PPC::CMPDI) { + SignedCmp = true; + Int64Cmp = true; + return true; + } else if (MI->getOpcode() == PPC::CMPLWI) { + SignedCmp = false; + Int64Cmp = false; + return true; + } else if (MI->getOpcode() == PPC::CMPLDI) { SignedCmp = false; + Int64Cmp = true; return true; } @@ -353,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); RI != RE; ++RI) { IV_Opnd = &RI.getOperand(); - bool SignedCmp; + bool SignedCmp, Int64Cmp; MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) && + if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) && MI->getOperand(0).getReg() == PredReg) { OldInsts.push_back(MI); @@ -380,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, assert(InitialValue->isReg() && "Expecting register for init value"); unsigned InitialValueReg = InitialValue->getReg(); - const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); + MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); // Here we need to look for an immediate load (an li or lis/ori pair). 
if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 || DefInstr->getOpcode() == PPC::ORI)) { - int64_t start = (short) DefInstr->getOperand(2).getImm(); - const MachineInstr *DefInstr2 = - MRI->getVRegDef(DefInstr->getOperand(0).getReg()); + int64_t start = DefInstr->getOperand(2).getImm(); + MachineInstr *DefInstr2 = + MRI->getVRegDef(DefInstr->getOperand(1).getReg()); if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 || DefInstr2->getOpcode() == PPC::LIS)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); @@ -399,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, if ((count % iv_value) != 0) { return 0; } - return new CountValue(count/iv_value); + + OldInsts.push_back(DefInstr); + OldInsts.push_back(DefInstr2); + + // count/iv_value, the trip count, should be positive here. If it + // is negative, that indicates that the counter will wrap. + if (Int64Cmp) + return new CountValue(count/iv_value); + else + return new CountValue(uint32_t(count/iv_value)); } } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 || DefInstr->getOpcode() == PPC::LI)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); - int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm())); + int64_t count = ImmVal - + int64_t(short(DefInstr->getOperand(1).getImm())); if ((count % iv_value) != 0) { return 0; } - return new CountValue(count/iv_value); + + OldInsts.push_back(DefInstr); + + if (Int64Cmp) + return new CountValue(count/iv_value); + else + return new CountValue(uint32_t(count/iv_value)); } else if (iv_value == 1 || iv_value == -1) { // We can't determine a constant starting value. if (ImmVal == 0) { @@ -417,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, } // FIXME: handle non-zero end value. } - // FIXME: handle non-unit increments (we might not want to introduce division - // but we can handle some 2^n cases with shifts). 
+ // FIXME: handle non-unit increments (we might not want to introduce + // division but we can handle some 2^n cases with shifts). } } @@ -489,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI, if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (!MRI->use_nodbg_empty(Reg)) { - // This instruction has users, but if the only user is the phi node for the - // parent block, and the only use of that phi node is this instruction, then - // this instruction is dead: both it (and the phi node) can be removed. + // This instruction has users, but if the only user is the phi node for + // the parent block, and the only use of that phi node is this + // instruction, then this instruction is dead: both it (and the phi + // node) can be removed. MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg); if (llvm::next(I) == MRI->use_end() && I.getOperand().getParent()->isPHI()) { @@ -594,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { DEBUG(dbgs() << "failed to get trip count!\n"); return false; } + + if (TripCount->isImm()) { + DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n"); + + // FIXME: We currently can't form 64-bit constants + // (including 32-bit unsigned constants) + if (!isInt<32>(TripCount->getImm())) + return false; + } + // Does the loop contain any invalid instructions? if (containsInvalidInstruction(L)) { return false; @@ -647,7 +685,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { const TargetRegisterClass *SrcRC = MF->getRegInfo().getRegClass(TripCount->getReg()); CountReg = MF->getRegInfo().createVirtualRegister(RC); - unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ? + unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ? (unsigned) PPC::EXTSW_32_64 : (unsigned) TargetOpcode::COPY; BuildMI(*Preheader, InsertPos, dl, @@ -664,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { // Put the trip count in a register for transfer into the count register. 
int64_t CountImm = TripCount->getImm(); - assert(!TripCount->isNeg() && "Constant trip count must be positive"); + if (TripCount->isNeg()) + CountImm = -CountImm; CountReg = MF->getRegInfo().createVirtualRegister(RC); - if (CountImm > 0xFFFF) { + if (abs64(CountImm) > 0x7FFF) { BuildMI(*Preheader, InsertPos, dl, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), - CountReg).addImm(CountImm >> 16); + CountReg).addImm((CountImm >> 16) & 0xFFFF); unsigned CountReg1 = CountReg; CountReg = MF->getRegInfo().createVirtualRegister(RC); BuildMI(*Preheader, InsertPos, dl, diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index caeb1796f7..2a680661d3 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -136,3 +136,8 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAV F27, F28, F29, F30, F31, CR2, CR3, CR4, V20, V21, V22, V23, V24, V25, V26, V27, V28, V29, V30, V31)>; + +def CSR_NoRegs : CalleeSavedRegs<(add)>; + +def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>; + diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 0a396e6693..0108f7256e 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -188,13 +188,26 @@ static bool spillsCR(const MachineFunction &MF) { return FuncInfo->isCRSpilled(); } +static bool hasSpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->hasSpills(); +} + +static bool hasNonRISpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->hasNonRISpills(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. 
-void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { +unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, + bool UpdateMF, + bool UseEstimate) const { MachineFrameInfo *MFI = MF.getFrameInfo(); // Get the number of bytes to allocate from the FrameInfo - unsigned FrameSize = MFI->getStackSize(); + unsigned FrameSize = + UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize(); // Get the alignments provided by the target, and the maximum alignment // (if any) of the fixed frame objects. @@ -223,8 +236,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { && spillsCR(MF)) && (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame - MFI->setStackSize(0); - return; + if (UpdateMF) + MFI->setStackSize(0); + return 0; } // Get the maximum call frame size of all the calls. @@ -241,7 +255,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; // Update maximum call frame size. - MFI->setMaxCallFrameSize(maxCallFrameSize); + if (UpdateMF) + MFI->setMaxCallFrameSize(maxCallFrameSize); // Include call frame size in total. FrameSize += maxCallFrameSize; @@ -250,7 +265,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { FrameSize = (FrameSize + AlignMask) & ~AlignMask; // Update frame info. - MFI->setStackSize(FrameSize); + if (UpdateMF) + MFI->setStackSize(FrameSize); + + return FrameSize; } // hasFP - Return true if the specified function actually has a dedicated frame @@ -281,6 +299,31 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } +void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { + bool is31 = needsFP(MF); + unsigned FPReg = is31 ? PPC::R31 : PPC::R1; + unsigned FP8Reg = is31 ? 
PPC::X31 : PPC::X1; + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); + BI != BE; ++BI) + for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { + --MBBI; + for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { + MachineOperand &MO = MBBI->getOperand(I); + if (!MO.isReg()) + continue; + + switch (MO.getReg()) { + case PPC::FP: + MO.setReg(FPReg); + break; + case PPC::FP8: + MO.setReg(FP8Reg); + break; + } + } + } +} void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB @@ -311,13 +354,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MBBI = MBB.begin(); // Work out frame sizes. - // FIXME: determineFrameLayout() may change the frame size. This should be - // moved upper, to some hook. - determineFrameLayout(MF); - unsigned FrameSize = MFI->getStackSize(); - + unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; + if (MFI->isFrameAddressTaken()) + replaceFPWithRealFP(MF); + // Get processor type. bool isPPC64 = Subtarget.isPPC64(); // Get operating system @@ -780,7 +822,7 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { void PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { + RegScavenger *) const { const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); // Save and clear the LR state. @@ -822,30 +864,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); FI->setCRSpillFrameIndex(FrameIdx); } - - // Reserve a slot closest to SP or frame pointer if we have a dynalloc or - // a large stack, which will require scavenging a register to materialize a - // large offset. 
- // FIXME: this doesn't actually check stack size, so is a bit pessimistic - // FIXME: doesn't detect whether or not we need to spill vXX, which requires - // r0 for now. - - if (RegInfo->requiresRegisterScavenging(MF)) - if (needsFP(MF) || spillsCR(MF)) { - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } } -void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { +void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const { // Early exit if not using the SVR4 ABI. - if (!Subtarget.isSVR4ABI()) + if (!Subtarget.isSVR4ABI()) { + addScavengingSpillSlot(MF, RS); return; + } // Get callee saved register information. MachineFrameInfo *FFI = MF.getFrameInfo(); @@ -853,6 +880,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) // Early exit if no callee saved registers are modified! if (CSI.empty() && !needsFP(MF)) { + addScavengingSpillSlot(MF, RS); return; } @@ -1031,6 +1059,37 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } } + + addScavengingSpillSlot(MF, RS); +} + +void +PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, + RegScavenger *RS) const { + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or + // a large stack, which will require scavenging a register to materialize a + // large offset. + + // We need to have a scavenger spill slot for spills if the frame size is + // large. In case there is no free register for large-offset addressing, + // this slot is used for the necessary emergency spill. Also, we need the + // slot for dynamic stack allocations. 
+ + // The scavenger might be invoked if the frame offset does not fit into + // the 16-bit immediate. We don't know the complete frame size here + // because we've not yet computed callee-saved register spills or the + // needed alignment padding. + unsigned StackSize = determineFrameLayout(MF, false, true); + MachineFrameInfo *MFI = MF.getFrameInfo(); + if (MFI->hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || + (hasSpills(MF) && !isInt<16>(StackSize))) { + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + } } bool diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index d09e47fafd..6f5f9368c6 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -32,7 +32,9 @@ public: Subtarget(sti) { } - void determineFrameLayout(MachineFunction &MF) const; + unsigned determineFrameLayout(MachineFunction &MF, + bool UpdateMF = true, + bool UseEstimate = false) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
@@ -41,10 +43,13 @@ public: bool hasFP(const MachineFunction &MF) const; bool needsFP(const MachineFunction &MF) const; + void replaceFPWithRealFP(MachineFunction &MF) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; + void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -144,6 +149,9 @@ public: return 0; } + // Note that the offsets here overlap, but this is fixed up in + // processFunctionBeforeFrameFinalized. + static const SpillSlot Offsets[] = { // Floating-point register save area offsets. {PPC::F31, -8}, diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 17bea8a6a6..90a3a5c6a4 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -164,6 +164,12 @@ namespace { return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG); } + // Select an address into a single register. + bool SelectAddr(SDValue N, SDValue &Base) { + Base = N; + return true; + } + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. It is always correct to compute the value into /// a register. 
The case of adding a (possibly relocatable) constant to a diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 13cb358fc0..c7d454692e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); +static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", +cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); + static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) return new TargetLoweringObjectFileMachO(); @@ -67,6 +70,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); + PPCRegInfo = TM.getRegisterInfo(); setPow2DivIsCheap(); @@ -208,6 +212,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support + // SjLj exception handling but a light-weight setjmp/longjmp replacement to + // support continuation, user-level threading, etc. As a result, no + // other SjLj exception interfaces are implemented and please don't build + // your own exception handling based on them. + // LLVM/Clang supports zero-cost DWARF exception handling. 
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. @@ -564,6 +576,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin"; case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; + case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; + case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFCR: return "PPCISD::MFCR"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; @@ -1028,7 +1042,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm)) { Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } @@ -1077,7 +1091,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, } // Otherwise, do it the hard way, using R0 as the base register. - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, N.getValueType()); Index = N; return true; @@ -1140,7 +1154,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm)) { Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy()); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? 
PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } @@ -1180,13 +1194,15 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue Ptr; EVT VT; + unsigned Alignment; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); - + Alignment = LD->getAlignment(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); + Alignment = ST->getAlignment(); } else return false; @@ -1205,6 +1221,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (!SelectAddressRegImm(Ptr, Offset, Base, DAG)) return false; } else { + // LDU/STU need an address with at least 4-byte alignment. + if (Alignment < 4) + return false; + // reg + imm * 4. if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG)) return false; @@ -4555,6 +4575,21 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3); } +SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, + DAG.getVTList(MVT::i32, MVT::Other), + Op.getOperand(0), Op.getOperand(1)); +} + +SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, + Op.getOperand(0), Op.getOperand(1)); +} + /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. 
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { @@ -5551,6 +5586,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); + case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, @@ -5744,7 +5782,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. bool is64bit = PPCSubTarget.isPPC64(); - unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; + unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); @@ -5863,9 +5901,238 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, return BB; } +llvm::MachineBasicBlock* +PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = MBB; + ++I; + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + unsigned DstReg = MI->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + assert(RC->hasType(MVT::i32) && "Invalid destination!"); + unsigned mainDstReg = MRI.createVirtualRegister(RC); + unsigned restoreDstReg = MRI.createVirtualRegister(RC); + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + 
// For v = setjmp(buf), we generate + // + // thisMBB: + // SjLjSetup mainMBB + // bl mainMBB + // v_restore = 1 + // b sinkMBB + // + // mainMBB: + // buf[LabelOffset] = LR + // v_main = 0 + // + // sinkMBB: + // v = phi(main, restore) + // + + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, mainMBB); + MF->insert(I, sinkMBB); + + MachineInstrBuilder MIB; + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // Note that the structure of the jmp_buf used here is not compatible + // with that used by libc, and is not designed to be. Specifically, it + // stores only those 'reserved' registers that LLVM does not otherwise + // understand how to spill. Also, by convention, by the time this + // intrinsic is called, Clang has already stored the frame address in the + // first slot of the buffer and stack address in the third. Following the + // X86 target code, we'll store the jump address in the second slot. We also + // need to save the TOC pointer (R2) to handle jumps between shared + // libraries, and that will be stored in the fourth slot. The thread + // identifier (R13) is not affected. + + // thisMBB: + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + const int64_t TOCOffset = 3 * PVT.getStoreSize(); + + // Prepare IP either in reg. 
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT); + unsigned LabelReg = MRI.createVirtualRegister(PtrRC); + unsigned BufReg = MI->getOperand(1).getReg(); + + if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) { + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) + .addReg(PPC::X2) + .addImm(TOCOffset / 4) + .addReg(BufReg); + + MIB.setMemRefs(MMOBegin, MMOEnd); + } + + // Setup + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCL)).addMBB(mainMBB); + MIB.addRegMask(PPCRegInfo->getNoPreservedMask()); + + BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); + + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) + .addMBB(mainMBB); + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); + + thisMBB->addSuccessor(mainMBB, /* weight */ 0); + thisMBB->addSuccessor(sinkMBB, /* weight */ 1); + + // mainMBB: + // mainDstReg = 0 + MIB = BuildMI(mainMBB, DL, + TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); + + // Store IP + if (PPCSubTarget.isPPC64()) { + MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) + .addReg(LabelReg) + .addImm(LabelOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) + .addReg(LabelReg) + .addImm(LabelOffset) + .addReg(BufReg); + } + + MIB.setMemRefs(MMOBegin, MMOEnd); + + BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); + mainMBB->addSuccessor(sinkMBB); + + // sinkMBB: + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(PPC::PHI), DstReg) + .addReg(mainDstReg).addMBB(mainMBB) + .addReg(restoreDstReg).addMBB(thisMBB); + + MI->eraseFromParent(); + return sinkMBB; +} + +MachineBasicBlock * +PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin 
= MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + + const TargetRegisterClass *RC = + (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned Tmp = MRI.createVirtualRegister(RC); + // Since FP is only updated here but NOT referenced, it's treated as GPR. + unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; + unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; + + MachineInstrBuilder MIB; + + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + const int64_t SPOffset = 2 * PVT.getStoreSize(); + const int64_t TOCOffset = 3 * PVT.getStoreSize(); + + unsigned BufReg = MI->getOperand(0).getReg(); + + // Reload FP (the jumped-to function may not have had a + // frame pointer, and if so, then its r31 will be restored + // as necessary). + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) + .addImm(0) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) + .addImm(0) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // Reload IP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) + .addImm(LabelOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) + .addImm(LabelOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // Reload SP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) + .addImm(SPOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) + .addImm(SPOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // FIXME: When we also support base pointers, that register must also be + // restored here. 
+ + // Reload TOC + if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) + .addImm(TOCOffset / 4) + .addReg(BufReg); + + MIB.setMemRefs(MMOBegin, MMOEnd); + } + + // Jump + BuildMI(*MBB, MI, DL, + TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); + BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); + + MI->eraseFromParent(); + return MBB; +} + MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { + if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || + MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { + return emitEHSjLjSetJmp(MI, BB); + } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 || + MI->getOpcode() == PPC::EH_SjLj_LongJmp64) { + return emitEHSjLjLongJmp(MI, BB); + } + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); // To "insert" these instructions we actually have to insert their @@ -6133,7 +6400,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = RegInfo.createVirtualRegister(RC); - unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; + unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; // thisMBB: // ... 
// fallthrough --> loopMBB @@ -6622,6 +6889,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'b': // R1-R31 + if (VT == MVT::i64 && PPCSubTarget.isPPC64()) + return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); + return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); case 'r': // R0-R31 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) return std::make_pair(0U, &PPC::G8RCRegClass); @@ -6806,13 +7076,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); - bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || - MFI->hasVarSizedObjects()) && - MFI->getStackSize() && - !MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked); - unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : - (is31 ? PPC::R31 : PPC::R1); + + // Naked functions never have a frame pointer, and so we use r1. For all + // other functions, this decision must be delayed until during PEI. + unsigned FrameReg; + if (MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::Naked)) + FrameReg = isPPC64 ? PPC::X1 : PPC::R1; + else + FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); while (Depth--) @@ -6851,6 +7124,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, } } +bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, + bool *Fast) const { + if (DisablePPCUnaligned) + return false; + + // PowerPC supports unaligned memory access for simple non-vector types. + // Although accessing unaligned addresses is not as efficient as accessing + // aligned addresses, it is generally more efficient than manual expansion, + // and generally only traps for software emulation when crossing page + // boundaries. 
+ + if (!VT.isSimple()) + return false; + + if (VT.getSimpleVT().isVector()) + return false; + + if (VT == MVT::ppcf128) + return false; + + if (Fast) + *Fast = true; + + return true; +} + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 3931384d89..1b0427e021 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -16,6 +16,7 @@ #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H #include "PPC.h" +#include "PPCRegisterInfo.h" #include "PPCSubtarget.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" @@ -119,6 +120,12 @@ namespace llvm { /// are undefined. MFCR, + // EH_SJLJ_SETJMP - SjLj exception handling setjmp. + EH_SJLJ_SETJMP, + + // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. + EH_SJLJ_LONGJMP, + /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* /// instructions. For lack of better number, we use the opcode number /// encoding for the OPC field to identify the compare. For example, 838 @@ -321,6 +328,7 @@ namespace llvm { class PPCTargetLowering : public TargetLowering { const PPCSubtarget &PPCSubTarget; + const PPCRegisterInfo *PPCRegInfo; public: explicit PPCTargetLowering(PPCTargetMachine &TM); @@ -395,6 +403,12 @@ namespace llvm { MachineBasicBlock *MBB, bool is8bit, unsigned Opcode) const; + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + ConstraintType getConstraintType(const std::string &Constraint) const; /// Examine constraint string and operand type and determine a weight value. 
@@ -449,6 +463,10 @@ namespace llvm { bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; + /// Is unaligned memory access allowed for the given type, and is it fast + /// relative to software emulation. + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd @@ -604,6 +622,9 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + + SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 01201304f7..d2b1d6d392 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -30,12 +30,12 @@ def symbolLo64 : Operand<i64> { let EncoderMethod = "getLO16Encoding"; } def tocentry : Operand<iPTR> { - let MIOperandInfo = (ops i32imm:$imm); + let MIOperandInfo = (ops i64imm:$imm); } def memrs : Operand<iPTR> { // memri where the immediate is a symbolLo64 let PrintMethod = "printMemRegImm"; let EncoderMethod = "getMemRIXEncoding"; - let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo64:$off, ptr_rc_nor0:$reg); } def tlsreg : Operand<i64> { let EncoderMethod = "getTLSRegEncoding"; @@ -273,6 +273,19 @@ def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins), "mfcr $rT", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; +let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in { + def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + "#EH_SJLJ_SETJMP64", + [(set GPRC:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, + Requires<[In64BitMode]>; + let 
isTerminator = 1 in + def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf), + "#EH_SJLJ_LONGJMP64", + [(PPCeh_sjlj_longjmp addr:$buf)]>, + Requires<[In64BitMode]>; +} + //===----------------------------------------------------------------------===// // 64-bit SPR manipulation instrs. @@ -393,15 +406,16 @@ def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), "addic $rD, $rA, $imm", IntGeneral, [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>; } -def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), +def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, s16imm64:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; -def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm), + [(set G8RC:$rD, (add G8RC_NOX0:$rA, immSExt16:$imm))]>; +def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; -def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm), + [(set G8RC:$rD, (add G8RC_NOX0:$rA, immSExt16:$imm))]>; +def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm), "addis $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>; + [(set G8RC:$rD, (add G8RC_NOX0:$rA, + imm16ShiftedSExt:$imm))]>; let Defs = [CARRY] in { def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), @@ -536,7 +550,7 @@ def RLWINM8 : MForm_2<21, []>; def ISEL8 : AForm_4<31, 15, - (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond), + (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, pred:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; } // End FXU Operations. 
@@ -555,7 +569,8 @@ def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src), PPC970_DGroup_Cracked; def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src), "lwa $rD, $src", LdStLWA, - [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64, + [(set G8RC:$rD, + (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, @@ -567,25 +582,26 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), PPC970_DGroup_Cracked; // Update forms. -let mayLoad = 1 in -def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, - ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStLHAU, - []>, RegConstraint<"$rA = $ea_result">, +let mayLoad = 1 in { +def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), + (ins memri:$addr), + "lhau $rD, $addr", LdStLHAU, + []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! -def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } +} // Zero extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { @@ -612,30 +628,30 @@ def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), // Update forms. 
let mayLoad = 1 in { -def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), +def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -648,7 +664,7 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, - [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64; + [(set G8RC:$rD, (aligned4load ixaddr:$src))]>, isPPC64; def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src), "ld $rD, $src", LdStLD, 
[]>, isPPC64; @@ -684,12 +700,12 @@ def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; let mayLoad = 1 in -def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), +def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), "ldu $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; -def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result), +def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "ldux $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -798,82 +814,11 @@ def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), // Normal 8-byte stores. def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst), "std $rS, $dst", LdStSTD, - [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64; + [(aligned4store G8RC:$rS, ixaddr:$dst)]>, isPPC64; def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), "stdx $rS, $dst", LdStSTD, [(store G8RC:$rS, xaddr:$dst)]>, isPPC64, PPC970_DGroup_Cracked; -} - -let PPC970_Unit = 2 in { - -def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - -def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - 
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - -def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - s16immX4:$ptroff, ptr_rc:$ptrreg), - "stdu $rS, $ptroff($ptrreg)", LdStSTDU, - [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, - isPPC64; - - -def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti32 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stdux $rS, $ptroff, $ptrreg", LdStSTDU, - [(set ptr_rc:$ea_res, - (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked, isPPC64; - // STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register. def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst), "std $rT, $dst", LdStSTD, @@ -884,6 +829,60 @@ def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst), PPC970_DGroup_Cracked; } +// Stores with Update (pre-inc). 
+let PPC970_Unit = 2, mayStore = 1 in { +def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "stbu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "sthu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst), + "stdu $rS, $dst", LdStSTDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, + isPPC64; + +def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stbux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "sthux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stwux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stdux $rS, $dst", LdStSTDU, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked, isPPC64; +} + +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. 
+def : Pat<(pre_truncsti8 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STBU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_truncsti16 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STHU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_truncsti32 G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STWU8 G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(aligned4pre_store G8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STDU G8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; + +def : Pat<(pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STBUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STHUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STWUX8 G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STDUX G8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; //===----------------------------------------------------------------------===// @@ -979,3 +978,13 @@ def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)), (ADDIS8 G8RC:$in, tjumptable:$g)>; def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)), (ADDIS8 G8RC:$in, tblockaddress:$g)>; + +// Patterns to match r+r indexed loads and stores for +// addresses without at least 4-byte alignment. 
+def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)), + (LWAX xoaddr:$src)>; +def : Pat<(i64 (unaligned4load xoaddr:$src)), + (LDX xoaddr:$src)>; +def : Pat<(unaligned4store G8RC:$rS, xoaddr:$dst), + (STDX G8RC:$rS, xoaddr:$dst)>; + diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 0cf28ae4b5..0ed7ff2cc4 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -182,6 +182,9 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID> //===----------------------------------------------------------------------===// // Instruction Definitions. +def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">; +let Predicates = [HasAltivec] in { + def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), "dss $STRM", LdStLoad /*FIXME*/, []>; @@ -733,3 +736,6 @@ def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))), (VRFIZ VRRC:$vA)>; def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))), (VRFIN VRRC:$vA)>; + +} // end HasAltivec + diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index c3c171cd21..16dbee5d29 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -120,6 +120,18 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL, let CR = 0; } +class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, + dag OOL, dag IOL, string asmstr> + : I<opcode, OOL, IOL, asmstr, BrB> { + bits<14> BD; + + let Inst{6-10} = bo; + let Inst{11-15} = bi; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + // 1.7.4 D-Form class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index a0517a80a9..cf39386c67 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -33,11 +33,6 @@ #define 
GET_INSTRINFO_CTOR #include "PPCGenInstrInfo.inc" -namespace llvm { -extern cl::opt<bool> DisablePPC32RS; -extern cl::opt<bool> DisablePPC64RS; -} - using namespace llvm; static cl:: @@ -444,7 +439,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const{ + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const{ DebugLoc DL; if (PPC::GPRCRegClass.hasSubClassEq(RC)) { if (SrcReg != PPC::LR) { @@ -489,47 +485,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) - .addReg(SrcReg, - getKillRegState(isKill)), - FrameIdx)); - return true; - } else { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - - bool is64Bit = TM.getSubtargetImpl()->isPPC64(); - // We need to store the CR in the low 4-bits of the saved value. First, - // issue a MFCR to save all of the CRBits. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - (is64Bit ? PPC::X2 : PPC::R2) : - (is64Bit ? PPC::X0 : PPC::R0); - NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud : - PPC::MFCRpseud), ScratchReg) - .addReg(SrcReg, getKillRegState(isKill))); - - // If the saved register wasn't CR0, shift the bits left so that they are - // in CR0's slot. - if (SrcReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4; - // rlwinm scratch, scratch, ShiftBits, 0, 31. 
- NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 : - PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(ShiftBits) - .addImm(0).addImm(31)); - } - - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ? - PPC::STW8 : PPC::STW)) - .addReg(ScratchReg, - getKillRegState(isKill)), - FrameIdx)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { // FIXME: We use CRi here because there is no mtcrf on a bit. Since the // backend currently only uses CR1EQ as an individual bit, this should @@ -562,23 +522,20 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, Reg = PPC::CR7; return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, - &PPC::CRRCRegClass, NewMIs); + &PPC::CRRCRegClass, NewMIs, NonRI); } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - // We don't have indexed addressing for vector loads. Emit: - // R0 = ADDI FI# - // STVX VAL, 0, R0 - // - // FIXME: We use R0 here, because it isn't available for RA. - bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); - unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned GPR0 = Is64Bit ? 
PPC::X0 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), - FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX)) - .addReg(SrcReg, getKillRegState(isKill)) - .addReg(GPR0) - .addReg(GPR0)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; + } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -595,10 +552,15 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr*, 4> NewMIs; - if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasSpills(); + + bool NonRI = false; + if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs, NonRI)) FuncInfo->setSpillsCR(); - } + + if (NonRI) + FuncInfo->setHasNonRISpills(); for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); @@ -616,7 +578,8 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs)const{ + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const{ if (PPC::GPRCRegClass.hasSubClassEq(RC)) { if (DestReg != PPC::LR) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), @@ -642,37 +605,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (!DisablePPC64RS && 
TM.getSubtargetImpl()->isPPC64())) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, - get(PPC::RESTORE_CR), DestReg) - , FrameIdx)); - return true; - } else { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - PPC::R2 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - ScratchReg), FrameIdx)); - - // If the reloaded register isn't CR0, shift the bits right so that they are - // in the right CR's slot. - if (DestReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; - // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) - .addImm(31)); - } - - NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ? - PPC::MTCRF8 : PPC::MTCRF), DestReg) - .addReg(ScratchReg)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_CR), DestReg), + FrameIdx)); + return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { unsigned Reg = 0; @@ -702,21 +638,18 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, Reg = PPC::CR7; return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, - &PPC::CRRCRegClass, NewMIs); + &PPC::CRRCRegClass, NewMIs, NonRI); } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - // We don't have indexed addressing for vector loads. Emit: - // R0 = ADDI FI# - // Dest = LVX 0, R0 - // - // FIXME: We use R0 here, because it isn't available for RA. - bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); - unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned GPR0 = Is64Bit ? 
PPC::X0 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), - FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0) - .addReg(GPR0)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg), + FrameIdx)); + NonRI = true; + } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_VRSAVE), + DestReg), + FrameIdx)); + NonRI = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -734,10 +667,17 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, SmallVector<MachineInstr*, 4> NewMIs; DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); - if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasSpills(); + + bool NonRI = false; + if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs, NonRI)) FuncInfo->setSpillsCR(); - } + + if (NonRI) + FuncInfo->setHasNonRISpills(); + for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 374213ea43..5d4ae915b4 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -71,11 +71,13 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const; bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const; public: explicit PPCInstrInfo(PPCTargetMachine &TM); diff --git 
a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 460e94342d..84cdb1f39d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -158,6 +158,14 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP", + SDTypeProfile<1, 1, [SDTCisInt<0>, + SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPSideEffect]>; +def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPSideEffect]>; + def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; @@ -278,6 +286,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue(); }], HI16>; +// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require +// restricted memrix (offset/4) constants are alignment sensitive. If these +// offsets are hidden behind TOC entries than the values of the lower-order +// bits cannot be checked directly. As a result, we need to also incorporate +// an alignment check into the relevant patterns. 
+ +def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4pre_store : PatFrag< + (ops node:$val, node:$base, node:$offset), + (pre_store node:$val, node:$base, node:$offset), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; + +def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; //===----------------------------------------------------------------------===// // PowerPC Flag Definitions. @@ -314,9 +354,6 @@ def s16imm : Operand<i32> { def u16imm : Operand<i32> { let PrintMethod = "printU16ImmOperand"; } -def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing. - let PrintMethod = "printS16X4ImmOperand"; -} def directbrtarget : Operand<OtherVT> { let PrintMethod = "printBranchOperand"; let EncoderMethod = "getDirectBrEncoding"; @@ -344,21 +381,30 @@ def crbitm: Operand<i8> { let EncoderMethod = "get_crbitm_encoding"; } // Address operands +// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode). 
+def ptr_rc_nor0 : PointerLikeRegClass<1>; + def memri : Operand<iPTR> { let PrintMethod = "printMemRegImm"; - let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIEncoding"; } def memrr : Operand<iPTR> { let PrintMethod = "printMemRegReg"; - let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg); + let MIOperandInfo = (ops ptr_rc_nor0:$offreg, ptr_rc:$ptrreg); } def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; - let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIXEncoding"; } +// A single-register address. This is used with the SjLj +// pseudo-instructions. +def memr : Operand<iPTR> { + let MIOperandInfo = (ops ptr_rc:$ptrreg); +} + // PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg // that doesn't matter. def pred : PredicateOperand<OtherVT, (ops imm, CRRC), @@ -372,6 +418,10 @@ def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>; def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>; def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std" +// The address in a single register. This is used with the SjLj +// pseudo-instructions. +def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>; + /// This is just the offset part of iaddr, used for preinc. def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>; def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>; @@ -473,6 +523,14 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { } } +// The direct BCL used by the SjLj setjmp code. +let isCall = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { + let Defs = [LR], Uses = [RM] in { + def BCL : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst), + "bcl 20, 31, $dst">; + } +} + // Darwin ABI Calls. 
let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions @@ -551,6 +609,23 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst), "ba $dst", BrB, []>; +let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in { + def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + "#EH_SJLJ_SETJMP32", + [(set GPRC:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, + Requires<[In32BitMode]>; + let isTerminator = 1 in + def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf), + "#EH_SJLJ_LONGJMP32", + [(PPCeh_sjlj_longjmp addr:$buf)]>, + Requires<[In32BitMode]>; +} + +let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in { + def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst), + "#EH_SjLj_Setup\t$dst", []>; +} // DCB* instructions. def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), @@ -708,69 +783,69 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), // Unindexed (r+i) Loads with Update (preinc). let mayLoad = 1 in { -def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, 
RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfsu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfdu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // Indexed (r+r) Loads with Update (preinc). -def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result), +def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result), +def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result), +def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfsux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, NoEncode<"$ea_result">; -def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, 
ptr_rc:$ea_result), +def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfdux $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.offreg = $ea_result">, @@ -835,44 +910,39 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), } // Unindexed (r+i) Stores with Update (preinc). -let PPC970_Unit = 2 in { -def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU, - [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU, - [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +let PPC970_Unit = 2, mayStore = 1 in { +def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "stbu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = 
$ea_res">, NoEncode<"$ea_res">; +def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "sthu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst), + "stfsu $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst), + "stfdu $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; } +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STBU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_truncsti16 GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STHU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_store GPRC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STWU GPRC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_store F4RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STFSU F4RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; +def : Pat<(pre_store F8RC:$rS, ptr_rc_nor0:$ptrreg, iaddroff:$ptroff), + (STFDU F8RC:$rS, iaddroff:$ptroff, ptr_rc_nor0:$ptrreg)>; // Indexed (r+r) Stores. 
-// let PPC970_Unit = 2 in { def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst), "stbx $rS, $dst", LdStStore, @@ -887,48 +957,6 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), [(store GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 GPRC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 GPRC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), - (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU, - [(set ptr_rc:$ea_res, - (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res), - (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU, - [(set ptr_rc:$ea_res, - (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), "sthbrx $rS, $dst", LdStStore, [(PPCstbrx GPRC:$rS, 
xoaddr:$dst, i16)]>, @@ -950,6 +978,44 @@ def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), [(store F8RC:$frS, xaddr:$dst)]>; } +// Indexed (r+r) Stores with Update (preinc). +let PPC970_Unit = 2, mayStore = 1 in { +def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "stbux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "sthux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "stwux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst), + "stfsux $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst), + "stfdux $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.offreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +} + +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. 
+def : Pat<(pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STBUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STHUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STWUX GPRC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STFSUX F4RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; +def : Pat<(pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff), + (STFDUX F8RC:$rS, xaddroff:$ptroff, ptr_rc:$ptrreg)>; + def SYNC : XForm_24_sync<31, 598, (outs), (ins), "sync", LdStSync, [(int_ppc_sync)]>; @@ -959,12 +1025,12 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins), // let PPC970_Unit = 1 in { // FXU Operations. -def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), +def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, s16imm:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; -def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm), + [(set GPRC:$rD, (add GPRC_NOR0:$rA, immSExt16:$imm))]>; +def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; + [(set GPRC:$rD, (add GPRC_NOR0:$rA, immSExt16:$imm))]>; let Defs = [CARRY] in { def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addic $rD, $rA, $imm", IntGeneral, @@ -974,12 +1040,13 @@ def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addic. 
$rD, $rA, $imm", IntGeneral, []>; } -def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm), +def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm), "addis $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>; -def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym), + [(set GPRC:$rD, (add GPRC_NOR0:$rA, + imm16ShiftedSExt:$imm))]>; +def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym), "la $rD, $sym($rA)", IntGeneral, - [(set GPRC:$rD, (add GPRC:$rA, + [(set GPRC:$rD, (add GPRC_NOR0:$rA, (PPClo tglobaladdr:$sym, 0)))]>; def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "mulli $rD, $rA, $imm", IntMulLI, @@ -1213,6 +1280,29 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins), "mfspr $rT, 256", IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; +let isCodeGenOnly = 1 in { + def MTVRSAVEv : XFXForm_7_ext<31, 467, 256, + (outs VRSAVERC:$reg), (ins GPRC:$rS), + "mtspr 256, $rS", IntGeneral>, + PPC970_DGroup_Single, PPC970_Unit_FXU; + def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), + (ins VRSAVERC:$reg), + "mfspr $rT, 256", IntGeneral>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register, +// so we'll need to scavenge a register for it. +let mayStore = 1 in +def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F), + "#SPILL_VRSAVE", []>; + +// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously +// spilled), so we'll need to scavenge a register for it. +let mayLoad = 1 in +def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), + "#RESTORE_VRSAVE", []>; + def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -1435,7 +1525,7 @@ let Uses = [RM] in { let PPC970_Unit = 1 in { // FXU Operations. 
def ISEL : AForm_4<31, 15, - (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond), + (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, pred:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; } diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 045b375dd8..b1636a20b8 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -37,6 +37,13 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// PEI. bool MustSaveLR; + /// Does this function have any stack spills. + bool HasSpills; + + /// Does this function spill using instructions with only r+r (not r+i) + /// forms. + bool HasNonRISpills; + /// SpillsCR - Indicates whether CR is spilled in the current function. bool SpillsCR; @@ -78,6 +85,8 @@ public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), ReturnAddrSaveIndex(0), + HasSpills(false), + HasNonRISpills(false), SpillsCR(false), LRStoreRequired(false), MinReservedArea(0), @@ -109,6 +118,12 @@ public: void setMustSaveLR(bool U) { MustSaveLR = U; } bool mustSaveLR() const { return MustSaveLR; } + void setHasSpills() { HasSpills = true; } + bool hasSpills() const { return HasSpills; } + + void setHasNonRISpills() { HasNonRISpills = true; } + bool hasNonRISpills() const { return HasNonRISpills; } + void setSpillsCR() { SpillsCR = true; } bool isCRSpilled() const { return SpillsCR; } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index df245cc655..75e20e4a26 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -46,26 +46,8 @@ #define GET_REGINFO_TARGET_DESC #include "PPCGenRegisterInfo.inc" -namespace llvm { -cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger", - cl::init(false), - cl::desc("Disable PPC32 register scavenger"), - cl::Hidden); -cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger", - cl::init(false), - 
cl::desc("Disable PPC64 register scavenger"), - cl::Hidden); -} - using namespace llvm; -// FIXME (64-bit): Should be inlined. -bool -PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const { - return ((!DisablePPC32RS && !Subtarget.isPPC64()) || - (!DisablePPC64RS && Subtarget.isPPC64())); -} - PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, const TargetInstrInfo &tii) : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, @@ -89,17 +71,17 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32; } -bool -PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { - return requiresRegisterScavenging(MF); -} - - /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { + if (Kind == 1) { + if (Subtarget.isPPC64()) + return &PPC::G8RC_NOX0RegClass; + return &PPC::GPRC_NOR0RegClass; + } + if (Subtarget.isPPC64()) return &PPC::G8RCRegClass; return &PPC::GPRCRegClass; @@ -123,11 +105,33 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask; } +const uint32_t* +PPCRegisterInfo::getNoPreservedMask() const { + // The naming here is inverted: The CSR_NoRegs_Altivec has the + // Altivec registers masked so that they're not saved and restored around + // instructions with this preserved mask. 
+ + if (!Subtarget.hasAltivec()) + return CSR_NoRegs_Altivec_RegMask; + + return CSR_NoRegs_RegMask; +} + BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering()); + // The ZERO register is not really a register, but the representation of r0 + // when used in instructions that treat r0 as the constant 0. + Reserved.set(PPC::ZERO); + Reserved.set(PPC::ZERO8); + + // The FP register is also not really a register, but is the representation + // of the frame pointer register used by ISD::FRAMEADDR. + Reserved.set(PPC::FP); + Reserved.set(PPC::FP8); + Reserved.set(PPC::R0); Reserved.set(PPC::R1); Reserved.set(PPC::LR); @@ -139,35 +143,22 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R2); // System-reserved register Reserved.set(PPC::R13); // Small Data Area pointer register } - // Reserve R2 on Darwin to hack around the problem of save/restore of CR - // when the stack frame is too big to address directly; we need two regs. - // This is a hack. - if (Subtarget.isDarwinABI()) { - Reserved.set(PPC::R2); - } // On PPC64, r13 is the thread pointer. Never allocate this register. - // Note that this is over conservative, as it also prevents allocation of R31 - // when the FP is not needed. if (Subtarget.isPPC64()) { Reserved.set(PPC::R13); - Reserved.set(PPC::R31); Reserved.set(PPC::X0); Reserved.set(PPC::X1); Reserved.set(PPC::X13); - Reserved.set(PPC::X31); + + if (PPCFI->needsFP(MF)) + Reserved.set(PPC::X31); // The 64-bit SVR4 ABI reserves r2 for the TOC pointer. if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } - // Reserve X2 on Darwin to hack around the problem of save/restore of CR - // when the stack frame is too big to address directly; we need two regs. - // This is a hack. 
- if (Subtarget.isDarwinABI()) { - Reserved.set(PPC::X2); - } } if (PPCFI->needsFP(MF)) @@ -185,6 +176,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, switch (RC->getID()) { default: return 0; + case PPC::G8RC_NOX0RegClassID: + case PPC::GPRC_NOR0RegClassID: case PPC::G8RCRegClassID: case PPC::GPRCRegClassID: { unsigned FP = TFI->hasFP(MF) ? 1 : 0; @@ -199,38 +192,10 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -bool -PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { - switch (RC->getID()) { - case PPC::G8RCRegClassID: - case PPC::GPRCRegClassID: - case PPC::F8RCRegClassID: - case PPC::F4RCRegClassID: - case PPC::VRRCRegClassID: - return true; - default: - return false; - } -} - //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// -/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered -/// register first and then a spilled callee-saved register if that fails. -static -unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS, - const TargetRegisterClass *RC, int SPAdj) { - assert(RS && "Register scavenging must be on"); - unsigned Reg = RS->FindUnusedReg(RC); - // FIXME: move ARM callee-saved reg scan to target independent code, then - // search for already spilled CS register here. - if (Reg == 0) - Reg = RS->scavengeRegister(RC, II, SPAdj); - return Reg; -} - /// lowerDynamicAlloc - Generate the code for allocating an object in the /// current frame. The sequence of code with be in the general form /// @@ -271,28 +236,16 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Fortunately, a frame greater than 32K is rare. const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *RC = LP64 ? 
G8RC : GPRC; - - // FIXME (64-bit): Use "findScratchRegister" - unsigned Reg; - if (requiresRegisterScavenging(MF)) - Reg = findScratchRegister(II, RS, RC, SPAdj); - else - Reg = PPC::R0; + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) .addReg(PPC::R31) .addImm(FrameSize); } else if (LP64) { - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) - .addImm(0) - .addReg(PPC::X1); - else - BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0) - .addImm(0) - .addReg(PPC::X1); + BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) + .addImm(0) + .addReg(PPC::X1); } else { BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg) .addImm(0) @@ -302,17 +255,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Grow the stack and update the stack pointer link, then determine the // address of new allocated space. if (LP64) { - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(Reg, RegState::Kill) - .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - else - BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(PPC::X0, RegState::Kill) - .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) + .addReg(Reg, RegState::Kill) + .addReg(PPC::X1) + .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) .addReg(PPC::X1) @@ -369,8 +315,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, (void) RS; bool LP64 = Subtarget.isPPC64(); - unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : - (LP64 ? PPC::X0 : PPC::R0); + unsigned Reg = LP64 ? 
PPC::X0 : PPC::R0; unsigned SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue @@ -412,8 +357,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, (void) RS; bool LP64 = Subtarget.isPPC64(); - unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : - (LP64 ? PPC::X0 : PPC::R0); + unsigned Reg = LP64 ? PPC::X0 : PPC::R0; unsigned DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_CR does not define its destination"); @@ -438,6 +382,65 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, MBB.erase(II); } +void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex, int SPAdj, + RegScavenger *RS) const { + // Get the instruction. + MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + // FIXME: Once LLVM supports creating virtual registers here, or the register + // scavenger can return multiple registers, stop using reserved registers + // here. + (void) SPAdj; + (void) RS; + + unsigned Reg = PPC::R0; + unsigned SrcReg = MI.getOperand(0).getReg(); + + BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg) + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW)) + .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())), + FrameIndex); + + // Discard the pseudo instruction. + MBB.erase(II); +} + +void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, + unsigned FrameIndex, int SPAdj, + RegScavenger *RS) const { + // Get the instruction. + MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset> + // Get the instruction's basic block. 
+ MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + // FIXME: Once LLVM supports creating virtual registers here, or the register + // scavenger can return multiple registers, stop using reserved registers + // here. + (void) SPAdj; + (void) RS; + + unsigned Reg = PPC::R0; + unsigned DestReg = MI.getOperand(0).getReg(); + assert(MI.definesRegister(DestReg) && + "RESTORE_VRSAVE does not define its destination"); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ), + Reg), FrameIndex); + + BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg) + .addReg(Reg); + + // Discard the pseudo instruction. + MBB.erase(II); +} + bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const { @@ -498,15 +501,19 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, return; } - // Special case for pseudo-ops SPILL_CR and RESTORE_CR. - if (requiresRegisterScavenging(MF)) { - if (OpC == PPC::SPILL_CR) { - lowerCRSpilling(II, FrameIndex, SPAdj, RS); - return; - } else if (OpC == PPC::RESTORE_CR) { - lowerCRRestore(II, FrameIndex, SPAdj, RS); - return; - } + // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc. + if (OpC == PPC::SPILL_CR) { + lowerCRSpilling(II, FrameIndex, SPAdj, RS); + return; + } else if (OpC == PPC::RESTORE_CR) { + lowerCRRestore(II, FrameIndex, SPAdj, RS); + return; + } else if (OpC == PPC::SPILL_VRSAVE) { + lowerVRSAVESpilling(II, FrameIndex, SPAdj, RS); + return; + } else if (OpC == PPC::RESTORE_VRSAVE) { + lowerVRSAVERestore(II, FrameIndex, SPAdj, RS); + return; } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). 
@@ -529,7 +536,25 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, isIXAddr = true; break; } - + + bool noImmForm = false; + switch (OpC) { + case PPC::LVEBX: + case PPC::LVEHX: + case PPC::LVEWX: + case PPC::LVX: + case PPC::LVXL: + case PPC::LVSL: + case PPC::LVSR: + case PPC::STVEBX: + case PPC::STVEHX: + case PPC::STVEWX: + case PPC::STVX: + case PPC::STVXL: + noImmForm = true; + break; + } + // Now add the frame object offset to the offset from r1. int Offset = MFI->getObjectOffset(FrameIndex); if (!isIXAddr) @@ -553,7 +578,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm - (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { + (!noImmForm && + isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); @@ -563,13 +589,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // The offset doesn't fit into a single register, scavenge one to build the // offset in. - unsigned SReg; - if (requiresRegisterScavenging(MF)) { - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj); - } else - SReg = is64Bit ? PPC::X0 : PPC::R0; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + unsigned SReg = MF.getRegInfo().createVirtualRegister(is64Bit ? G8RC : GPRC); // Insert a set of rA with the full offset value before the ld, st, or add BuildMI(MBB, II, dl, TII.get(is64Bit ? 
PPC::LIS8 : PPC::LIS), SReg) @@ -584,7 +606,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0 unsigned OperandBase; - if (OpC != TargetOpcode::INLINEASM) { + if (noImmForm) + OperandBase = 1; + else if (OpC != TargetOpcode::INLINEASM) { assert(ImmToIdxMap.count(OpC) && "No indexed form of load or store available!"); unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 9840666242..ba0fb48b85 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -44,16 +44,22 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID CC) const; + const uint32_t *getNoPreservedMask() const; BitVector getReservedRegs(const MachineFunction &MF) const; - virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; + /// We require the register scavenger. + bool requiresRegisterScavenging(const MachineFunction &MF) const { + return true; + } - /// requiresRegisterScavenging - We require a register scavenger. - /// FIXME (64-bit): Should be inlined. 
- bool requiresRegisterScavenging(const MachineFunction &MF) const; + bool requiresFrameIndexScavenging(const MachineFunction &MF) const { + return true; + } - bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + return true; + } void lowerDynamicAlloc(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const; @@ -61,6 +67,11 @@ public: int SPAdj, RegScavenger *RS) const; void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const; + void lowerVRSAVESpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, + int SPAdj, RegScavenger *RS) const; + void lowerVRSAVERestore(MachineBasicBlock::iterator II, unsigned FrameIndex, + int SPAdj, RegScavenger *RS) const; + bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 8ee9b1ec9f..55d36bbfc9 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -86,6 +86,14 @@ foreach Index = 0-31 in { DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; } +// The representation of r0 when treated as the constant 0. +def ZERO : GPR<0, "r0">; +def ZERO8 : GP8<ZERO, "r0">; + +// Representations of the frame pointer used by ISD::FRAMEADDR.
+def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">; +def FP8 : GP8<FP, "**FRAME POINTER**">; + // Condition register bits def CR0LT : CRBIT< 0, "0">; def CR0GT : CRBIT< 1, "1">; @@ -164,11 +172,17 @@ def RM: SPR<512, "**ROUNDING MODE**">; // then nonvolatiles in reverse order since stmw/lmw save from rN to r31 def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12), (sequence "R%u", 30, 13), - R31, R0, R1, LR)>; + R31, R0, R1, LR, FP)>; def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12), (sequence "X%u", 30, 14), - X31, X13, X0, X1, LR8)>; + X31, X13, X0, X1, LR8, FP8)>; + +// For some instructions r0 is special (representing the value 0 instead of +// the value in the r0 register), and we use these register subclasses to +// prevent r0 from being allocated for use by those instructions. +def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>; +def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>; // Allocate volatiles first, then non-volatiles in reverse order. With the SVR4 // ABI the size of the Floating-point register save area is determined by the |