diff options
author | Derek Schuff <dschuff@chromium.org> | 2012-09-25 17:30:25 -0700 |
---|---|---|
committer | Derek Schuff <dschuff@chromium.org> | 2012-09-25 18:01:23 -0700 |
commit | a27c28b1427dc2082ab2b31efdbb25f9fde31b61 (patch) | |
tree | 6f3ff025f542ca3f66a1a01cbf239aeef7784511 /lib/Target/PowerPC | |
parent | 0e15ffd8cb1ec642eddb96380660914ff2b007e1 (diff) | |
parent | bc4021f31eaa97ee52655828da3e3de14a39e4a6 (diff) |
Merge commit 'bc4021f31eaa97ee52655828da3e3de14a39e4a6'
Conflicts:
lib/MC/MCAssembler.cpp
lib/Target/ARM/ARMISelDAGToDAG.cpp
lib/Target/Mips/MipsInstrFPU.td
lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
lib/Target/X86/X86ISelLowering.h
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r-- | lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 2 | ||||
-rw-r--r-- | lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 3 | ||||
-rw-r--r-- | lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 6 | ||||
-rw-r--r-- | lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 2 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCAsmPrinter.cpp | 13 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCFrameLowering.cpp | 245 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCFrameLowering.h | 71 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 192 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.h | 29 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCRegisterInfo.cpp | 34 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCRegisterInfo.h | 5 |
11 files changed, 451 insertions, 151 deletions
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 48de583afd..1744738622 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -181,7 +181,7 @@ namespace { -MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT) { +MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU) { if (Triple(TT).isOSDarwin()) return new DarwinPPCAsmBackend(T); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index b9ea8b5562..215aa40c4a 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -59,8 +59,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { HasLEB128 = true; // Target asm supports leb128 directives (little-endian) // Exceptions handling - if (!is64Bit) - ExceptionsType = ExceptionHandling::DwarfCFI; + ExceptionsType = ExceptionHandling::DwarfCFI; ZeroDirective = "\t.space\t"; Data64bitsDirective = is64Bit ? 
"\t.quad\t" : 0; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index f6524222fd..c0248a6045 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -25,9 +25,9 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); namespace { class PPCMCCodeEmitter : public MCCodeEmitter { - PPCMCCodeEmitter(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT - + PPCMCCodeEmitter(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; + void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; + public: PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, MCContext &ctx) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 7162e158f0..a0e4cf3005 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -36,7 +36,7 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT); +MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU); /// createPPCELFObjectWriter - Construct an PPC ELF object writer. MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 6e0e8bb8bc..a66677fa0f 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -368,9 +368,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { else if (MO.isJTI()) MOSymbol = GetJTISymbol(MO.getIndex()); MCSymbol *&TOCEntry = TOC[MOSymbol]; - if (TOCEntry == 0) - TOCEntry = GetTempSymbol("C", TOCLabelID++); - + // To avoid name clash check if the name already exists. 
+ while (TOCEntry == 0) { + if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) + + "C" + Twine(TOCLabelID++)) == 0) { + TOCEntry = GetTempSymbol("C", TOCLabelID); + } + } + const MCExpr *Exp = MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY, OutContext); @@ -420,6 +425,8 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { 8/*size*/, 0/*addrspace*/); OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext), 8/*size*/, 0/*addrspace*/); + // Emit a null environment pointer. + OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */); OutStreamer.SwitchSection(Current); MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index c24afa908d..97d3600b1b 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -13,6 +13,7 @@ #include "PPCFrameLowering.h" #include "PPCInstrInfo.h" +#include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -168,6 +169,11 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { MI->eraseFromParent(); } +static bool spillsCR(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->isCRSpilled(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { @@ -184,13 +190,21 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { // If we are a leaf function, and use up to 224 bytes of stack space, // don't have a frame pointer, calls, or dynamic alloca then we do not need - // to adjust the stack pointer (we fit in the Red Zone). + // to adjust the stack pointer (we fit in the Red Zone). 
For 64-bit + // SVR4, we also require a stack frame if we need to spill the CR, + // since this spill area is addressed relative to the stack pointer. bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone); - // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. + // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can + // still generate stackless code if all local vars are reg-allocated. + // Try: (FrameSize <= 224 + // || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI())) if (!DisableRedZone && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. + !(Subtarget.isPPC64() && // No 64-bit SVR4 CRsave. + Subtarget.isSVR4ABI() + && spillsCR(MF)) && (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame MFI->setStackSize(0); @@ -488,7 +502,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // Add callee saved registers to move list. const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); unsigned Reg = CSI[I].getReg(); if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; @@ -497,6 +510,25 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (PPC::CRBITRCRegClass.contains(Reg)) continue; + // For SVR4, don't emit a move for the CR spill slot if we haven't + // spilled CRs. + if (Subtarget.isSVR4ABI() + && (PPC::CR2 <= Reg && Reg <= PPC::CR4) + && !spillsCR(MF)) + continue; + + // For 64-bit SVR4 when we have spilled CRs, the spill location + // is SP+8, not a frame-relative slot. 
+ if (Subtarget.isSVR4ABI() + && Subtarget.isPPC64() + && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { + MachineLocation CSDst(PPC::X1, 8); + MachineLocation CSSrc(PPC::CR2); + Moves.push_back(MachineMove(Label, CSDst, CSSrc)); + continue; + } + + int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); MachineLocation CSDst(MachineLocation::VirtualFP, Offset); MachineLocation CSSrc(Reg); Moves.push_back(MachineMove(Label, CSDst, CSSrc)); @@ -714,11 +746,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } -static bool spillsCR(const MachineFunction &MF) { - const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - return FuncInfo->isCRSpilled(); -} - /// MustSaveLR - Return true if this function requires that we save the LR /// register onto the stack in the prolog and restore it in the epilog of the /// function. @@ -808,7 +835,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) bool HasGPSaveArea = false; bool HasG8SaveArea = false; bool HasFPSaveArea = false; - bool HasCRSaveArea = false; bool HasVRSAVESaveArea = false; bool HasVRSaveArea = false; @@ -843,10 +869,9 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (Reg < MinFPR) { MinFPR = Reg; } -// FIXME SVR4: Disable CR save area for now. } else if (PPC::CRBITRCRegClass.contains(Reg) || PPC::CRRCRegClass.contains(Reg)) { -// HasCRSaveArea = true; + ; // do nothing, as we already know whether CRs are spilled } else if (PPC::VRSAVERCRegClass.contains(Reg)) { HasVRSAVESaveArea = true; } else if (PPC::VRRCRegClass.contains(Reg)) { @@ -926,16 +951,21 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } } - // The CR save area is below the general register save area. - if (HasCRSaveArea) { - // FIXME SVR4: Is it actually possible to have multiple elements in CSI - // which have the CR/CRBIT register class? + // For 32-bit only, the CR save area is below the general register + // save area. 
For 64-bit SVR4, the CR save area is addressed relative + // to the stack pointer and hence does not need an adjustment here. + // Only CR2 (the first nonvolatile spilled) has an associated frame + // index so that we have a single uniform save area. + if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) { // Adjust the frame index of the CR spill slot. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - if (PPC::CRBITRCRegClass.contains(Reg) || - PPC::CRRCRegClass.contains(Reg)) { + if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2) + // Leave Darwin logic as-is. + || (!Subtarget.isSVR4ABI() && + (PPC::CRBITRCRegClass.contains(Reg) || + PPC::CRRCRegClass.contains(Reg)))) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); @@ -973,3 +1003,184 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } } } + +bool +PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + + // Currently, this function only handles SVR4 32- and 64-bit ABIs. + // Return false otherwise to maintain pre-existing behavior. + if (!Subtarget.isSVR4ABI()) + return false; + + MachineFunction *MF = MBB.getParent(); + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo()); + DebugLoc DL; + bool CRSpilled = false; + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + // CR2 through CR4 are the nonvolatile CR fields. + bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; + + if (CRSpilled && IsCRField) + continue; + + // Add the callee-saved register as live-in; it's killed at the spill. + MBB.addLiveIn(Reg); + + // Insert the spill to the stack frame. 
+ if (IsCRField) { + CRSpilled = true; + // The first time we see a CR field, store the whole CR into the + // save slot via GPR12 (available in the prolog for 32- and 64-bit). + if (Subtarget.isPPC64()) { + // 64-bit: SP+8 + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::X12)); + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW)) + .addReg(PPC::X12, + getKillRegState(true)) + .addImm(8) + .addReg(PPC::X1)); + } else { + // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have + // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)); + MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) + .addReg(PPC::R12, + getKillRegState(true)), + CSI[i].getFrameIdx())); + } + + // Record that we spill the CR in this function. + PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); + FuncInfo->setSpillsCR(); + } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, + CSI[i].getFrameIdx(), RC, TRI); + } + } + return true; +} + +static void +restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) { + + MachineFunction *MF = MBB.getParent(); + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo()); + DebugLoc DL; + unsigned RestoreOp, MoveReg; + + if (isPPC64) { + // 64-bit: SP+8 + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::X12) + .addImm(8) + .addReg(PPC::X1)); + RestoreOp = PPC::MTCRF8; + MoveReg = PPC::X12; + } else { + // 32-bit: FP-relative + MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), + PPC::R12), + CSI[CSIIndex].getFrameIdx())); + RestoreOp = PPC::MTCRF; + MoveReg = PPC::R12; + } + + if (CR2Spilled) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) + .addReg(MoveReg)); 
+ + if (CR3Spilled) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) + .addReg(MoveReg)); + + if (CR4Spilled) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) + .addReg(MoveReg)); +} + +bool +PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + + // Currently, this function only handles SVR4 32- and 64-bit ABIs. + // Return false otherwise to maintain pre-existing behavior. + if (!Subtarget.isSVR4ABI()) + return false; + + MachineFunction *MF = MBB.getParent(); + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo()); + bool CR2Spilled = false; + bool CR3Spilled = false; + bool CR4Spilled = false; + unsigned CSIIndex = 0; + + // Initialize insertion-point logic; we will be restoring in reverse + // order of spill. + MachineBasicBlock::iterator I = MI, BeforeI = I; + bool AtStart = I == MBB.begin(); + + if (!AtStart) + --BeforeI; + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + + if (Reg == PPC::CR2) { + CR2Spilled = true; + // The spill slot is associated only with CR2, which is the + // first nonvolatile spilled. Save it here. + CSIIndex = i; + continue; + } else if (Reg == PPC::CR3) { + CR3Spilled = true; + continue; + } else if (Reg == PPC::CR4) { + CR4Spilled = true; + continue; + } else { + // When we first encounter a non-CR register after seeing at + // least one CR register, restore all spilled CRs together. + if ((CR2Spilled || CR3Spilled || CR4Spilled) + && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { + restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled, + MBB, I, CSI, CSIIndex); + CR2Spilled = CR3Spilled = CR4Spilled = false; + } + + // Default behavior for non-CR saves. 
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), + RC, TRI); + assert(I != MBB.begin() && + "loadRegFromStackSlot didn't insert any code!"); + } + + // Insert in reverse order. + if (AtStart) + I = MBB.begin(); + else { + I = BeforeI; + ++I; + } + } + + // If we haven't yet restored the CRs, do so now. + if (CR2Spilled || CR3Spilled || CR4Spilled) + restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled, + MBB, I, CSI, CSIIndex); + + return true; +} + diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index d708541c66..4d957b91c7 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -45,6 +45,16 @@ public: RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + /// targetHandlesStackFrameRounding - Returns true if the target is /// responsible for rounding up the stack frame (probably at emitPrologue /// time). @@ -170,23 +180,11 @@ public: {PPC::R15, -68}, {PPC::R14, -72}, - // CR save area offset. - // FIXME SVR4: Disable CR save area for now. -// {PPC::CR2, -4}, -// {PPC::CR3, -4}, -// {PPC::CR4, -4}, -// {PPC::CR2LT, -4}, -// {PPC::CR2GT, -4}, -// {PPC::CR2EQ, -4}, -// {PPC::CR2UN, -4}, -// {PPC::CR3LT, -4}, -// {PPC::CR3GT, -4}, -// {PPC::CR3EQ, -4}, -// {PPC::CR3UN, -4}, -// {PPC::CR4LT, -4}, -// {PPC::CR4GT, -4}, -// {PPC::CR4EQ, -4}, -// {PPC::CR4UN, -4}, + // CR save area offset. 
We map each of the nonvolatile CR fields + // to the slot for CR2, which is the first of the nonvolatile CR + // fields to be assigned, so that we only allocate one save slot. + // See PPCRegisterInfo::hasReservedSpillSlot() for more information. + {PPC::CR2, -4}, // VRSAVE save area offset. {PPC::VRSAVE, -4}, @@ -228,27 +226,6 @@ public: {PPC::F14, -144}, // General register save area offsets. - // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit - // mode? - {PPC::R31, -4}, - {PPC::R30, -12}, - {PPC::R29, -20}, - {PPC::R28, -28}, - {PPC::R27, -36}, - {PPC::R26, -44}, - {PPC::R25, -52}, - {PPC::R24, -60}, - {PPC::R23, -68}, - {PPC::R22, -76}, - {PPC::R21, -84}, - {PPC::R20, -92}, - {PPC::R19, -100}, - {PPC::R18, -108}, - {PPC::R17, -116}, - {PPC::R16, -124}, - {PPC::R15, -132}, - {PPC::R14, -140}, - {PPC::X31, -8}, {PPC::X30, -16}, {PPC::X29, -24}, @@ -268,24 +245,6 @@ public: {PPC::X15, -136}, {PPC::X14, -144}, - // CR save area offset. - // FIXME SVR4: Disable CR save area for now. -// {PPC::CR2, -4}, -// {PPC::CR3, -4}, -// {PPC::CR4, -4}, -// {PPC::CR2LT, -4}, -// {PPC::CR2GT, -4}, -// {PPC::CR2EQ, -4}, -// {PPC::CR2UN, -4}, -// {PPC::CR3LT, -4}, -// {PPC::CR3GT, -4}, -// {PPC::CR3EQ, -4}, -// {PPC::CR3UN, -4}, -// {PPC::CR4LT, -4}, -// {PPC::CR4GT, -4}, -// {PPC::CR4EQ, -4}, -// {PPC::CR4UN, -4}, - // VRSAVE save area offset. 
{PPC::VRSAVE, -4}, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index dbb3b144a7..2e8fa1842a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1264,8 +1264,8 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, unsigned MOHiFlag, MOLoFlag; bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); - SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag); - SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag); + SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); + SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); } @@ -1717,16 +1717,16 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain, SmallVectorImpl<SDValue> &InVals) const { if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { - return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins, - dl, DAG, InVals); - } else { - return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, + return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); + } else { + return LowerFormalArguments_Darwin_Or_64SVR4(Chain, CallConv, isVarArg, Ins, + dl, DAG, InVals); } } SDValue -PPCTargetLowering::LowerFormalArguments_SVR4( +PPCTargetLowering::LowerFormalArguments_32SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> @@ -1944,7 +1944,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( } SDValue -PPCTargetLowering::LowerFormalArguments_Darwin( +PPCTargetLowering::LowerFormalArguments_Darwin_Or_64SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> @@ -1959,6 +1959,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; + bool isSVR4ABI = 
PPCSubTarget.isSVR4ABI(); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); @@ -2019,10 +2020,12 @@ PPCTargetLowering::LowerFormalArguments_Darwin( default: llvm_unreachable("Unhandled argument type!"); case MVT::i32: case MVT::f32: - VecArgOffset += isPPC64 ? 8 : 4; + VecArgOffset += 4; break; case MVT::i64: // PPC64 case MVT::f64: + // FIXME: We are guaranteed to be !isPPC64 at this point. + // Does MVT::i64 apply? VecArgOffset += 8; break; case MVT::v4f32: @@ -2076,8 +2079,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; - // Objects of size 1 and 2 are right justified, everything else is - // left justified. This means the memory address is adjusted forwards. + // FOR DARWIN: Objects of size 1 and 2 are right justified, everything + // else is left justified. This means the memory address is adjusted + // forwards. + // FOR 64-BIT SVR4: All aggregates smaller than 8 bytes must be passed + // right-justified. if (ObjSize==1 || ObjSize==2) { CurArgOffset = CurArgOffset + (4 - ObjSize); } @@ -2085,7 +2091,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); - if (ObjSize==1 || ObjSize==2) { + if (ObjSize==1 || ObjSize==2 || + (ObjSize==4 && isSVR4ABI)) { if (GPR_idx != Num_GPR_Regs) { unsigned VReg; if (isPPC64) @@ -2093,10 +2100,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin( else VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); + EVT ObjType = (ObjSize == 1 ? MVT::i8 : + (ObjSize == 2 ? 
MVT::i16 : MVT::i32)); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(), - ObjSize==1 ? MVT::i8 : MVT::i16, - false, false, 0); + ObjType, false, false, 0); MemOps.push_back(Store); ++GPR_idx; } @@ -2107,8 +2115,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( } for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { // Store whatever pieces of the object are in registers - // to memory. ArgVal will be address of the beginning of - // the object. + // to memory. ArgOffset will be the address of the beginning + // of the object. if (GPR_idx != Num_GPR_Regs) { unsigned VReg; if (isPPC64) @@ -2118,7 +2126,16 @@ PPCTargetLowering::LowerFormalArguments_Darwin( int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + SDValue Shifted = Val; + + // For 64-bit SVR4, small structs come in right-adjusted. + // Shift them left so the following logic works as expected. + if (ObjSize < 8 && isSVR4ABI) { + SDValue ShiftAmt = DAG.getConstant(64 - 8 * ObjSize, PtrVT); + Shifted = DAG.getNode(ISD::SHL, dl, PtrVT, Val, ShiftAmt); + } + + SDValue Store = DAG.getStore(Val.getValue(1), dl, Shifted, FIN, MachinePointerInfo(), false, false, 0); MemOps.push_back(Store); @@ -2308,8 +2325,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( return Chain; } -/// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus -/// linkage area for the Darwin ABI. +/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus +/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI. static unsigned CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, bool isPPC64, @@ -2718,7 +2735,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // Thus for a call through a function pointer, the following actions need // to be performed: // 1. 
Save the TOC of the caller in the TOC save area of its stack - // frame (this is done in LowerCall_Darwin()). + // frame (this is done in LowerCall_Darwin_Or_64SVR4()). // 2. Load the address of the function entry point from the function // descriptor. // 3. Load the TOC of the callee from the function descriptor into r2. @@ -2808,6 +2825,15 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, return CallOpc; } +static +bool isLocalCall(const SDValue &Callee) +{ + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + return !G->getGlobal()->isDeclaration() && + !G->getGlobal()->isWeakForLinker(); + return false; +} + SDValue PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, @@ -2916,8 +2942,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. needsTOCRestore = true; - } else if (CallOpc == PPCISD::CALL_SVR4) { - // Otherwise insert NOP. + } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) { + // Otherwise insert NOP for non-local calls. 
CallOpc = PPCISD::CALL_NOP_SVR4; } } @@ -2960,25 +2986,25 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ins, DAG); if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) - return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, OutVals, Ins, - dl, DAG, InVals); + return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, + isTailCall, Outs, OutVals, Ins, + dl, DAG, InVals); - return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, OutVals, Ins, - dl, DAG, InVals); + return LowerCall_Darwin_Or_64SVR4(Chain, Callee, CallConv, isVarArg, + isTailCall, Outs, OutVals, Ins, + dl, DAG, InVals); } SDValue -PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { - // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description +PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description // of the 32-bit SVR4 ABI stack frame layout. 
assert((CallConv == CallingConv::C || @@ -3183,7 +3209,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, } SDValue -PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, +PPCTargetLowering::LowerCall_Darwin_Or_64SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, @@ -3192,6 +3218,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { + bool isSVR4ABI = PPCSubTarget.isSVR4ABI(); + unsigned NumOps = Outs.size(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -3299,12 +3327,22 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, } // FIXME memcpy is used way more than necessary. Correctness first. + // Note: "by value" is code for passing a structure by value, not + // basic types. if (Flags.isByVal()) { + // Note: Size includes alignment padding, so + // struct x { short a; char b; } + // will have Size = 4. With #pragma pack(1), it will have Size = 3. + // These are the proper values we need for right-justifying the + // aggregate in a parameter register for 64-bit SVR4. unsigned Size = Flags.getByValSize(); - if (Size==1 || Size==2) { - // Very small objects are passed right-justified. - // Everything else is passed left-justified. - EVT VT = (Size==1) ? MVT::i8 : MVT::i16; + // FOR DARWIN ONLY: Very small objects are passed right-justified. + // Everything else is passed left-justified. + // FOR 64-BIT SVR4: All aggregates smaller than 8 bytes must + // be passed right-justified. + if (Size==1 || Size==2 || + (Size==4 && isSVR4ABI)) { + EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? 
MVT::i16 : MVT::i32); if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, MachinePointerInfo(), VT, @@ -3332,15 +3370,67 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // Copy entire object into memory. There are cases where gcc-generated // code assumes it is there, even if it could be put entirely into // registe |