aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp60
-rw-r--r--lib/Target/PowerPC/PPCFrameInfo.h143
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp75
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h5
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td8
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp121
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td10
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h5
9 files changed, 361 insertions, 72 deletions
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index efbf749248..efb9e5c62c 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -81,13 +81,14 @@ namespace {
};
StringMap<FnStubInfo> FnStubs;
- StringMap<std::string> GVStubs, HiddenGVStubs;
+ StringMap<std::string> GVStubs, HiddenGVStubs, TOC;
const PPCSubtarget &Subtarget;
+ uint64_t LabelID;
public:
explicit PPCAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
const TargetAsmInfo *T, bool V)
: AsmPrinter(O, TM, T, V),
- Subtarget(TM.getSubtarget<PPCSubtarget>()) {}
+ Subtarget(TM.getSubtarget<PPCSubtarget>()), LabelID(0) {}
virtual const char *getPassName() const {
return "PowerPC Assembly Printer";
@@ -310,6 +311,28 @@ namespace {
printOperand(MI, OpNo+1);
}
+ void printTOCEntryLabel(const MachineInstr *MI, unsigned OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ assert(MO.getType() == MachineOperand::MO_GlobalAddress);
+
+ GlobalValue *GV = MO.getGlobal();
+
+ std::string Name = Mang->getMangledName(GV);
+
+ // Map symbol -> label of TOC entry.
+ if (TOC.count(Name) == 0) {
+ std::string Label;
+ Label += TAI->getPrivateGlobalPrefix();
+ Label += "C";
+ Label += utostr(LabelID++);
+
+ TOC[Name] = Label;
+ }
+
+ O << TOC[Name] << "@toc";
+ }
+
void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
const char *Modifier);
@@ -330,6 +353,7 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &F);
+ bool doFinalization(Module &M);
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -612,7 +636,19 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
printVisibility(CurrentFnName, F->getVisibility());
EmitAlignment(MF.getAlignment(), F);
- O << CurrentFnName << ":\n";
+
+ if (Subtarget.isPPC64()) {
+ // Emit an official procedure descriptor.
+ // FIXME 64-bit SVR4: Use MCSection here?
+ O << "\t.section\t\".opd\",\"aw\"\n";
+ O << "\t.align 3\n";
+ O << CurrentFnName << ":\n";
+ O << "\t.quad .L." << CurrentFnName << ",.TOC.@tocbase\n";
+ O << "\t.previous\n";
+ O << ".L." << CurrentFnName << ":\n";
+ } else {
+ O << CurrentFnName << ":\n";
+ }
// Emit pre-function debug information.
DW->BeginFunction(&MF);
@@ -731,6 +767,24 @@ void PPCLinuxAsmPrinter::PrintGlobalVariable(const GlobalVariable *GVar) {
O << '\n';
}
+bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
+ const TargetData *TD = TM.getTargetData();
+
+ bool isPPC64 = TD->getPointerSizeInBits() == 64;
+
+ if (isPPC64 && !TOC.empty()) {
+ // FIXME 64-bit SVR4: Use MCSection here?
+ O << "\t.section\t\".toc\",\"aw\"\n";
+
+ for (StringMap<std::string>::iterator I = TOC.begin(), E = TOC.end();
+ I != E; ++I) {
+ O << I->second << ":\n";
+ O << "\t.tc " << I->getKeyData() << "[TC]," << I->getKeyData() << '\n';
+ }
+ }
+
+ return AsmPrinter::doFinalization(M);
+}
/// runOnMachineFunction - This uses the printMachineInstruction()
/// method to print assembly for each instruction.
diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h
index 770a560ccf..c5636375bf 100644
--- a/lib/Target/PowerPC/PPCFrameInfo.h
+++ b/lib/Target/PowerPC/PPCFrameInfo.h
@@ -31,33 +31,32 @@ public:
/// getReturnSaveOffset - Return the previous frame offset to save the
/// return address.
- static unsigned getReturnSaveOffset(bool LP64, bool isDarwinABI) {
+ static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) {
if (isDarwinABI)
- return LP64 ? 16 : 8;
+ return isPPC64 ? 16 : 8;
// SVR4 ABI:
- return 4;
+ return isPPC64 ? 16 : 4;
}
/// getFramePointerSaveOffset - Return the previous frame offset to save the
/// frame pointer.
- static unsigned getFramePointerSaveOffset(bool LP64, bool isDarwinABI) {
+ static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
// For the Darwin ABI:
// Use the TOC save slot in the PowerPC linkage area for saving the frame
// pointer (if needed.) LLVM does not generate code that uses the TOC (R2
// is treated as a caller saved register.)
if (isDarwinABI)
- return LP64 ? 40 : 20;
+ return isPPC64 ? 40 : 20;
- // SVR4 ABI:
- // Save it right before the link register
+ // SVR4 ABI: First slot in the general register save area.
return -4U;
}
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
- static unsigned getLinkageSize(bool LP64, bool isDarwinABI) {
- if (isDarwinABI)
- return 6 * (LP64 ? 8 : 4);
+ static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
+ if (isDarwinABI || isPPC64)
+ return 6 * (isPPC64 ? 8 : 4);
// SVR4 ABI:
return 8;
@@ -65,27 +64,27 @@ public:
/// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI
/// argument area.
- static unsigned getMinCallArgumentsSize(bool LP64, bool isDarwinABI) {
- // For the Darwin ABI:
+ static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) {
+ // For the Darwin ABI / 64-bit SVR4 ABI:
// The prolog code of the callee may store up to 8 GPR argument registers to
// the stack, allowing va_start to index over them in memory if its varargs.
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
- if (isDarwinABI)
- return 8 * (LP64 ? 8 : 4);
+ if (isDarwinABI || isPPC64)
+ return 8 * (isPPC64 ? 8 : 4);
- // SVR4 ABI:
+ // 32-bit SVR4 ABI:
// There is no default stack allocated for the 8 first GPR arguments.
return 0;
}
/// getMinCallFrameSize - Return the minimum size a call frame can be using
/// the PowerPC ABI.
- static unsigned getMinCallFrameSize(bool LP64, bool isDarwinABI) {
+ static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) {
// The call frame needs to be at least big enough for linkage and 8 args.
- return getLinkageSize(LP64, isDarwinABI) +
- getMinCallArgumentsSize(LP64, isDarwinABI);
+ return getLinkageSize(isPPC64, isDarwinABI) +
+ getMinCallArgumentsSize(isPPC64, isDarwinABI);
}
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
@@ -174,9 +173,113 @@ public:
std::pair<unsigned, int>(PPC::V20, -192)
};
- NumEntries = array_lengthof(Offsets);
+ static const std::pair<unsigned, int> Offsets64[] = {
+ // Floating-point register save area offsets.
+ std::pair<unsigned, int>(PPC::F31, -8),
+ std::pair<unsigned, int>(PPC::F30, -16),
+ std::pair<unsigned, int>(PPC::F29, -24),
+ std::pair<unsigned, int>(PPC::F28, -32),
+ std::pair<unsigned, int>(PPC::F27, -40),
+ std::pair<unsigned, int>(PPC::F26, -48),
+ std::pair<unsigned, int>(PPC::F25, -56),
+ std::pair<unsigned, int>(PPC::F24, -64),
+ std::pair<unsigned, int>(PPC::F23, -72),
+ std::pair<unsigned, int>(PPC::F22, -80),
+ std::pair<unsigned, int>(PPC::F21, -88),
+ std::pair<unsigned, int>(PPC::F20, -96),
+ std::pair<unsigned, int>(PPC::F19, -104),
+ std::pair<unsigned, int>(PPC::F18, -112),
+ std::pair<unsigned, int>(PPC::F17, -120),
+ std::pair<unsigned, int>(PPC::F16, -128),
+ std::pair<unsigned, int>(PPC::F15, -136),
+ std::pair<unsigned, int>(PPC::F14, -144),
+
+ // General register save area offsets.
+ // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit
+ // mode?
+ std::pair<unsigned, int>(PPC::R31, -4),
+ std::pair<unsigned, int>(PPC::R30, -12),
+ std::pair<unsigned, int>(PPC::R29, -20),
+ std::pair<unsigned, int>(PPC::R28, -28),
+ std::pair<unsigned, int>(PPC::R27, -36),
+ std::pair<unsigned, int>(PPC::R26, -44),
+ std::pair<unsigned, int>(PPC::R25, -52),
+ std::pair<unsigned, int>(PPC::R24, -60),
+ std::pair<unsigned, int>(PPC::R23, -68),
+ std::pair<unsigned, int>(PPC::R22, -76),
+ std::pair<unsigned, int>(PPC::R21, -84),
+ std::pair<unsigned, int>(PPC::R20, -92),
+ std::pair<unsigned, int>(PPC::R19, -100),
+ std::pair<unsigned, int>(PPC::R18, -108),
+ std::pair<unsigned, int>(PPC::R17, -116),
+ std::pair<unsigned, int>(PPC::R16, -124),
+ std::pair<unsigned, int>(PPC::R15, -132),
+ std::pair<unsigned, int>(PPC::R14, -140),
+
+ std::pair<unsigned, int>(PPC::X31, -8),
+ std::pair<unsigned, int>(PPC::X30, -16),
+ std::pair<unsigned, int>(PPC::X29, -24),
+ std::pair<unsigned, int>(PPC::X28, -32),
+ std::pair<unsigned, int>(PPC::X27, -40),
+ std::pair<unsigned, int>(PPC::X26, -48),
+ std::pair<unsigned, int>(PPC::X25, -56),
+ std::pair<unsigned, int>(PPC::X24, -64),
+ std::pair<unsigned, int>(PPC::X23, -72),
+ std::pair<unsigned, int>(PPC::X22, -80),
+ std::pair<unsigned, int>(PPC::X21, -88),
+ std::pair<unsigned, int>(PPC::X20, -96),
+ std::pair<unsigned, int>(PPC::X19, -104),
+ std::pair<unsigned, int>(PPC::X18, -112),
+ std::pair<unsigned, int>(PPC::X17, -120),
+ std::pair<unsigned, int>(PPC::X16, -128),
+ std::pair<unsigned, int>(PPC::X15, -136),
+ std::pair<unsigned, int>(PPC::X14, -144),
+
+ // CR save area offset.
+ // FIXME SVR4: Disable CR save area for now.
+// std::pair<unsigned, int>(PPC::CR2, -4),
+// std::pair<unsigned, int>(PPC::CR3, -4),
+// std::pair<unsigned, int>(PPC::CR4, -4),
+// std::pair<unsigned, int>(PPC::CR2LT, -4),
+// std::pair<unsigned, int>(PPC::CR2GT, -4),
+// std::pair<unsigned, int>(PPC::CR2EQ, -4),
+// std::pair<unsigned, int>(PPC::CR2UN, -4),
+// std::pair<unsigned, int>(PPC::CR3LT, -4),
+// std::pair<unsigned, int>(PPC::CR3GT, -4),
+// std::pair<unsigned, int>(PPC::CR3EQ, -4),
+// std::pair<unsigned, int>(PPC::CR3UN, -4),
+// std::pair<unsigned, int>(PPC::CR4LT, -4),
+// std::pair<unsigned, int>(PPC::CR4GT, -4),
+// std::pair<unsigned, int>(PPC::CR4EQ, -4),
+// std::pair<unsigned, int>(PPC::CR4UN, -4),
+
+ // VRSAVE save area offset.
+ std::pair<unsigned, int>(PPC::VRSAVE, -4),
+
+ // Vector register save area
+ std::pair<unsigned, int>(PPC::V31, -16),
+ std::pair<unsigned, int>(PPC::V30, -32),
+ std::pair<unsigned, int>(PPC::V29, -48),
+ std::pair<unsigned, int>(PPC::V28, -64),
+ std::pair<unsigned, int>(PPC::V27, -80),
+ std::pair<unsigned, int>(PPC::V26, -96),
+ std::pair<unsigned, int>(PPC::V25, -112),
+ std::pair<unsigned, int>(PPC::V24, -128),
+ std::pair<unsigned, int>(PPC::V23, -144),
+ std::pair<unsigned, int>(PPC::V22, -160),
+ std::pair<unsigned, int>(PPC::V21, -176),
+ std::pair<unsigned, int>(PPC::V20, -192)
+ };
- return Offsets;
+ if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ NumEntries = array_lengthof(Offsets64);
+
+ return Offsets64;
+ } else {
+ NumEntries = array_lengthof(Offsets);
+
+ return Offsets;
+ }
}
};
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 7ad81f8936..b77a35f629 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -212,8 +212,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- // VAARG is custom lowered with the SVR4 ABI
- if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI())
+ // VAARG is custom lowered with the 32-bit SVR4 ABI.
+ if ( TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+ && !TM.getSubtarget<PPCSubtarget>().isPPC64())
setOperationAction(ISD::VAARG, MVT::Other, Custom);
else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
@@ -419,6 +420,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
+ case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
case PPCISD::SRL: return "PPCISD::SRL";
@@ -428,6 +430,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STD_32: return "PPCISD::STD_32";
case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
+ case PPCISD::NOP: return "PPCISD::NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin";
case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4";
@@ -1176,6 +1179,13 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
const TargetMachine &TM = DAG.getTarget();
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ return DAG.getNode(PPCISD::TOC_ENTRY, dl, MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero);
SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero);
@@ -1308,7 +1318,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) {
DebugLoc dl = Op.getDebugLoc();
- if (Subtarget.isDarwinABI()) {
+ if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -1317,7 +1327,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
}
- // For the SVR4 ABI we follow the layout of the va_list struct.
+ // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
// We suppose the given va_list is already allocated.
//
// typedef struct {
@@ -1450,21 +1460,13 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT,
}
/// GetFPR - Get the set of FP registers that should be allocated for arguments,
-/// depending on which subtarget is selected.
-static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
- if (Subtarget.isDarwinABI()) {
- static const unsigned FPR[] = {
- PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
- };
- return FPR;
- }
-
-
+/// on Darwin.
+static const unsigned *GetFPR() {
static const unsigned FPR[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
};
+
return FPR;
}
@@ -1487,7 +1489,7 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain,
&Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) {
- if (PPCSubTarget.isSVR4ABI()) {
+ if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins,
dl, DAG, InVals);
} else {
@@ -1505,7 +1507,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) {
- // SVR4 ABI Stack Frame Layout:
+ // 32-bit SVR4 ABI Stack Frame Layout:
// +-----------------------------------+
// +--> | Back chain |
// | +-----------------------------------+
@@ -1687,8 +1689,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
}
- // FIXME SVR4: We only need to save FP argument registers if CR bit 6 is
- // set.
+ // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
+ // is set.
// The double arguments are stored to the VarArgsFrameIndex
// on the stack.
@@ -1731,7 +1733,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
&Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) {
-
// TODO: add description of PPC stack frame format, or at least some docs.
//
MachineFunction &MF = DAG.getMachineFunction();
@@ -1756,7 +1757,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const unsigned *FPR = GetFPR(PPCSubTarget);
+ static const unsigned *FPR = GetFPR();
static const unsigned VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
@@ -1986,7 +1987,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
GPR_idx++;
}
ArgOffset += 16;
- GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs);
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
}
++VR_idx;
} else {
@@ -2262,8 +2263,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
PseudoSourceValue::getFixedStack(NewRetAddr), 0);
- // When using the SVR4 ABI there is no need to move the FP stack slot
- // as the FP is never overwritten.
+ // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
+ // slot as the FP is never overwritten.
if (isDarwinABI) {
int NewFPLoc =
SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
@@ -2311,8 +2312,8 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0);
Chain = SDValue(LROpOut.getNode(), 1);
- // When using the SVR4 ABI there is no need to load the FP stack slot
- // as the FP is never overwritten.
+ // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
+ // slot as the FP is never overwritten.
if (isDarwinABI) {
FPOpOut = getFramePointerFrameIndex(DAG);
FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0);
@@ -2487,7 +2488,6 @@ PPCTargetLowering::FinishCall(unsigned CallConv, DebugLoc dl, bool isTailCall,
int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals) {
-
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
@@ -2529,6 +2529,19 @@ PPCTargetLowering::FinishCall(unsigned CallConv, DebugLoc dl, bool isTailCall,
Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
+ // Add a NOP immediately after the branch instruction when using the 64-bit
+ // SVR4 ABI. At link time, if caller and callee are in a different module and
+ // thus have a different TOC, the call will be replaced with a call to a stub
+ // function which saves the current TOC, loads the TOC of the callee and
+ // branches to the callee. The NOP will be replaced with a load instruction
+ // which restores the TOC of the caller from the TOC save slot of the current
+ // stack frame. If caller and callee belong to the same module (and have the
+ // same TOC), the NOP will remain unchanged.
+ if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
+ // Insert NOP.
+ InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Flag, InFlag);
+ }
+
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(BytesCalleePops, true),
InFlag);
@@ -2547,7 +2560,7 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) {
- if (PPCSubTarget.isSVR4ABI()) {
+ if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, Outs, Ins,
dl, DAG, InVals);
@@ -2567,7 +2580,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) {
// See PPCTargetLowering::LowerFormalArguments_SVR4() for a description
- // of the SVR4 ABI stack frame layout.
+ // of the 32-bit SVR4 ABI stack frame layout.
assert((!isTailCall ||
(CallConv == CallingConv::Fast && PerformTailCallOpt)) &&
@@ -2846,7 +2859,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const unsigned *FPR = GetFPR(PPCSubTarget);
+ static const unsigned *FPR = GetFPR();
static const unsigned VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 7830e0f08d..19fef4da0b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -60,6 +60,8 @@ namespace llvm {
/// though these are usually folded into other nodes.
Hi, Lo,
+ TOC_ENTRY,
+
/// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
/// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
/// compute an allocation on the stack.
@@ -84,6 +86,9 @@ namespace llvm {
/// CALL - A direct function call.
CALL_Darwin, CALL_SVR4,
+ /// NOP - Special NOP which follows 64-bit SVR4 calls.
+ NOP,
+
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
MTCTR,
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 3823e537f1..85524cef01 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -123,6 +123,8 @@ def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
(BL8_ELF tglobaladdr:$dst)>;
def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
(BL8_ELF texternalsym:$dst)>;
+def : Pat<(PPCnop),
+ (NOP)>;
// Atomic operations
let usesCustomDAGSchedInserter = 1 in {
@@ -543,6 +545,10 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
"ld $rD, $src", LdStLD,
[(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+def LDtoc: DSForm_1<58, 0, (outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+ "ld $rD, $disp($reg)", LdStLD,
+ [(set G8RC:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
"ldx $rD, $src", LdStLD,
[(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 759cdf0a48..3c32c4abfc 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -53,6 +53,8 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
+def SDT_PPCnop : SDTypeProfile<0, 0, []>;
+
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
@@ -85,6 +87,7 @@ def PPCfsel : SDNode<"PPCISD::FSEL",
def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
+def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
@@ -111,6 +114,7 @@ def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInFlag, SDNPOutFlag]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
@@ -306,6 +310,10 @@ def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg);
}
+def tocentry : Operand<iPTR> {
+ let PrintMethod = "printTOCEntryLabel";
+ let MIOperandInfo = (ops i32imm:$imm);
+}
// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg
// that doesn't matter.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index b124e60735..2eb2abc047 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -174,7 +174,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::LR, 0
};
-
+
+ // 32-bit SVR4 calling convention.
static const unsigned SVR4_CalleeSavedRegs[] = {
PPC::R14, PPC::R15,
PPC::R16, PPC::R17, PPC::R18, PPC::R19,
@@ -200,7 +201,7 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
- PPC::LR, 0
+ 0
};
// 64-bit Darwin calling convention.
static const unsigned Darwin64_CalleeSavedRegs[] = {
@@ -227,12 +228,41 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::LR8, 0
};
+
+ // 64-bit SVR4 calling convention.
+ static const unsigned SVR4_64_CalleeSavedRegs[] = {
+ PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31,
+
+ PPC::F14, PPC::F15, PPC::F16, PPC::F17,
+ PPC::F18, PPC::F19, PPC::F20, PPC::F21,
+ PPC::F22, PPC::F23, PPC::F24, PPC::F25,
+ PPC::F26, PPC::F27, PPC::F28, PPC::F29,
+ PPC::F30, PPC::F31,
+
+ PPC::CR2, PPC::CR3, PPC::CR4,
+
+ PPC::VRSAVE,
+
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+
+ 0
+ };
if (Subtarget.isDarwinABI())
return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs :
Darwin32_CalleeSavedRegs;
-
- return SVR4_CalleeSavedRegs;
+
+ return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs;
}
const TargetRegisterClass* const*
@@ -267,6 +297,7 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
&PPC::GPRCRegClass, 0
};
+ // 32-bit SVR4 calling convention.
static const TargetRegisterClass * const SVR4_CalleeSavedRegClasses[] = {
&PPC::GPRCRegClass,&PPC::GPRCRegClass,
&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
@@ -295,7 +326,7 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
&PPC::CRBITRCRegClass,
- &PPC::GPRCRegClass, 0
+ 0
};
// 64-bit Darwin calling convention.
@@ -327,12 +358,45 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
&PPC::G8RCRegClass, 0
};
+
+ // 64-bit SVR4 calling convention.
+ static const TargetRegisterClass * const SVR4_64_CalleeSavedRegClasses[] = {
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+ &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
+
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
+ &PPC::F8RCRegClass,&PPC::F8RCRegClass,
+
+ &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
+
+ &PPC::VRSAVERCRegClass,
+
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+ &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
+
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
+ &PPC::CRBITRCRegClass,
+
+ 0
+ };
if (Subtarget.isDarwinABI())
return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegClasses :
Darwin32_CalleeSavedRegClasses;
- return SVR4_CalleeSavedRegClasses;
+ return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegClasses
+ : SVR4_CalleeSavedRegClasses;
}
// needsFP - Return true if the specified function should have a dedicated frame
@@ -364,9 +428,9 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R13); // Small Data Area pointer register
}
- // On PPC64, r13 is the thread pointer. Never allocate this register. Note
- // that this is over conservative, as it also prevents allocation of R31 when
- // the FP is not needed.
+ // On PPC64, r13 is the thread pointer. Never allocate this register.
+ // Note that this is over conservative, as it also prevents allocation of R31
+ // when the FP is not needed.
if (Subtarget.isPPC64()) {
Reserved.set(PPC::R13);
Reserved.set(PPC::R31);
@@ -378,6 +442,11 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
Reserved.set(PPC::X31);
+
+ // The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
+ if (Subtarget.isSVR4ABI()) {
+ Reserved.set(PPC::X2);
+ }
}
if (needsFP(MF))
@@ -911,7 +980,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
// don't have a frame pointer, calls, or dynamic alloca then we do not need
// to adjust the stack pointer (we fit in the Red Zone).
bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
- // FIXME SVR4 The SVR4 ABI has no red zone.
+ // FIXME SVR4 The 32-bit SVR4 ABI has no red zone.
if (!DisableRedZone &&
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
@@ -1006,7 +1075,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
if (!Subtarget.isSVR4ABI()) {
return;
}
-
+
// Get callee saved register information.
MachineFrameInfo *FFI = MF.getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
@@ -1017,16 +1086,19 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
}
unsigned MinGPR = PPC::R31;
+ unsigned MinG8R = PPC::X31;
unsigned MinFPR = PPC::F31;
unsigned MinVR = PPC::V31;
bool HasGPSaveArea = false;
+ bool HasG8SaveArea = false;
bool HasFPSaveArea = false;
bool HasCRSaveArea = false;
bool HasVRSAVESaveArea = false;
bool HasVRSaveArea = false;
SmallVector<CalleeSavedInfo, 18> GPRegs;
+ SmallVector<CalleeSavedInfo, 18> G8Regs;
SmallVector<CalleeSavedInfo, 18> FPRegs;
SmallVector<CalleeSavedInfo, 18> VRegs;
@@ -1042,6 +1114,14 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
if (Reg < MinGPR) {
MinGPR = Reg;
}
+ } else if (RC == PPC::G8RCRegisterClass) {
+ HasG8SaveArea = true;
+
+ G8Regs.push_back(CSI[i]);
+
+ if (Reg < MinG8R) {
+ MinG8R = Reg;
+ }
} else if (RC == PPC::F8RCRegisterClass) {
HasFPSaveArea = true;
@@ -1104,7 +1184,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// General register save area starts right below the Floating-point
// register save area.
- if (HasGPSaveArea) {
+ if (HasGPSaveArea || HasG8SaveArea) {
// Move general register save area spill slots down, taking into account
// the size of the Floating-point register save area.
for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
@@ -1113,7 +1193,22 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
- LowerBound -= (31 - getRegisterNumbering(MinGPR) + 1) * 4;
+ // Move general register save area spill slots down, taking into account
+ // the size of the Floating-point register save area.
+ for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
+ int FI = G8Regs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ unsigned MinReg = std::min<unsigned>(getRegisterNumbering(MinGPR),
+ getRegisterNumbering(MinG8R));
+
+ if (Subtarget.isPPC64()) {
+ LowerBound -= (31 - MinReg + 1) * 8;
+ } else {
+ LowerBound -= (31 - MinReg + 1) * 4;
+ }
}
// The CR save area is below the general register save area.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index bac8e3aed8..140f5df3e6 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -280,7 +280,8 @@ def GPRC : RegisterClass<"PPC", [i32], 32,
let MethodBodies = [{
GPRCClass::iterator
GPRCClass::allocation_order_begin(const MachineFunction &MF) const {
- // In Linux, r2 is reserved for the OS.
+ // 32-bit SVR4 ABI: r2 is reserved for the OS.
+ // 64-bit SVR4 ABI: r2 is reserved for the TOC pointer.
if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin())
return begin()+1;
@@ -291,7 +292,7 @@ def GPRC : RegisterClass<"PPC", [i32], 32,
// On PPC64, r13 is the thread pointer. Never allocate this register.
// Note that this is overconservative, as it also prevents allocation of
// R31 when the FP is not needed.
- // When using the SVR4 ABI, r13 is reserved for the Small Data Area
+ // When using the 32-bit SVR4 ABI, r13 is reserved for the Small Data Area
// pointer.
const PPCSubtarget &Subtarget
= MF.getTarget().getSubtarget<PPCSubtarget>();
@@ -318,6 +319,10 @@ def G8RC : RegisterClass<"PPC", [i64], 64,
let MethodBodies = [{
G8RCClass::iterator
G8RCClass::allocation_order_begin(const MachineFunction &MF) const {
+ // 64-bit SVR4 ABI: r2 is reserved for the TOC pointer.
+ if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin())
+ return begin()+1;
+
return begin();
}
G8RCClass::iterator
@@ -372,4 +377,3 @@ def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2,
def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>;
def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>;
def VRSAVERC : RegisterClass<"PPC", [i32], 32, [VRSAVE]>;
-
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 6787390f92..02c8ad79bd 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -138,8 +138,9 @@ public:
/// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
unsigned getDarwinVers() const { return DarwinVers; }
- bool isDarwinABI() const { return isDarwin() || IsPPC64; }
- bool isSVR4ABI() const { return !isDarwin() && !IsPPC64; }
+ bool isDarwinABI() const { return isDarwin(); }
+ bool isSVR4ABI() const { return !isDarwin(); }
+
};
} // End llvm namespace