aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2012-03-31 14:45:15 +0000
committerHal Finkel <hfinkel@anl.gov>2012-03-31 14:45:15 +0000
commit5b00ceaeeabff8c25abb09926343c3fcb06053d8 (patch)
tree871027bb491aa31258b3949de78554eee8caea81
parentf5f256cffd7535d31dc24e7c9166f2084fe66e47 (diff)
Fix dynamic linking on PPC64.
Dynamic linking on PPC64 has had problems since we had to move the top-down hazard-detection logic post-ra. For dynamic linking to work there needs to be a nop placed after every call. It turns out that it is really hard to guarantee that nothing will be placed in between the call (bl) and the nop during post-ra scheduling. Previous attempts at fixing this by placing logic inside the hazard detector only partially worked. This is now fixed in a different way: call+nop codegen-only instructions. As far as CodeGen is concerned the pair is now a single instruction and cannot be split. This solution works much better than previous attempts. The scoreboard hazard detector is also renamed to be more generic, there is currently no cpu-specific logic in it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153816 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp33
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.h14
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp23
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h4
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td20
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td59
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp5
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td3
8 files changed, 126 insertions, 35 deletions
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index ae317af482..6ed1fb9e6a 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -22,17 +22,29 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
-// PowerPC 440 Hazard Recognizer
-void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) {
+// PowerPC Scoreboard Hazard Recognizer
+void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
- if (!MCID) {
+ if (!MCID)
// This is a PPC pseudo-instruction.
return;
- }
ScoreboardHazardRecognizer::EmitInstruction(SU);
}
+ScheduleHazardRecognizer::HazardType
+PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+void PPCScoreboardHazardRecognizer::AdvanceCycle() {
+ ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void PPCScoreboardHazardRecognizer::Reset() {
+ ScoreboardHazardRecognizer::Reset();
+}
+
//===----------------------------------------------------------------------===//
// PowerPC 970 Hazard Recognizer
//
@@ -61,7 +73,6 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) {
PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
: TII(tii) {
- LastWasBL8_ELF = false;
EndDispatchGroup();
}
@@ -132,15 +143,6 @@ getHazardType(SUnit *SU, int Stalls) {
return NoHazard;
unsigned Opcode = MI->getOpcode();
-
- // If the last instruction was a BL8_ELF, then the NOP must follow it
- // directly (this is strong requirement from the linker due to the ELF ABI).
- // We return only Hazard (and not NoopHazard) because if the NOP is necessary
- // then it will already be in the instruction stream (it is not always
- // necessary; tail calls, for example, do not need it).
- if (LastWasBL8_ELF && Opcode != PPC::NOP)
- return Hazard;
-
bool isFirst, isSingle, isCracked, isLoad, isStore;
PPCII::PPC970_Unit InstrType =
GetInstrType(Opcode, isFirst, isSingle, isCracked,
@@ -199,8 +201,6 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
return;
unsigned Opcode = MI->getOpcode();
- LastWasBL8_ELF = (Opcode == PPC::BL8_ELF);
-
bool isFirst, isSingle, isCracked, isLoad, isStore;
PPCII::PPC970_Unit InstrType =
GetInstrType(Opcode, isFirst, isSingle, isCracked,
@@ -240,7 +240,6 @@ void PPCHazardRecognizer970::AdvanceCycle() {
}
void PPCHazardRecognizer970::Reset() {
- LastWasBL8_ELF = false;
EndDispatchGroup();
}
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index d80a3850b0..55b45d01b2 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -21,16 +21,19 @@
namespace llvm {
-/// PPCHazardRecognizer440 - This class implements a scoreboard-based
-/// hazard recognizer for the PPC 440 and friends.
-class PPCHazardRecognizer440 : public ScoreboardHazardRecognizer {
+/// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based
+/// hazard recognizer for generic PPC processors.
+class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer {
const ScheduleDAG *DAG;
public:
- PPCHazardRecognizer440(const InstrItineraryData *ItinData,
+ PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData,
const ScheduleDAG *DAG_) :
ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {}
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void Reset();
};
/// PPCHazardRecognizer970 - This class defines a finite state automata that
@@ -49,9 +52,6 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
// HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
bool HasCTRSet;
- // Was the last instruction issued a BL8_ELF
- bool LastWasBL8_ELF;
-
// StoredPtr - Keep track of the address of any store. If we see a load from
// the same address (or one that aliases it), disallow the store. We can have
// up to four stores in one dispatch group, hence we track up to 4.
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index bfa4df9248..8357e8bbdb 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -472,6 +472,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
case PPCISD::STD_32: return "PPCISD::STD_32";
case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
+ case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4";
case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
case PPCISD::NOP: return "PPCISD::NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
@@ -2813,9 +2814,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
}
- Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
// Add a NOP immediately after the branch instruction when using the 64-bit
// SVR4 ABI. At link time, if caller and callee are in a different module and
// thus have a different TOC, the call will be replaced with a call to a stub
@@ -2824,8 +2822,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// which restores the TOC of the caller from the TOC save slot of the current
// stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged.
+
+ bool needsTOCRestore = false;
if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
- SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
if (CallOpc == PPCISD::BCTRL_SVR4) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
@@ -2836,14 +2835,22 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// since r2 is a reserved register (which prevents the register allocator
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
- Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
- InFlag = Chain.getValue(1);
- } else {
+ needsTOCRestore = true;
+ } else if (CallOpc == PPCISD::CALL_SVR4) {
// Otherwise insert NOP.
- InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag);
+ CallOpc = PPCISD::CALL_NOP_SVR4;
}
}
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ if (needsTOCRestore) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(BytesCalleePops, true),
InFlag);
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 2e046c4693..e79229f49b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -95,7 +95,9 @@ namespace llvm {
EXTSW_32,
/// CALL - A direct function call.
- CALL_Darwin, CALL_SVR4,
+ /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit
+ /// SVR4 calls.
+ CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4,
/// NOP - Special NOP which follows 64-bit SVR4 calls.
NOP,
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 78f3596152..e1d8b65b8a 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -89,10 +89,22 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
let Uses = [RM] in {
def BL8_ELF : IForm<18, 0, 1,
(outs), (ins calltarget:$func, variable_ops),
- "bl $func", BrB, []>; // See Pat patterns below.
+ "bl $func", BrB, []>; // See Pat patterns below.
+
+ let isCodeGenOnly = 1 in
+ def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func\n\tnop", BrB, []>;
+
def BLA8_ELF : IForm<18, 1, 1,
(outs), (ins aaddr:$func, variable_ops),
"bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
+
+ let isCodeGenOnly = 1 in
+ def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func\n\tnop", BrB,
+ [(PPCcall_nop_SVR4 (i64 imm:$func))]>;
}
let Uses = [X11, CTR8, RM] in {
def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
@@ -111,8 +123,14 @@ def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)),
def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
(BL8_ELF tglobaladdr:$dst)>;
+def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)),
+ (BL8_NOP_ELF tglobaladdr:$dst)>;
+
def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
(BL8_ELF texternalsym:$dst)>;
+def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)),
+ (BL8_NOP_ELF texternalsym:$dst)>;
+
def : Pat<(PPCnop),
(NOP)>;
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index d332e2ac97..d8e4b2bdf3 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -51,6 +51,36 @@ class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
+// Two joined instructions; used to emit two adjacent instructions as one.
+// The itinerary from the first instruction is used for scheduling and
+// classification.
+class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : Instruction {
+ field bits<64> Inst;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode1;
+ let Inst{32-37} = opcode2;
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ let TSFlags{0} = PPC970_First;
+ let TSFlags{1} = PPC970_Single;
+ let TSFlags{2} = PPC970_Cracked;
+ let TSFlags{5-3} = PPC970_Unit;
+}
// 1.7.1 I-Form
class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
@@ -164,6 +194,35 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
let Addr = 0;
}
+class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<21> Addr;
+
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+
+ let Inst{38-42} = A;
+ let Inst{43-47} = Addr{20-16}; // Base Reg
+ let Inst{48-63} = Addr{15-0}; // Displacement
+}
+
+// This is used to emit BL8+NOP.
+class IForm_and_DForm_4_zero<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : IForm_and_DForm_1<opcode1, aa, lk, opcode2,
+ OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let Addr = 0;
+}
+
class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 7a8ec40ab1..6f74828b21 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -51,7 +51,7 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
if (Directive == PPC::DIR_440) {
const InstrItineraryData *II = TM->getInstrItineraryData();
- return new PPCHazardRecognizer440(II, DAG);
+ return new PPCScoreboardHazardRecognizer(II, DAG);
}
return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
@@ -684,6 +684,9 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case PPC::GC_LABEL:
case PPC::DBG_VALUE:
return 0;
+ case PPC::BL8_NOP_ELF:
+ case PPC::BLA8_NOP_ELF:
+ return 8;
default:
return 4; // PowerPC instructions are all 4 bytes
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 939b71ae40..1b15df058c 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -116,6 +116,9 @@ def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;