Diffstat (limited to 'lib/Target/PowerPC')
34 files changed, 1444 insertions, 539 deletions
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 192d18d664..6036428fad 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -26,6 +26,7 @@ add_llvm_target(PowerPCCodeGen PPCRegisterInfo.cpp PPCSubtarget.cpp PPCTargetMachine.cpp + PPCTargetTransformInfo.cpp PPCSelectionDAGInfo.cpp ) diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 631f749581..f24edf62ed 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -32,6 +32,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case FK_Data_8: case PPC::fixup_ppc_toc: case PPC::fixup_ppc_tlsreg: + case PPC::fixup_ppc_nofixup: return Value; case PPC::fixup_ppc_lo14: case PPC::fixup_ppc_toc16_ds: @@ -85,7 +86,8 @@ public: { "fixup_ppc_toc", 0, 64, 0 }, { "fixup_ppc_toc16", 16, 16, 0 }, { "fixup_ppc_toc16_ds", 16, 14, 0 }, - { "fixup_ppc_tlsreg", 0, 0, 0 } + { "fixup_ppc_tlsreg", 0, 0, 0 }, + { "fixup_ppc_nofixup", 0, 0, 0 } }; if (Kind < FirstTargetFixupKind) @@ -117,7 +119,7 @@ public: bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { // FIXME. llvm_unreachable("relaxInstruction() unimplemented"); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 462d7072b5..61868d446f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -9,6 +9,7 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCValue.h" @@ -33,9 +34,25 @@ namespace { const MCFixup &Fixup, bool IsPCRel) const; virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset); + + virtual void sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs); + }; + + class PPCELFRelocationEntry : public ELFRelocationEntry { + public: + PPCELFRelocationEntry(const ELFRelocationEntry &RE); + bool operator<(const PPCELFRelocationEntry &RE) const { + return (RE.r_offset < r_offset || + (RE.r_offset == r_offset && RE.Type > Type)); + } }; } +PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE) + : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol, + RE.r_addend, *RE.Fixup) {} + PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) : MCELFObjectTargetWriter(Is64Bit, OSABI, Is64Bit ? 
ELF::EM_PPC64 : ELF::EM_PPC, @@ -60,9 +77,14 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case PPC::fixup_ppc_br24: Type = ELF::R_PPC_REL24; break; + case FK_Data_4: case FK_PCRel_4: Type = ELF::R_PPC_REL32; break; + case FK_Data_8: + case FK_PCRel_8: + Type = ELF::R_PPC64_REL64; + break; } } else { switch ((unsigned)Fixup.getKind()) { @@ -79,12 +101,24 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_TPREL16_HA: Type = ELF::R_PPC_TPREL16_HA; break; + case MCSymbolRefExpr::VK_PPC_DTPREL16_HA: + Type = ELF::R_PPC64_DTPREL16_HA; + break; case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC_ADDR16_HA; break; case MCSymbolRefExpr::VK_PPC_TOC16_HA: Type = ELF::R_PPC64_TOC16_HA; break; + case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA: + Type = ELF::R_PPC64_GOT_TPREL16_HA; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA: + Type = ELF::R_PPC64_GOT_TLSGD16_HA; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA: + Type = ELF::R_PPC64_GOT_TLSLD16_HA; + break; } break; case PPC::fixup_ppc_lo16: @@ -93,12 +127,21 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_TPREL16_LO: Type = ELF::R_PPC_TPREL16_LO; break; + case MCSymbolRefExpr::VK_PPC_DTPREL16_LO: + Type = ELF::R_PPC64_DTPREL16_LO; + break; case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC_ADDR16_LO; break; case MCSymbolRefExpr::VK_PPC_TOC16_LO: Type = ELF::R_PPC64_TOC16_LO; break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO: + Type = ELF::R_PPC64_GOT_TLSGD16_LO; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO: + Type = ELF::R_PPC64_GOT_TLSLD16_LO; + break; } break; case PPC::fixup_ppc_lo14: @@ -108,7 +151,24 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, Type = ELF::R_PPC64_TOC; break; case PPC::fixup_ppc_toc16: - Type = ELF::R_PPC64_TOC16; + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TPREL16_LO: + Type = ELF::R_PPC64_TPREL16_LO; + break; + case MCSymbolRefExpr::VK_PPC_DTPREL16_LO: + Type = ELF::R_PPC64_DTPREL16_LO; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC64_TOC16; + break; + case MCSymbolRefExpr::VK_PPC_TOC16_LO: + Type = ELF::R_PPC64_TOC16_LO; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO: + Type = ELF::R_PPC64_GOT_TLSLD16_LO; + break; + } break; case PPC::fixup_ppc_toc16_ds: switch (Modifier) { @@ -119,14 +179,25 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_TOC16_LO: Type = ELF::R_PPC64_TOC16_LO_DS; break; - case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS: - Type = ELF::R_PPC64_GOT_TPREL16_DS; + case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO: + Type = ELF::R_PPC64_GOT_TPREL16_LO_DS; break; } break; case PPC::fixup_ppc_tlsreg: Type = ELF::R_PPC64_TLS; break; + case PPC::fixup_ppc_nofixup: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TLSGD: + Type = ELF::R_PPC64_TLSGD; + break; + case MCSymbolRefExpr::VK_PPC_TLSLD: + Type = ELF::R_PPC64_TLSLD; + break; + } + break; case FK_Data_8: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); @@ -191,6 +262,34 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { } } +// The standard sorter only sorts on the r_offset field, but PowerPC can +// have multiple relocations at the same offset. Sort secondarily on the +// relocation type to avoid nondeterminism. 
+void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs) { + + // Copy to a temporary vector of relocation entries having a different + // sort function. + std::vector<PPCELFRelocationEntry> TmpRelocs; + + for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin(); + R != Relocs.end(); ++R) { + TmpRelocs.push_back(PPCELFRelocationEntry(*R)); + } + + // Sort in place by ascending r_offset and descending r_type. + array_pod_sort(TmpRelocs.begin(), TmpRelocs.end()); + + // Copy back to the original vector. + unsigned I = 0; + for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin(); + R != TmpRelocs.end(); ++R, ++I) { + Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type, + R->Symbol, R->r_addend, *R->Fixup); + } +} + + MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, bool Is64Bit, uint8_t OSABI) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 75bb851630..7917f7736e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -47,6 +47,10 @@ enum Fixups { /// fixup_ppc_tlsreg - Insert thread-pointer register number. fixup_ppc_tlsreg, + + /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call + /// to __tls_get_addr for the TLS general and local dynamic models. + fixup_ppc_nofixup, // Marker LastTargetFixupKind, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 215aa40c4a..a25d7fe64f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -17,8 +17,9 @@ using namespace llvm; void PPCMCAsmInfoDarwin::anchor() { } PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { - if (is64Bit) - PointerSize = 8; + if (is64Bit) { + PointerSize = CalleeSaveStackSlotSize = 8; + } IsLittleEndian = false; PCSymbol = "."; @@ -35,8 +36,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { void PPCLinuxMCAsmInfo::anchor() { } PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { - if (is64Bit) - PointerSize = 8; + if (is64Bit) { + PointerSize = CalleeSaveStackSlotSize = 8; + } IsLittleEndian = false; // ".comm align is in bytes but .align is pow-2." 
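The new PPCELFObjectWriter::sortRelocs above exists because a single PowerPC instruction can now carry two relocations at the same offset (for example, the __tls_get_addr call receives both the branch fixup and the fixup_ppc_nofixup symbol tie-in introduced in this patch), and the default offset-only sort left their relative order nondeterministic. As a rough illustration only — hypothetical Reloc struct and sortByOffsetThenType helper, not the LLVM MC API — a two-key sort keyed on the offset and, within equal offsets, on the relocation type is all that is needed to make the output stable:

#include <algorithm>
#include <cstdint>
#include <vector>

// Sketch of a deterministic relocation ordering: primary key is the
// relocation offset, secondary key is the relocation type, so two entries
// at the same offset always come out in the same order.
struct Reloc {
  uint64_t Offset; // corresponds to r_offset
  unsigned Type;   // ELF relocation type number
};

static void sortByOffsetThenType(std::vector<Reloc> &Relocs) {
  std::sort(Relocs.begin(), Relocs.end(),
            [](const Reloc &A, const Reloc &B) {
              if (A.Offset != B.Offset)
                return A.Offset < B.Offset; // ascending offset
              return A.Type > B.Type;       // descending type within an offset
            });
}
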
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 5b208d41f4..d048426d43 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -62,8 +63,6 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; - unsigned getTLSOffsetEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const; unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo, @@ -82,11 +81,12 @@ public: SmallVectorImpl<MCFixup> &Fixups) const { uint64_t Bits = getBinaryCodeForInstr(MI, Fixups); - // BL8_NOPELF and BLA8_NOP_ELF is both size of 8 bacause of the + // BL8_NOP_ELF, BLA8_NOP_ELF, etc., all have a size of 8 because of the // following 'nop'. unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value! unsigned Opcode = MI.getOpcode(); - if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF) + if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF || + Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD) Size = 8; // Output the constant in big endian byte order. @@ -119,6 +119,17 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo, // Add a fixup for the branch target. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_br24)); + + // For special TLS calls, add another fixup for the symbol. Apparently + // BL8_NOP_ELF, BL8_NOP_ELF_TLSGD, and BL8_NOP_ELF_TLSLD are sufficiently + // similar that TblGen will not generate a separate case for the latter + // two, so this is the only way to get the extra fixup generated. + unsigned Opcode = MI.getOpcode(); + if (Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD) { + const MCOperand &MO2 = MI.getOperand(OpNo+1); + Fixups.push_back(MCFixup::Create(0, MO2.getExpr(), + (MCFixupKind)PPC::fixup_ppc_nofixup)); + } return 0; } @@ -199,17 +210,6 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, } -unsigned PPCMCCodeEmitter::getTLSOffsetEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { - const MCOperand &MO = MI.getOperand(OpNo); - - // Add a fixup for the GOT displacement to the TLS block offset. 
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_toc16_ds)); - return 0; -} - - unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); @@ -223,7 +223,6 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, return getPPCRegisterNumbering(PPC::X13); } - unsigned PPCMCCodeEmitter:: get_crbitm_encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index a0e4cf3005..4a420929d0 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -14,6 +14,9 @@ #ifndef PPCMCTARGETDESC_H #define PPCMCTARGETDESC_H +// GCC #defines PPC on Linux but we use it as our namespace name +#undef PPC + #include "llvm/Support/DataTypes.h" namespace llvm { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index f872e861bf..972e13852e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -14,6 +14,9 @@ #ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H #define LLVM_TARGET_POWERPC_PPCPREDICATES_H +// GCC #defines PPC on Linux but we use it as our namespace name +#undef PPC + namespace llvm { namespace PPC { /// Predicate - These are "(BI << 5) | BO" for various predicates. diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index bbd247f20f..f71979f245 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -25,6 +25,7 @@ namespace llvm { class PPCTargetMachine; class FunctionPass; + class ImmutablePass; class JITCodeEmitter; class MachineInstr; class AsmPrinter; @@ -37,6 +38,9 @@ namespace llvm { JITCodeEmitter &MCE); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); + + /// \brief Creates an PPC-specific Target Transformation Info pass. + ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM); namespace PPCII { @@ -53,28 +57,32 @@ namespace llvm { /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to /// the function's picbase, e.g. lo16(symbol-picbase). - MO_PIC_FLAG = 4, + MO_PIC_FLAG = 2, /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase). - MO_NLP_FLAG = 8, + MO_NLP_FLAG = 4, /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a /// symbol with hidden visibility. This causes a different kind of /// non-lazy-pointer to be generated. - MO_NLP_HIDDEN_FLAG = 16, + MO_NLP_HIDDEN_FLAG = 8, /// The next are not flags but distinct values. - MO_ACCESS_MASK = 0xe0, + MO_ACCESS_MASK = 0xf0, /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol) - MO_LO16 = 1 << 5, - MO_HA16 = 2 << 5, + MO_LO16 = 1 << 4, + MO_HA16 = 2 << 4, + + MO_TPREL16_HA = 3 << 4, + MO_TPREL16_LO = 4 << 4, - MO_TPREL16_HA = 3 << 5, - MO_TPREL16_LO = 4 << 5, - MO_GOT_TPREL16_DS = 5 << 5, - MO_TLS = 6 << 5 + /// These values identify relocations on immediates folded + /// into memory operations. 
+ MO_DTPREL16_LO = 5 << 4, + MO_TLSLD16_LO = 6 << 4, + MO_TOC16_LO = 7 << 4 }; } // end namespace PPCII diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index cb15dadb7e..992913602a 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -39,7 +39,12 @@ def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_E500mc", "">; def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_E5500", "">; +def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">; +def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">; +def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">; +def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">; def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; +def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">; def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", @@ -58,6 +63,25 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", "Enable Book E instructions">; +def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", + "Enable QPX instructions">; + +// Note: Future features to add when support is extended to more +// recent ISA levels: +// +// CMPB p6, p6x, p7 cmpb +// DFP p6, p6x, p7 decimal floating-point instructions +// FLT_CVT p7 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz +// FPRND p5x, p6, p6x, p7 frim, frin, frip, friz +// FRE p5 through p7 fre (vs. fres, available since p3) +// FRSQRTES p5 through p7 frsqrtes (vs. 
frsqrte, available since p3) +// LDBRX p7 load with byte reversal +// LFIWAX p6, p6x, p7 lfiwax +// LFIWZX p7 lfiwzx +// POPCNTB p5 through p7 popcntb and related instructions +// POPCNTD p7 popcntd and related instructions +// RECIP_PREC p6, p6x, p7 higher precision reciprocal estimates +// VSX p7 vector-scalar instruction set //===----------------------------------------------------------------------===// // Register File Description @@ -109,10 +133,30 @@ def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, FeatureSTFIWX, FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>; +def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE, + FeatureMFOCRF, FeatureFSqrt, + FeatureSTFIWX, FeatureISEL, + Feature64Bit /*, Feature64BitRegs */, + FeatureQPX]>; +def : Processor<"pwr3", G5Itineraries, + [DirectivePwr3, FeatureAltivec, FeatureMFOCRF, + FeatureSTFIWX, Feature64Bit]>; +def : Processor<"pwr4", G5Itineraries, + [DirectivePwr4, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; +def : Processor<"pwr5", G5Itineraries, + [DirectivePwr5, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; +def : Processor<"pwr5x", G5Itineraries, + [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; def : Processor<"pwr6", G5Itineraries, [DirectivePwr6, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; +def : Processor<"pwr6x", G5Itineraries, + [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; def : Processor<"pwr7", G5Itineraries, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 4315bc1a07..eae9b7b7fb 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -32,9 +32,10 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Constants.h" #include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -44,7 +45,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" @@ -426,20 +426,25 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { bool IsExternal = false; bool IsFunction = false; bool IsCommon = false; + bool IsAvailExt = false; if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); - MOSymbol = Mang->getSymbol(GValue); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? 
+ GAlias->resolveAliasedGlobal(false) : GValue; + MOSymbol = Mang->getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); IsExternal = GVar && !GVar->hasInitializer(); - IsCommon = GVar && GValue->hasCommonLinkage(); + IsCommon = GVar && RealGValue->hasCommonLinkage(); IsFunction = !GVar; + IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); else if (MO.isJTI()) MOSymbol = GetJTISymbol(MO.getIndex()); - if (IsExternal || IsFunction || IsCommon || MO.isJTI()) + if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI()) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = @@ -459,17 +464,24 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // associated TOC entry. Otherwise reference the symbol directly. TmpInst.setOpcode(PPC::LDrs); const MachineOperand &MO = MI->getOperand(1); - assert((MO.isGlobal() || MO.isJTI()) && "Invalid operand for LDtocL!"); + assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) && + "Invalid operand for LDtocL!"); MCSymbol *MOSymbol = 0; if (MO.isJTI()) MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex())); - else { + else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + else if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); - MOSymbol = Mang->getSymbol(GValue); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? + GAlias->resolveAliasedGlobal(false) : GValue; + MOSymbol = Mang->getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); - if (!GVar || !GVar->hasInitializer() || GValue->hasCommonLinkage()) + if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || + RealGValue->hasAvailableExternallyLinkage()) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); } @@ -496,8 +508,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); - MOSymbol = Mang->getSymbol(GValue); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? 
+ GAlias->resolveAliasedGlobal(false) : GValue; + MOSymbol = Mang->getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); IsExternal = GVar && !GVar->hasInitializer(); IsFunction = !GVar; } else if (MO.isCPI()) @@ -513,8 +528,24 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(TmpInst); return; } - case PPC::LDgotTPREL: { - // Transform %Xd = LDgotTPREL <ga:@sym>, %Xs + case PPC::ADDISgotTprelHA: { + // Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTprel = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X2) + .addExpr(SymGotTprel)); + return; + } + case PPC::LDgotTprelL: { + // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); // Change the opcode to LDrs, which is a form of LD with the offset @@ -524,12 +555,148 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); const MCExpr *Exp = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS, + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO, OutContext); TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); OutStreamer.EmitInstruction(TmpInst); return; } + case PPC::ADDIStlsgdHA: { + // Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsGD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X2) + .addExpr(SymGotTlsGD)); + return; + } + case PPC::ADDItlsgdL: { + // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym> + // Into: %Xd = ADDI8L %Xs, sym@got@tlsgd@l + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsGD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymGotTlsGD)); + return; + } + case PPC::GETtlsADDR: { + // Transform: %X3 = GETtlsADDR %X3, <ga:@sym> + // Into: BL8_NOP_ELF_TLSGD __tls_get_addr(sym@tlsgd) + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + + StringRef Name = "__tls_get_addr"; + MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); + const MCSymbolRefExpr *TlsRef = + MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymVar = + 
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSGD) + .addExpr(TlsRef) + .addExpr(SymVar)); + return; + } + case PPC::ADDIStlsldHA: { + // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsLD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X2) + .addExpr(SymGotTlsLD)); + return; + } + case PPC::ADDItlsldL: { + // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym> + // Into: %Xd = ADDI8L %Xs, sym@got@tlsld@l + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsLD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymGotTlsLD)); + return; + } + case PPC::GETtlsldADDR: { + // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym> + // Into: BL8_NOP_ELF_TLSLD __tls_get_addr(sym@tlsld) + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + + StringRef Name = "__tls_get_addr"; + MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); + const MCSymbolRefExpr *TlsRef = + MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymVar = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSLD) + .addExpr(TlsRef) + .addExpr(SymVar)); + return; + } + case PPC::ADDISdtprelHA: { + // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym> + // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymDtprel = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X3) + .addExpr(SymDtprel)); + return; + } + case PPC::ADDIdtprelL: { + // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym> + // Into: %Xd = ADDI8L %Xs, sym@dtprel@l + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymDtprel = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymDtprel)); + return; + } case PPC::MFCRpseud: case PPC::MFCR8pseud: // Transform: %R3 = MFCRpseud %CR7 @@ -568,14 +735,14 @@ void 
PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function // entry point. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), - 8/*size*/, 0/*addrspace*/); + 8 /*size*/); MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.")); // Generates a R_PPC64_TOC relocation for TOC base insertion. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, MCSymbolRefExpr::VK_PPC_TOC, OutContext), - 8/*size*/, 0/*addrspace*/); + 8/*size*/); // Emit a null environment pointer. - OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */); + OutStreamer.EmitIntValue(0, 8 /* size */); OutStreamer.SwitchSection(Current); MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( @@ -604,6 +771,25 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { } } + MachineModuleInfoELF &MMIELF = + MMI->getObjFileInfo<MachineModuleInfoELF>(); + + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + // L_foo$stub: + OutStreamer.EmitLabel(Stubs[i].first); + // .long _foo + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(), + OutContext), + isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + } + + Stubs.clear(); + OutStreamer.AddBlankLine(); + } + return AsmPrinter::doFinalization(M); } @@ -638,7 +824,12 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "ppcA2", "ppce500mc", "ppce5500", + "power3", + "power4", + "power5", + "power5x", "power6", + "power6x", "power7", "ppc64" }; @@ -653,8 +844,11 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { assert(Directive <= PPC::DIR_64 && "Directive out of range."); // FIXME: This is a total hack, finish mc'izing the PPC backend. - if (OutStreamer.hasRawTextSupport()) + if (OutStreamer.hasRawTextSupport()) { + assert(Directive < sizeof(CPUDirectives) / sizeof(*CPUDirectives) && + "CPUDirectives[] might not be up-to-date!"); OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive])); + } // Prime text sections so they are adjacent. This reduces the likelihood a // large data or debug section causes a branch to exceed 16M limit. @@ -867,7 +1061,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { if (MCSym.getInt()) // External to current translation unit. - OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/); else // Internal to current translation unit. // @@ -877,7 +1071,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { // fill in the value for the NLP in those cases. OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), OutContext), - isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + isPPC64 ? 8 : 4/*size*/); } Stubs.clear(); @@ -896,7 +1090,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { OutStreamer.EmitValue(MCSymbolRefExpr:: Create(Stubs[i].second.getPointer(), OutContext), - isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + isPPC64 ? 
8 : 4/*size*/); } Stubs.clear(); diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 99115752f7..bd1c378681 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -28,10 +28,16 @@ using namespace llvm; STATISTIC(NumExpanded, "Number of branches expanded to long format"); +namespace llvm { + void initializePPCBSelPass(PassRegistry&); +} + namespace { struct PPCBSel : public MachineFunctionPass { static char ID; - PPCBSel() : MachineFunctionPass(ID) {} + PPCBSel() : MachineFunctionPass(ID) { + initializePPCBSelPass(*PassRegistry::getPassRegistry()); + } /// BlockSizes - The sizes of the basic blocks in the function. std::vector<unsigned> BlockSizes; @@ -45,6 +51,9 @@ namespace { char PPCBSel::ID = 0; } +INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector", + false, false) + /// createPPCBranchSelectionPass - returns an instance of the Branch Selection /// Pass /// diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 05db2c5e73..b98cc489f6 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -43,7 +43,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Constants.h" +#include "llvm/IR/Constants.h" #include "llvm/PassSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -54,6 +54,10 @@ using namespace llvm; STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); +namespace llvm { + void initializePPCCTRLoopsPass(PassRegistry&); +} + namespace { class CountValue; struct PPCCTRLoops : public MachineFunctionPass { @@ -64,7 +68,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - PPCCTRLoops() : MachineFunctionPass(ID) {} + PPCCTRLoops() : MachineFunctionPass(ID) { + initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -174,6 +180,12 @@ namespace { }; } // end anonymous namespace +INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", + false, false) /// isCompareEquals - Returns true if the instruction is a compare equals /// instruction with an immediate operand. diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 3f87e883b1..caeb1796f7 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -27,9 +27,10 @@ def RetCC_PPC : CallingConv<[ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, + CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, - CCIfType<[f32], CCAssignToReg<[F1]>>, - CCIfType<[f64], CCAssignToReg<[F1, F2]>>, + CCIfType<[f32], CCAssignToReg<[F1, F2]>>, + CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>, // Vector types are always returned in V2. 
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>> @@ -37,49 +38,20 @@ def RetCC_PPC : CallingConv<[ //===----------------------------------------------------------------------===// -// PowerPC Argument Calling Conventions -//===----------------------------------------------------------------------===// -/* -def CC_PPC : CallingConv<[ - // The first 8 integer arguments are passed in integer registers. - CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, - CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, - - // Common sub-targets passes FP values in F1 - F13 - CCIfType<[f32, f64], - CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>, - - // The first 12 Vector arguments are passed in altivec registers. - CCIfType<[v16i8, v8i16, v4i32, v4f32], - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>> - -/* - // Integer/FP values get stored in stack slots that are 8 bytes in size and - // 8-byte aligned if there are no more registers to hold them. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, - - // Vectors get 16-byte stack slots that are 16-byte aligned. - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCAssignToStack<16, 16>>*/ -]>; - -*/ - -//===----------------------------------------------------------------------===// -// PowerPC System V Release 4 ABI +// PowerPC System V Release 4 32-bit ABI //===----------------------------------------------------------------------===// -def CC_PPC_SVR4_Common : CallingConv<[ +def CC_PPC32_SVR4_Common : CallingConv<[ // The ABI requires i64 to be passed in two adjacent registers with the first // register having an odd register number. - CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>, + CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>, // The first 8 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, // Make sure the i64 words from a long double are either both passed in // registers or both passed on the stack. - CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>, + CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignFPArgRegs">>>, // FP values are passed in F1 - F8. CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, @@ -100,18 +72,18 @@ def CC_PPC_SVR4_Common : CallingConv<[ // This calling convention puts vector arguments always on the stack. It is used // to assign vector arguments which belong to the variable portion of the // parameter list of a variable argument function. -def CC_PPC_SVR4_VarArg : CallingConv<[ - CCDelegateTo<CC_PPC_SVR4_Common> +def CC_PPC32_SVR4_VarArg : CallingConv<[ + CCDelegateTo<CC_PPC32_SVR4_Common> ]>; -// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put -// vector arguments in vector registers before putting them on the stack. -def CC_PPC_SVR4 : CallingConv<[ +// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to +// put vector arguments in vector registers before putting them on the stack. +def CC_PPC32_SVR4 : CallingConv<[ // The first 12 Vector arguments are passed in AltiVec registers. CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>, - CCDelegateTo<CC_PPC_SVR4_Common> + CCDelegateTo<CC_PPC32_SVR4_Common> ]>; // Helper "calling convention" to handle aggregate by value arguments. 
@@ -122,15 +94,15 @@ def CC_PPC_SVR4 : CallingConv<[ // Still, the address of the aggregate copy in the callers stack frame is passed // in a GPR (or in the parameter list area if all GPRs are allocated) from the // caller to the callee. The location for the address argument is assigned by -// the CC_PPC_SVR4 calling convention. +// the CC_PPC32_SVR4 calling convention. // -// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are +// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are // not passed by value. -def CC_PPC_SVR4_ByVal : CallingConv<[ +def CC_PPC32_SVR4_ByVal : CallingConv<[ CCIfByVal<CCPassByVal<4, 4>>, - CCCustom<"CC_PPC_SVR4_Custom_Dummy"> + CCCustom<"CC_PPC32_SVR4_Custom_Dummy"> ]>; def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20, diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 0dcb5deb5d..d68bfd12e4 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -19,7 +19,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/PassManager.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -68,7 +68,6 @@ namespace { unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const; - unsigned getTLSOffsetEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const; const char *getPassName() const { return "PowerPC Machine Code Emitter"; } @@ -245,13 +244,6 @@ unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI, } -unsigned PPCCodeEmitter::getTLSOffsetEncoding(const MachineInstr &MI, - unsigned OpNo) const { - llvm_unreachable("TLS not supported on the old JIT."); - return 0; -} - - unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const { llvm_unreachable("TLS not supported on the old JIT."); diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index fdd85ff11f..0a396e6693 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -119,12 +119,21 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. } - for (MachineRegisterInfo::liveout_iterator - I = MF->getRegInfo().liveout_begin(), - E = MF->getRegInfo().liveout_end(); I != E; ++I) { - unsigned RegNo = getPPCRegisterNumbering(*I); - if (VRRegNo[RegNo] == *I) // If this really is a vector reg. - UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. + + // Live out registers appear as use operands on return instructions. 
+ for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); + UsedRegMask != 0 && BI != BE; ++BI) { + const MachineBasicBlock &MBB = *BI; + if (MBB.empty() || !MBB.back().isReturn()) + continue; + const MachineInstr &Ret = MBB.back(); + for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { + const MachineOperand &MO = Ret.getOperand(I); + if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) + continue; + unsigned RegNo = getPPCRegisterNumbering(MO.getReg()); + UsedRegMask &= ~(1 << (31-RegNo)); + } } // If no registers are used, turn this into a copy. @@ -198,13 +207,14 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { // to adjust the stack pointer (we fit in the Red Zone). For 64-bit // SVR4, we also require a stack frame if we need to spill the CR, // since this spill area is addressed relative to the stack pointer. - bool DisableRedZone = MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::NoRedZone); - // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can - // still generate stackless code if all local vars are reg-allocated. - // Try: (FrameSize <= 224 - // || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI())) + // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate + // stackless code if all local vars are reg-allocated. + bool DisableRedZone = MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone); if (!DisableRedZone && + (Subtarget.isPPC64() || // 32-bit SVR4, no stack- + !Subtarget.isSVR4ABI() || // allocated locals. + FrameSize == 0) && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. @@ -261,7 +271,8 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { // Naked functions have no stack frame pushed, so we don't have a frame // pointer. - if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked)) + if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::Naked)) return false; return MF.getTarget().Options.DisableFramePointerElim(MF) || @@ -776,7 +787,8 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); unsigned LR = RegInfo->getRARegister(); FI->setMustSaveLR(MustSaveLR(MF, LR)); - MF.getRegInfo().setPhysRegUnused(LR); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.setPhysRegUnused(LR); // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); @@ -801,6 +813,16 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); } + // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the + // function uses CR 2, 3, or 4. + if (!isPPC64 && !isDarwinABI && + (MRI.isPhysRegUsed(PPC::CR2) || + MRI.isPhysRegUsed(PPC::CR3) || + MRI.isPhysRegUsed(PPC::CR4))) { + int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); + FI->setCRSpillFrameIndex(FrameIdx); + } + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or // a large stack, which will require scavenging a register to materialize a // large offset. 
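As a side note on the determineFrameLayout change above: the new condition encodes that the 32-bit SVR4 ABI has no red zone, so a stackless function is only possible there when the frame is literally empty, while the other configurations may keep up to 224 bytes of locals in the red zone. A hedged restatement of just that ABI-dependent clause (hypothetical redZoneAllowedForFrame helper, not the in-tree code):

// Sketch: the frame may be elided only if it fits the 224-byte red zone,
// and on 32-bit SVR4 (no red zone) only if it is empty.  The real check in
// determineFrameLayout additionally requires no dynamic allocas, no calls,
// and the other conditions shown in the hunk above.
static bool redZoneAllowedForFrame(bool IsPPC64, bool IsSVR4ABI,
                                   unsigned FrameSize) {
  return (IsPPC64 || !IsSVR4ABI || FrameSize == 0) && FrameSize <= 224;
}
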
@@ -1114,6 +1136,47 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, .addReg(MoveReg)); } +void PPCFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo()); + if (MF.getTarget().Options.GuaranteedTailCallOpt && + I->getOpcode() == PPC::ADJCALLSTACKUP) { + // Add (actually subtract) back the amount the callee popped on return. + if (int CalleeAmt = I->getOperand(1).getImm()) { + bool is64Bit = Subtarget.isPPC64(); + CalleeAmt *= -1; + unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; + unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; + unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; + unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; + unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; + unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; + MachineInstr *MI = I; + DebugLoc dl = MI->getDebugLoc(); + + if (isInt<16>(CalleeAmt)) { + BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) + .addReg(StackReg, RegState::Kill) + .addImm(CalleeAmt); + } else { + MachineBasicBlock::iterator MBBI = I; + BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) + .addImm(CalleeAmt >> 16); + BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) + .addReg(TmpReg, RegState::Kill) + .addImm(CalleeAmt & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) + .addReg(StackReg, RegState::Kill) + .addReg(TmpReg); + } + } + } + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + MBB.erase(I); +} + bool PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 3517d8c086..d09e47fafd 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -27,7 +27,8 @@ class PPCFrameLowering: public TargetFrameLowering { public: PPCFrameLowering(const PPCSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0), + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, + (sti.hasQPX() || sti.isBGQ()) ? 
32 : 16, 0), Subtarget(sti) { } @@ -50,6 +51,10 @@ public: const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index abcaa9cab0..17bea8a6a6 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -21,11 +21,12 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalValue.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -33,6 +34,10 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +namespace llvm { + void initializePPCDAGToDAGISelPass(PassRegistry&); +} + namespace { //===--------------------------------------------------------------------===// /// PPCDAGToDAGISel - PPC specific code to select PPC machine @@ -47,7 +52,9 @@ namespace { explicit PPCDAGToDAGISel(PPCTargetMachine &tm) : SelectionDAGISel(tm), TM(tm), PPCLowering(*TM.getTargetLowering()), - PPCSubTarget(*TM.getSubtargetImpl()) {} + PPCSubTarget(*TM.getSubtargetImpl()) { + initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF) { // Make sure we re-emit a set of the global base reg if necessary @@ -60,6 +67,8 @@ namespace { return true; } + virtual void PostprocessISelDAG(); + /// getI32Imm - Return a target constant with the specified value, of type /// i32. inline SDValue getI32Imm(unsigned Imm) { @@ -1272,16 +1281,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { case PPCISD::TOC_ENTRY: { assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI"); - // For medium code model, we generate two instructions as described - // below. Otherwise we allow SelectCodeCommon to handle this, selecting - // one of LDtoc, LDtocJTI, and LDtocCPT. - if (TM.getCodeModel() != CodeModel::Medium) + // For medium and large code model, we generate two instructions as + // described below. Otherwise we allow SelectCodeCommon to handle this, + // selecting one of LDtoc, LDtocJTI, and LDtocCPT. + CodeModel::Model CModel = TM.getCodeModel(); + if (CModel != CodeModel::Medium && CModel != CodeModel::Large) break; // The first source operand is a TargetGlobalAddress or a // TargetJumpTable. 
If it is an externally defined symbol, a symbol // with common linkage, a function address, or a jump table address, - // we generate: + // or if we are generating code for large code model, we generate: // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>)) // Otherwise we generate: // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>) @@ -1290,20 +1300,24 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, TOCbase, GA); - if (isa<JumpTableSDNode>(GA)) + if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large) return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0)); if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { const GlobalValue *GValue = G->getGlobal(); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue); - assert((GVar || isa<Function>(GValue)) && + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? + GAlias->resolveAliasedGlobal(false) : GValue; + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); + assert((GVar || isa<Function>(RealGValue)) && "Unexpected global value subclass!"); // An external variable is one without an initializer. For these, // for variables with common linkage, and for Functions, generate // the LDtocL form. - if (!GVar || !GVar->hasInitializer() || GValue->hasCommonLinkage()) + if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || + RealGValue->hasAvailableExternallyLinkage()) return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0)); } @@ -1311,16 +1325,231 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, SDValue(Tmp, 0), GA); } - case PPCISD::LD_GOT_TPREL: { - assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI"); - return CurDAG->getMachineNode(PPC::LDgotTPREL, dl, MVT::i64, - N->getOperand(0), N->getOperand(1)); + case PPCISD::VADD_SPLAT: { + // This expands into one of three sequences, depending on whether + // the first operand is odd or even, positive or negative. + assert(isa<ConstantSDNode>(N->getOperand(0)) && + isa<ConstantSDNode>(N->getOperand(1)) && + "Invalid operand on VADD_SPLAT!"); + + int Elt = N->getConstantOperandVal(0); + int EltSize = N->getConstantOperandVal(1); + unsigned Opc1, Opc2, Opc3; + EVT VT; + + if (EltSize == 1) { + Opc1 = PPC::VSPLTISB; + Opc2 = PPC::VADDUBM; + Opc3 = PPC::VSUBUBM; + VT = MVT::v16i8; + } else if (EltSize == 2) { + Opc1 = PPC::VSPLTISH; + Opc2 = PPC::VADDUHM; + Opc3 = PPC::VSUBUHM; + VT = MVT::v8i16; + } else { + assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!"); + Opc1 = PPC::VSPLTISW; + Opc2 = PPC::VADDUWM; + Opc3 = PPC::VSUBUWM; + VT = MVT::v4i32; + } + + if ((Elt & 1) == 0) { + // Elt is even, in the range [-32,-18] + [16,30]. + // + // Convert: VADD_SPLAT elt, size + // Into: tmp = VSPLTIS[BHW] elt + // VADDU[BHW]M tmp, tmp + // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 + SDValue EltVal = getI32Imm(Elt >> 1); + SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + SDValue TmpVal = SDValue(Tmp, 0); + return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal); + + } else if (Elt > 0) { + // Elt is odd and positive, in the range [17,31]. 
+ // + // Convert: VADD_SPLAT elt, size + // Into: tmp1 = VSPLTIS[BHW] elt-16 + // tmp2 = VSPLTIS[BHW] -16 + // VSUBU[BHW]M tmp1, tmp2 + SDValue EltVal = getI32Imm(Elt - 16); + SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + EltVal = getI32Imm(-16); + SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), + SDValue(Tmp2, 0)); + + } else { + // Elt is odd and negative, in the range [-31,-17]. + // + // Convert: VADD_SPLAT elt, size + // Into: tmp1 = VSPLTIS[BHW] elt+16 + // tmp2 = VSPLTIS[BHW] -16 + // VADDU[BHW]M tmp1, tmp2 + SDValue EltVal = getI32Imm(Elt + 16); + SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + EltVal = getI32Imm(-16); + SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), + SDValue(Tmp2, 0)); + } } } return SelectCode(N); } +/// PostProcessISelDAG - Perform some late peephole optimizations +/// on the DAG representation. +void PPCDAGToDAGISel::PostprocessISelDAG() { + + // Skip peepholes at -O0. + if (TM.getOptLevel() == CodeGenOpt::None) + return; + + // These optimizations are currently supported only for 64-bit SVR4. + if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64()) + return; + + SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); + ++Position; + + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = --Position; + // Skip dead nodes and any non-machine opcodes. + if (N->use_empty() || !N->isMachineOpcode()) + continue; + + unsigned FirstOp; + unsigned StorageOpcode = N->getMachineOpcode(); + + switch (StorageOpcode) { + default: continue; + + case PPC::LBZ: + case PPC::LBZ8: + case PPC::LD: + case PPC::LFD: + case PPC::LFS: + case PPC::LHA: + case PPC::LHA8: + case PPC::LHZ: + case PPC::LHZ8: + case PPC::LWA: + case PPC::LWZ: + case PPC::LWZ8: + FirstOp = 0; + break; + + case PPC::STB: + case PPC::STB8: + case PPC::STD: + case PPC::STFD: + case PPC::STFS: + case PPC::STH: + case PPC::STH8: + case PPC::STW: + case PPC::STW8: + FirstOp = 1; + break; + } + + // If this is a load or store with a zero offset, we may be able to + // fold an add-immediate into the memory operation. + if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) || + N->getConstantOperandVal(FirstOp) != 0) + continue; + + SDValue Base = N->getOperand(FirstOp + 1); + if (!Base.isMachineOpcode()) + continue; + + unsigned Flags = 0; + bool ReplaceFlags = true; + + // When the feeding operation is an add-immediate of some sort, + // determine whether we need to add relocation information to the + // target flags on the immediate operand when we fold it into the + // load instruction. + // + // For something like ADDItocL, the relocation information is + // inferred from the opcode; when we process it in the AsmPrinter, + // we add the necessary relocation there. A load, though, can receive + // relocation from various flavors of ADDIxxx, so we need to carry + // the relocation information in the target flags. + switch (Base.getMachineOpcode()) { + default: continue; + + case PPC::ADDI8: + case PPC::ADDI8L: + case PPC::ADDIL: + // In some cases (such as TLS) the relocation information + // is already in place on the operand, so copying the operand + // is sufficient. + ReplaceFlags = false; + // For these cases, the immediate may not be divisible by 4, in + // which case the fold is illegal for DS-form instructions. (The + // other cases provide aligned addresses and are always safe.) 
+ if ((StorageOpcode == PPC::LWA || + StorageOpcode == PPC::LD || + StorageOpcode == PPC::STD) && + (!isa<ConstantSDNode>(Base.getOperand(1)) || + Base.getConstantOperandVal(1) % 4 != 0)) + continue; + break; + case PPC::ADDIdtprelL: + Flags = PPCII::MO_DTPREL16_LO; + break; + case PPC::ADDItlsldL: + Flags = PPCII::MO_TLSLD16_LO; + break; + case PPC::ADDItocL: + Flags = PPCII::MO_TOC16_LO; + break; + } + + // We found an opportunity. Reverse the operands from the add + // immediate and substitute them into the load or store. If + // needed, update the target flags for the immediate operand to + // reflect the necessary relocation information. + DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); + DEBUG(Base->dump(CurDAG)); + DEBUG(dbgs() << "\nN: "); + DEBUG(N->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + SDValue ImmOpnd = Base.getOperand(1); + + // If the relocation information isn't already present on the + // immediate operand, add it now. + if (ReplaceFlags) { + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { + DebugLoc dl = GA->getDebugLoc(); + const GlobalValue *GV = GA->getGlobal(); + ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); + } else if (ConstantPoolSDNode *CP = + dyn_cast<ConstantPoolSDNode>(ImmOpnd)) { + const Constant *C = CP->getConstVal(); + ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, + CP->getAlignment(), + 0, Flags); + } + } + + if (FirstOp == 1) // Store + (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, + Base.getOperand(0), N->getOperand(3)); + else // Load + (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), + N->getOperand(2)); + + // The add-immediate may now be dead, in which case remove it. + if (Base.getNode()->use_empty()) + CurDAG->RemoveDeadNode(Base.getNode()); + } +} /// createPPCISelDag - This pass converts a legalized DAG into a @@ -1330,3 +1559,14 @@ FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) { return new PPCDAGToDAGISel(TM); } +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection"; + PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0, + false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce); +} + diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 1168171b23..13cb358fc0 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -17,7 +17,6 @@ #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -25,10 +24,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -36,20 +36,20 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -static bool 
CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, +static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); +static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); @@ -132,11 +132,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We don't support sin/cos/sqrt/fmod/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); setOperationAction(ISD::FMA , MVT::f32, Legal); @@ -376,6 +378,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -442,6 +445,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? @@ -495,15 +500,15 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // friends. Gcc uses same threshold of 128 bytes (= 32 word stores). 
if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc || Subtarget->getDarwinDirective() == PPC::DIR_E5500) { - maxStoresPerMemset = 32; - maxStoresPerMemsetOptSize = 16; - maxStoresPerMemcpy = 32; - maxStoresPerMemcpyOptSize = 8; - maxStoresPerMemmove = 32; - maxStoresPerMemmoveOptSize = 8; + MaxStoresPerMemset = 32; + MaxStoresPerMemsetOptSize = 16; + MaxStoresPerMemcpy = 32; + MaxStoresPerMemcpyOptSize = 8; + MaxStoresPerMemmove = 32; + MaxStoresPerMemmoveOptSize = 8; setPrefFunctionAlignment(4); - benefitFromCodePlacementOpt = true; + BenefitFromCodePlacementOpt = true; } } @@ -578,8 +583,18 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA"; case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L"; case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L"; - case PPCISD::LD_GOT_TPREL: return "PPCISD::LD_GOT_TPREL"; + case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; + case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; + case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; + case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; + case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; + case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; + case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; + case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; + case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; + case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; + case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; } } @@ -1342,18 +1357,65 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, if (!is64bit) llvm_unreachable("only local-exec is currently supported for ppc32"); - if (Model != TLSModel::InitialExec) - llvm_unreachable("only local-exec and initial-exec TLS modes supported"); - - SDValue GOTOffset = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_GOT_TPREL16_DS); - SDValue TPReg = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TLS); - SDValue GOTReg = DAG.getRegister(is64bit ? PPC::X2 : PPC::R2, - is64bit ? MVT::i64 : MVT::i32); - SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL, dl, PtrVT, - GOTOffset, GOTReg); - return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TPReg); + if (Model == TLSModel::InitialExec) { + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, + PtrVT, GOTReg, TGA); + SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, + PtrVT, TGA, TPOffsetHi); + return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA); + } + + if (Model == TLSModel::GeneralDynamic) { + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, + GOTReg, TGA); + SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, + GOTEntryHi, TGA); + + // We need a chain node, and don't have one handy. The underlying + // call has no side effects, so using the function entry node + // suffices. 
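The general-dynamic path continues just below, followed by the local-dynamic path and the matching pseudo-instruction definitions later in this diff. As background, these node chains correspond to the standard 64-bit ELF TLS access sequences; the sketch below records them as comments against a made-up variable name (tls_var) with illustrative register choices, and is not compiler output from this patch.

    // Expected code shapes for the three TLS models handled by this lowering.
    __thread int tls_var;

    int *addr_of_tls_var() {
      // Initial-exec (ADDIS_GOT_TPREL_HA, LD_GOT_TPREL_L, ADD_TLS):
      //   addis r9, r2, tls_var@got@tprel@ha
      //   ld    r9, tls_var@got@tprel@l(r9)
      //   add   r9, r9, tls_var@tls
      //
      // General-dynamic (ADDIS_TLSGD_HA, ADDI_TLSGD_L, GET_TLS_ADDR):
      //   addis r3, r2, tls_var@got@tlsgd@ha
      //   addi  r3, r3, tls_var@got@tlsgd@l
      //   bl    __tls_get_addr(tls_var@tlsgd)
      //   nop
      //
      // Local-dynamic (ADDIS_TLSLD_HA, ADDI_TLSLD_L, GET_TLSLD_ADDR,
      // then ADDIS_DTPREL_HA, ADDI_DTPREL_L):
      //   addis r3, r2, tls_var@got@tlsld@ha
      //   addi  r3, r3, tls_var@got@tlsld@l
      //   bl    __tls_get_addr(tls_var@tlsld)
      //   nop
      //   addis r9, r3, tls_var@dtprel@ha
      //   addi  r9, r9, tls_var@dtprel@l
      return &tls_var;
    }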
+ SDValue Chain = DAG.getEntryNode(); + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); + SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); + SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl, + PtrVT, ParmReg, TGA); + // The return value from GET_TLS_ADDR really is in X3 already, but + // some hacks are needed here to tie everything together. The extra + // copies dissolve during subsequent transforms. + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); + return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT); + } + + if (Model == TLSModel::LocalDynamic) { + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, + GOTReg, TGA); + SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, + GOTEntryHi, TGA); + + // We need a chain node, and don't have one handy. The underlying + // call has no side effects, so using the function entry node + // suffices. + SDValue Chain = DAG.getEntryNode(); + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); + SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); + SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, + PtrVT, ParmReg, TGA); + // The return value from GET_TLSLD_ADDR really is in X3 already, but + // some hacks are needed here to tie everything together. The extra + // copies dissolve during subsequent transforms. + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); + SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, + Chain, ParmReg, TGA); + return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); + } + + llvm_unreachable("Unknown TLS model!"); } SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, @@ -1687,18 +1749,18 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" -static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { return true; } -static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { static const uint16_t ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, @@ -1721,11 +1783,11 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, return false; } -static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { static const uint16_t ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 @@ -1848,7 +1910,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Reserve space for the linkage area on the stack. 
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); - CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4); + CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -1909,7 +1971,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); - CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal); + CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); // Area that is at least reserved in the caller of this function. unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); @@ -2101,13 +2163,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SmallVector<SDValue, 8> MemOps; unsigned nAltivecParamsAtEnd = 0; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); - for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { + unsigned CurArgIdx = 0; + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; + std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); + CurArgIdx = Ins[ArgNo].OrigArgIndex; unsigned CurArgOffset = ArgOffset; @@ -2442,6 +2507,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SmallVector<SDValue, 8> MemOps; unsigned nAltivecParamsAtEnd = 0; + // FIXME: FuncArg and Ins[ArgNo] must reference the same argument. + // When passing anonymous aggregates, this is currently not true. + // See LowerFormalArguments_64SVR4 for a fix. Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { SDValue ArgVal; @@ -3264,7 +3332,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // When performing tail call optimization the callee pops its arguments off // the stack. Account for this here so these bytes can be pushed back on in - // PPCRegisterInfo::eliminateCallFramePseudoInstr. + // PPCFrameLowering::eliminateCallFramePseudoInstr. int BytesCalleePops = (CallConv == CallingConv::Fast && getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; @@ -3280,17 +3348,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // Emit tail call. if (isTailCall) { - // If this is the first return lowered for this function, add the regs - // to the liveout set for the function. 
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, RetCC_PPC); - for (unsigned i = 0; i != RVLocs.size(); ++i) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - assert(((Callee.getOpcode() == ISD::Register && cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || Callee.getOpcode() == ISD::TargetExternalSymbol || @@ -3434,11 +3491,11 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, bool Result; if (Outs[i].IsFixed) { - Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, - CCInfo); + Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, + CCInfo); } else { - Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, - ArgFlags, CCInfo); + Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, + ArgFlags, CCInfo); } if (Result) { @@ -3451,7 +3508,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, } } else { // All arguments are treated the same. - CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4); + CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); } // Assign locations to all of the outgoing aggregate by value arguments. @@ -3462,7 +3519,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); - CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal); + CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); // Size of the linkage area, parameter list area and the part of the local // space variable where copies of aggregates which are passed by value are @@ -4356,14 +4413,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain, getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_PPC); - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -4388,12 +4439,17 @@ PPCTargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. 
if (Flag.getNode()) - return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - else - return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain); + RetOps.push_back(Flag); + + return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, + &RetOps[0], RetOps.size()); } SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, @@ -4730,12 +4786,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); EVT VT = Op.getValueType(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - std::vector<EVT> NodeTys; SDValue MFFSreg, InFlag; // Save FP Control Word to register - NodeTys.push_back(MVT::f64); // return register - NodeTys.push_back(MVT::Glue); // unused in this context + EVT NodeTys[] = { + MVT::f64, // return register + MVT::Glue // unused in this context + }; SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); // Save FP register to stack slot @@ -4969,11 +5026,21 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // Two instruction sequences. // If this value is in the range [-32,30] and is even, use: - // tmp = VSPLTI[bhw], result = add tmp, tmp - if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { - SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl); - Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); + // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) + // If this value is in the range [17,31] and is odd, use: + // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) + // If this value is in the range [-31,-17] and is odd, use: + // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) + // Note the last two are three-instruction sequences. + if (SextVal >= -32 && SextVal <= 31) { + // To avoid having these optimizations undone by constant folding, + // we convert to a pseudo that will be expanded later into one of + // the above forms. + SDValue Elt = DAG.getConstant(SextVal, MVT::i32); + EVT VT = Op.getValueType(); + int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4); + SDValue EltSize = DAG.getConstant(Size, MVT::i32); + return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); } // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is @@ -5069,23 +5136,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } } - // Three instruction sequences. - - // Odd, in range [17,31]: (vsplti C)-(vsplti -16). - if (SextVal >= 0 && SextVal <= 31) { - SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl); - SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); - LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); - } - // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). 
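The splat identities that VADD_SPLAT relies on (stated in the new comments above and expanded in PPCISelDAGToDAG.cpp earlier in this diff) are easy to sanity-check outside the compiler. A self-contained sketch with a hypothetical helper name:

    #include <cassert>

    // Model the three VADD_SPLAT expansions for a sign-extended splat constant
    // in [-32,31]; every vspltis immediate used stays in the instruction's
    // signed 5-bit range [-16,15].  (Constants that fit a single vspltis are
    // handled before this point in LowerBUILD_VECTOR.)
    static int expandSplat(int Val) {
      if ((Val & 1) == 0)
        return (Val / 2) + (Val / 2);   // vspltis(Val/2), then vaddu[bhw]m
      if (Val > 0)
        return (Val - 16) - (-16);      // vspltis(Val-16) - vspltis(-16)
      return (Val + 16) + (-16);        // vspltis(Val+16) + vspltis(-16)
    }

    int main() {
      for (int V = -32; V <= 31; ++V)
        assert(expandSplat(V) == V);    // each expansion reproduces the constant
      return 0;
    }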
- if (SextVal >= -31 && SextVal <= 0) { - SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl); - SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); - LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); - } - return SDValue(); } @@ -5359,9 +5409,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(3), // RHS DAG.getConstant(CompareOpc, MVT::i32) }; - std::vector<EVT> VTs; - VTs.push_back(Op.getOperand(2).getValueType()); - VTs.push_back(MVT::Glue); + EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); // Now that we have the comparison, emit a copy from the CR to a GPR. @@ -6417,14 +6465,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); // Create the PPCISD altivec 'dot' comparison node. - std::vector<EVT> VTs; SDValue Ops[] = { LHS.getOperand(2), // LHS of compare LHS.getOperand(3), // RHS of compare DAG.getConstant(CompareOpc, MVT::i32) }; - VTs.push_back(LHS.getOperand(2).getValueType()); - VTs.push_back(MVT::Glue); + EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); // Unpack the result based on how the target uses it. @@ -6763,8 +6809,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) && MFI->getStackSize() && - !MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::Naked); + !MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked); unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : (is31 ? PPC::R31 : PPC::R1); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, @@ -6787,16 +6833,15 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, -/// probably because the source does not need to be loaded. If -/// 'IsZeroVal' is true, that means it's safe to return a -/// non-scalar-integer type, e.g. empty string source, constant, or loaded -/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is -/// constant so it does not need to be loaded. +/// probably because the source does not need to be loaded. If 'IsMemset' is +/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that +/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy +/// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. 
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { if (this->PPCSubTarget.isPPC64()) { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index f54a8b77a4..3931384d89 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -178,11 +178,16 @@ namespace llvm { CR6SET, CR6UNSET, - /// G8RC = LD_GOT_TPREL Symbol, G8RReg - Used by the initial-exec + /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec + /// TLS model, produces an ADDIS8 instruction that adds the GOT + /// base to sym@got@tprel@ha. + ADDIS_GOT_TPREL_HA, + + /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec /// TLS model, produces a LD instruction with base register G8RReg - /// and offset sym@got@tprel. The latter identifies the GOT entry - /// containing the offset of "sym" relative to the thread pointer. - LD_GOT_TPREL, + /// and offset sym@got@tprel@l. This completes the addition that + /// finds the offset of "sym" relative to the thread pointer. + LD_GOT_TPREL_L, /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS /// model, produces an ADD instruction that adds the contents of @@ -192,6 +197,52 @@ namespace llvm { /// TLS sequence. ADD_TLS, + /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS + /// model, produces an ADDIS8 instruction that adds the GOT base + /// register to sym@got@tlsgd@ha. + ADDIS_TLSGD_HA, + + /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym@got@tlsgd@l. + ADDI_TLSGD_L, + + /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS + /// model, produces a call to __tls_get_addr(sym@tlsgd). + GET_TLS_ADDR, + + /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS + /// model, produces an ADDIS8 instruction that adds the GOT base + /// register to sym@got@tlsld@ha. + ADDIS_TLSLD_HA, + + /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym@got@tlsld@l. + ADDI_TLSLD_L, + + /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS + /// model, produces a call to __tls_get_addr(sym@tlsld). + GET_TLSLD_ADDR, + + /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the + /// local-dynamic TLS model, produces an ADDIS8 instruction + /// that adds X3 to sym@dtprel@ha. The Chain operand is needed + /// to tie this in place following a copy to %X3 from the result + /// of a GET_TLSLD_ADDR. + ADDIS_DTPREL_HA, + + /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym@got@dtprel@l. + ADDI_DTPREL_L, + + /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded + /// during instruction selection to optimize a BUILD_VECTOR into + /// operations on splats. This is necessary to avoid losing these + /// optimizations due to constant folding. + VADD_SPLAT, + /// STD_32 - This is the STD instruction for use with "32-bit" registers. STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, @@ -207,13 +258,14 @@ namespace llvm { /// or i32. LBRX, - /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium code model, produces - /// an ADDIS8 instruction that adds the TOC base register to sym@toc@ha. 
+ /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model, + /// produces an ADDIS8 instruction that adds the TOC base register to + /// sym@toc@ha. ADDIS_TOC_HA, - /// G8RC = LD_TOC_L Symbol, G8RReg - For medium code model, produces a - /// LD instruction with base register G8RReg and offset sym@toc@l. - /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. + /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model, + /// produces a LD instruction with base register G8RReg and offset + /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. LD_TOC_L, /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces @@ -277,7 +329,7 @@ namespace llvm { /// DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; - virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } /// getSetCCResultType - Return the ISD::SETCC ValueType virtual EVT getSetCCResultType(EVT VT) const; @@ -386,16 +438,15 @@ namespace llvm { /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, - /// probably because the source does not need to be loaded. If - /// 'IsZeroVal' is true, that means it's safe to return a - /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is - /// constant so it does not need to be loaded. + /// probably because the source does not need to be loaded. If 'IsMemset' is + /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that + /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy + /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. virtual EVT - getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, bool MemcpyStrSrc, + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index dff15664a3..01201304f7 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -37,12 +37,10 @@ def memrs : Operand<iPTR> { // memri where the immediate is a symbolLo64 let EncoderMethod = "getMemRIXEncoding"; let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg); } -def tlsaddr : Operand<i64> { - let EncoderMethod = "getTLSOffsetEncoding"; -} def tlsreg : Operand<i64> { let EncoderMethod = "getTLSRegEncoding"; } +def tlsgd : Operand<i64> {} //===----------------------------------------------------------------------===// // 64-bit transformation functions. 
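The ADDIS_TOC_HA, LD_TOC_L, and ADDI_TOC_L descriptions above map onto two-instruction TOC accesses, and the selection logic earlier in this diff chooses between them. As a reference sketch (symbol names, registers, and TOC entry labels are illustrative, not output produced by this patch): data the compiler cannot resolve locally, functions, jump tables, and everything under the large code model go through a TOC load, while locally defined data under the medium code model is addressed directly.

    // Globals accessed under the medium and large code models (64-bit ELF).
    extern int external_var;  // defined elsewhere: address loaded from the TOC
    static int local_var;     // defined here: address materialized directly

    int *addrs[2];

    void take_addresses() {
      // ADDIS_TOC_HA + LD_TOC_L (also used for everything under the large model):
      //   addis r3, r2, .LC0@toc@ha     # .LC0 names external_var's TOC entry
      //   ld    r3, .LC0@toc@l(r3)
      addrs[0] = &external_var;

      // ADDIS_TOC_HA + ADDI_TOC_L (medium code model, locally defined data):
      //   addis r3, r2, local_var@toc@ha
      //   addi  r3, r3, local_var@toc@l
      addrs[1] = &local_var;
    }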
@@ -110,6 +108,16 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { (outs), (ins calltarget:$func), "bl $func\n\tnop", BrB, []>; + let isCodeGenOnly = 1 in + def BL8_NOP_ELF_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func, tlsgd:$sym), + "bl $func($sym)\n\tnop", BrB, []>; + + let isCodeGenOnly = 1 in + def BL8_NOP_ELF_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func, tlsgd:$sym), + "bl $func($sym)\n\tnop", BrB, []>; + def BLA8_ELF : IForm<18, 1, 1, (outs), (ins aaddr:$func), "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; @@ -373,7 +381,7 @@ def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the // initial-exec thread-local storage model. def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB), - "add $rT, $rA, $rB", IntSimple, + "add $rT, $rA, $rB@tls", IntSimple, [(set G8RC:$rT, (add G8RC:$rA, tglobaltlsaddr:$rB))]>; let Defs = [CARRY] in { @@ -693,7 +701,7 @@ def : Pat<(PPCload ixaddr:$src), def : Pat<(PPCload xaddr:$src), (LDX xaddr:$src)>; -// Support for medium code model. +// Support for medium and large code model. def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp), "#ADDIStocHA", [(set G8RC:$rD, @@ -709,13 +717,60 @@ def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp), (PPCaddiTocL G8RC:$reg, tglobaladdr:$disp))]>, isPPC64; // Support for thread-local storage. -def LDgotTPREL: Pseudo<(outs G8RC:$rD), (ins tlsaddr:$disp, G8RC:$reg), - "#LDgotTPREL", - [(set G8RC:$rD, - (PPCldGotTprel G8RC:$reg, tglobaltlsaddr:$disp))]>, - isPPC64; +def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), + "#ADDISgotTprelHA", + [(set G8RC:$rD, + (PPCaddisGotTprelHA G8RC:$reg, + tglobaltlsaddr:$disp))]>, + isPPC64; +def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC:$reg), + "#LDgotTprelL", + [(set G8RC:$rD, + (PPCldGotTprelL tglobaltlsaddr:$disp, G8RC:$reg))]>, + isPPC64; def : Pat<(PPCaddTls G8RC:$in, tglobaltlsaddr:$g), (ADD8TLS G8RC:$in, tglobaltlsaddr:$g)>; +def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), + "#ADDIStlsgdHA", + [(set G8RC:$rD, + (PPCaddisTlsgdHA G8RC:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), + "#ADDItlsgdL", + [(set G8RC:$rD, + (PPCaddiTlsgdL G8RC:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), + "#GETtlsADDR", + [(set G8RC:$rD, + (PPCgetTlsAddr G8RC:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; +def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), + "#ADDIStlsldHA", + [(set G8RC:$rD, + (PPCaddisTlsldHA G8RC:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), + "#ADDItlsldL", + [(set G8RC:$rD, + (PPCaddiTlsldL G8RC:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), + "#GETtlsldADDR", + [(set G8RC:$rD, + (PPCgetTlsldAddr G8RC:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; +def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), + "#ADDISdtprelHA", + [(set G8RC:$rD, + (PPCaddisDtprelHA G8RC:$reg, + tglobaltlsaddr:$disp))]>, + isPPC64; +def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), + "#ADDIdtprelL", + [(set G8RC:$rD, + (PPCaddiDtprelL G8RC:$reg, 
tglobaltlsaddr:$disp))]>, + isPPC64; let PPC970_Unit = 2 in { // Truncating stores. diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index a29f40ad53..460e94342d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -91,8 +91,19 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>; def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>; def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>; -def PPCldGotTprel : SDNode<"PPCISD::LD_GOT_TPREL", SDTIntBinOp, [SDNPMayLoad]>; +def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>; +def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp, + [SDNPMayLoad]>; def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; +def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; +def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; +def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; +def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; +def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; +def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; +def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp, + [SDNPHasChain]>; +def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; @@ -170,7 +181,7 @@ def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx, [SDNPHasChain, SDNPMayStore]>; -// Instructions to support medium code model +// Instructions to support medium and large code model def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>; def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>; def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>; @@ -335,7 +346,7 @@ def crbitm: Operand<i8> { // Address operands def memri : Operand<iPTR> { let PrintMethod = "printMemRegImm"; - let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); let EncoderMethod = "getMemRIEncoding"; } def memrr : Operand<iPTR> { @@ -344,7 +355,7 @@ def memrr : Operand<iPTR> { } def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. 
let PrintMethod = "printMemRegImmShifted"; - let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); let EncoderMethod = "getMemRIXEncoding"; } diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 62d15b371f..cfcd7490ed 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -15,7 +15,7 @@ #include "PPCJITInfo.h" #include "PPCRelocations.h" #include "PPCTargetMachine.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Memory.h" @@ -115,7 +115,7 @@ asm( "lwz r2, 208(r1)\n" // stub's frame "lwz r4, 8(r2)\n" // stub's lr "li r5, 0\n" // 0 == 32 bit - "bl _PPCCompilationCallbackC\n" + "bl _LLVMPPCCompilationCallback\n" "mtctr r3\n" // Restore all int arg registers "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n" @@ -178,7 +178,7 @@ asm( "lwz 5, 104(1)\n" // stub's frame "lwz 4, 4(5)\n" // stub's lr "li 5, 0\n" // 0 == 32 bit - "bl PPCCompilationCallbackC\n" + "bl LLVMPPCCompilationCallback\n" "mtctr 3\n" // Restore all int arg registers "lwz 10, 100(1)\n" "lwz 9, 96(1)\n" @@ -259,10 +259,10 @@ asm( "ld 4, 16(5)\n" // stub's lr "li 5, 1\n" // 1 == 64 bit #ifdef __ELF__ - "bl PPCCompilationCallbackC\n" + "bl LLVMPPCCompilationCallback\n" "nop\n" #else - "bl _PPCCompilationCallbackC\n" + "bl _LLVMPPCCompilationCallback\n" #endif "mtctr 3\n" // Restore all int arg registers @@ -292,9 +292,10 @@ void PPC64CompilationCallback() { #endif extern "C" { -static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, - unsigned *OrigCallAddrPlus4, - bool is64Bit) { +LLVM_LIBRARY_VISIBILITY void * +LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4, + unsigned *OrigCallAddrPlus4, + bool is64Bit) { // Adjust the pointer to the address of the call instruction in the stub // emitted by emitFunctionStub, rather than the instruction after it. 
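A short note on the pointer arithmetic that follows: the saved link register points at the instruction after the bl in the stub, and PPC instructions are 4 bytes wide, so stepping one unsigned word backwards recovers the address of the call itself. A self-contained illustration with made-up stub contents:

    #include <cassert>

    // StubCallAddrPlus4 is the stub's saved LR; because the pointer type is
    // unsigned* (4 bytes), subtracting 1 steps back exactly one instruction.
    static unsigned *callInstructionAddress(unsigned *StubCallAddrPlus4) {
      return StubCallAddrPlus4 - 1;
    }

    int main() {
      unsigned stub[4] = {0, 0, 0, 0};  // pretend stub; stub[2] holds the bl
      unsigned *savedLR = &stub[3];     // LR points just past the bl
      assert(callInstructionAddress(savedLR) == &stub[2]);
      return 0;
    }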
unsigned *StubCallAddr = StubCallAddrPlus4 - 1; diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 1c4af901f7..9b0df3e86a 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -114,12 +115,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, break; case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO; break; - case PPCII::MO_GOT_TPREL16_DS: - RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS; - break; - case PPCII::MO_TLS: - RefKind = MCSymbolRefExpr::VK_PPC_TLS; - break; + case PPCII::MO_DTPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO; + break; + case PPCII::MO_TLSLD16_LO: RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO; + break; + case PPCII::MO_TOC16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO; + break; } // FIXME: This isn't right, but we don't have a good way to express this in diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 24caffa3f0..045b375dd8 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -71,6 +71,9 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// register for parameter passing. unsigned VarArgsNumFPR; + /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4. + int CRSpillFrameIndex; + public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), @@ -83,7 +86,8 @@ public: VarArgsFrameIndex(0), VarArgsStackOffset(0), VarArgsNumGPR(0), - VarArgsNumFPR(0) {} + VarArgsNumFPR(0), + CRSpillFrameIndex(0) {} int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } @@ -125,6 +129,9 @@ public: unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; } void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; } + + int getCRSpillFrameIndex() const { return CRSpillFrameIndex; } + void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; } }; } // end of namespace llvm diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 8d1ba23c11..df245cc655 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -21,7 +21,6 @@ #include "PPCSubtarget.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -29,8 +28,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -40,7 +41,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Type.h" #include <cstdlib> #define GET_REGINFO_TARGET_DESC @@ -71,7 +71,7 @@ 
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, ST.isPPC64() ? 0 : 1, ST.isPPC64() ? 0 : 1), - Subtarget(ST), TII(tii), CRSpillFrameIdx(0) { + Subtarget(ST), TII(tii) { ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; @@ -111,11 +111,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return Subtarget.isPPC64() ? CSR_Darwin64_SaveList : CSR_Darwin32_SaveList; - // For 32-bit SVR4, also initialize the frame index associated with - // the CR spill slot. - if (!Subtarget.isPPC64()) - CRSpillFrameIdx = 0; - return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList; } @@ -222,45 +217,6 @@ PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// -void PPCRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - if (MF.getTarget().Options.GuaranteedTailCallOpt && - I->getOpcode() == PPC::ADJCALLSTACKUP) { - // Add (actually subtract) back the amount the callee popped on return. - if (int CalleeAmt = I->getOperand(1).getImm()) { - bool is64Bit = Subtarget.isPPC64(); - CalleeAmt *= -1; - unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; - unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; - unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; - unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; - unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; - MachineInstr *MI = I; - DebugLoc dl = MI->getDebugLoc(); - - if (isInt<16>(CalleeAmt)) { - BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) - .addReg(StackReg, RegState::Kill) - .addImm(CalleeAmt); - } else { - MachineBasicBlock::iterator MBBI = I; - BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) - .addImm(CalleeAmt >> 16); - BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) - .addReg(TmpReg, RegState::Kill) - .addImm(CalleeAmt & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) - .addReg(StackReg, RegState::Kill) - .addReg(TmpReg); - } - } - } - // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. - MBB.erase(I); -} - /// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered /// register first and then a spilled callee-saved register if that fails. static @@ -489,19 +445,14 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4 // ABI, return true to prevent allocating an additional frame slot. // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0 - // is arbitrary and will be subsequently ignored. For 32-bit, we must - // create exactly one stack slot and return its FrameIdx for all - // nonvolatiles. + // is arbitrary and will be subsequently ignored. For 32-bit, we have + // previously created the stack slot if needed, so return its FrameIdx. 
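With the mutable CRSpillFrameIdx member gone, hasReservedSpillSlot (below) only reads the index out of PPCFunctionInfo; the slot itself must now be created once, earlier, by code that has the MachineFunction in hand. A hedged sketch of what that creation looks like, mirroring the CreateFixedObject call being removed here; where exactly this patch places it (presumably in frame lowering or CSR processing) is outside this excerpt, and the function name below is made up.

    // Hypothetical placement: run once per function for 32-bit SVR4 before any
    // nonvolatile CR field (CR2-CR4) is spilled.
    static void reserveCRSpillSlot(MachineFunction &MF) {
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      if (!FI->getCRSpillFrameIndex()) {
        MachineFrameInfo *MFI = MF.getFrameInfo();
        // The same 4-byte slot at SP-4 that the removed code created lazily.
        int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
        FI->setCRSpillFrameIndex(FrameIdx);
      }
    }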
if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) { - if (Subtarget.isPPC64()) { + if (Subtarget.isPPC64()) FrameIdx = 0; - } else if (CRSpillFrameIdx) { - FrameIdx = CRSpillFrameIdx; - } else { - MachineFrameInfo *MFI = - (const_cast<MachineFunction &>(MF)).getFrameInfo(); - FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); - CRSpillFrameIdx = FrameIdx; + else { + const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + FrameIdx = FI->getCRSpillFrameIndex(); } return true; } @@ -510,7 +461,8 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); // Get the instruction. @@ -524,20 +476,13 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc dl = MI.getDebugLoc(); - // Find out which operand is the frame index. - unsigned FIOperandNo = 0; - while (!MI.getOperand(FIOperandNo).isFI()) { - ++FIOperandNo; - assert(FIOperandNo != MI.getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } // Take into account whether it's an add or mem instruction - unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2; + unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2; if (MI.isInlineAsm()) - OffsetOperandNo = FIOperandNo-1; + OffsetOperandNo = FIOperandNum-1; // Get the frame index. - int FrameIndex = MI.getOperand(FIOperandNo).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); // Get the frame pointer save index. Users of this index are primarily // DYNALLOC instructions. @@ -567,7 +512,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). bool is64Bit = Subtarget.isPPC64(); - MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ? + MI.getOperand(FIOperandNum).ChangeToRegister(TFI->hasFP(MF) ? (is64Bit ? PPC::X31 : PPC::R31) : (is64Bit ? PPC::X1 : PPC::R1), false); @@ -597,7 +542,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // to Offset to get the correct offset. // Naked functions have stack size 0, although getStackSize may not reflect that // because we didn't call all the pieces that compute it for naked functions. - if (!MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked)) + if (!MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked)) Offset += MFI->getStackSize(); // If we can, encode the offset directly into the instruction. 
If this is a @@ -648,7 +594,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, OperandBase = OffsetOperandNo; } - unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); + unsigned StackReg = MI.getOperand(FIOperandNum).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true); } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index a8fd796d9e..9840666242 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -30,7 +30,6 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { std::map<unsigned, unsigned> ImmToIdxMap; const PPCSubtarget &Subtarget; const TargetInstrInfo &TII; - mutable int CRSpillFrameIdx; public: PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii); @@ -56,10 +55,6 @@ public: bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - void lowerDynamicAlloc(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, @@ -69,7 +64,8 @@ public: bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 5ca387629b..8ee9b1ec9f 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -63,142 +63,28 @@ class CRBIT<bits<5> num, string n> : PPCReg<n> { field bits<5> Num = num; } - // General-purpose registers -def R0 : GPR< 0, "r0">, DwarfRegNum<[-2, 0]>; -def R1 : GPR< 1, "r1">, DwarfRegNum<[-2, 1]>; -def R2 : GPR< 2, "r2">, DwarfRegNum<[-2, 2]>; -def R3 : GPR< 3, "r3">, DwarfRegNum<[-2, 3]>; -def R4 : GPR< 4, "r4">, DwarfRegNum<[-2, 4]>; -def R5 : GPR< 5, "r5">, DwarfRegNum<[-2, 5]>; -def R6 : GPR< 6, "r6">, DwarfRegNum<[-2, 6]>; -def R7 : GPR< 7, "r7">, DwarfRegNum<[-2, 7]>; -def R8 : GPR< 8, "r8">, DwarfRegNum<[-2, 8]>; -def R9 : GPR< 9, "r9">, DwarfRegNum<[-2, 9]>; -def R10 : GPR<10, "r10">, DwarfRegNum<[-2, 10]>; -def R11 : GPR<11, "r11">, DwarfRegNum<[-2, 11]>; -def R12 : GPR<12, "r12">, DwarfRegNum<[-2, 12]>; -def R13 : GPR<13, "r13">, DwarfRegNum<[-2, 13]>; -def R14 : GPR<14, "r14">, DwarfRegNum<[-2, 14]>; -def R15 : GPR<15, "r15">, DwarfRegNum<[-2, 15]>; -def R16 : GPR<16, "r16">, DwarfRegNum<[-2, 16]>; -def R17 : GPR<17, "r17">, DwarfRegNum<[-2, 17]>; -def R18 : GPR<18, "r18">, DwarfRegNum<[-2, 18]>; -def R19 : GPR<19, "r19">, DwarfRegNum<[-2, 19]>; -def R20 : GPR<20, "r20">, DwarfRegNum<[-2, 20]>; -def R21 : GPR<21, "r21">, DwarfRegNum<[-2, 21]>; -def R22 : GPR<22, "r22">, DwarfRegNum<[-2, 22]>; -def R23 : GPR<23, "r23">, DwarfRegNum<[-2, 23]>; -def R24 : GPR<24, "r24">, DwarfRegNum<[-2, 24]>; -def R25 : GPR<25, "r25">, DwarfRegNum<[-2, 25]>; -def R26 : GPR<26, "r26">, DwarfRegNum<[-2, 26]>; -def R27 : GPR<27, "r27">, DwarfRegNum<[-2, 27]>; -def R28 : GPR<28, "r28">, DwarfRegNum<[-2, 28]>; -def R29 : GPR<29, "r29">, DwarfRegNum<[-2, 29]>; -def R30 : GPR<30, "r30">, DwarfRegNum<[-2, 30]>; -def R31 : GPR<31, "r31">, 
DwarfRegNum<[-2, 31]>; +foreach Index = 0-31 in { + def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>; +} // 64-bit General-purpose registers -def X0 : GP8< R0, "r0">, DwarfRegNum<[0, -2]>; -def X1 : GP8< R1, "r1">, DwarfRegNum<[1, -2]>; -def X2 : GP8< R2, "r2">, DwarfRegNum<[2, -2]>; -def X3 : GP8< R3, "r3">, DwarfRegNum<[3, -2]>; -def X4 : GP8< R4, "r4">, DwarfRegNum<[4, -2]>; -def X5 : GP8< R5, "r5">, DwarfRegNum<[5, -2]>; -def X6 : GP8< R6, "r6">, DwarfRegNum<[6, -2]>; -def X7 : GP8< R7, "r7">, DwarfRegNum<[7, -2]>; -def X8 : GP8< R8, "r8">, DwarfRegNum<[8, -2]>; -def X9 : GP8< R9, "r9">, DwarfRegNum<[9, -2]>; -def X10 : GP8<R10, "r10">, DwarfRegNum<[10, -2]>; -def X11 : GP8<R11, "r11">, DwarfRegNum<[11, -2]>; -def X12 : GP8<R12, "r12">, DwarfRegNum<[12, -2]>; -def X13 : GP8<R13, "r13">, DwarfRegNum<[13, -2]>; -def X14 : GP8<R14, "r14">, DwarfRegNum<[14, -2]>; -def X15 : GP8<R15, "r15">, DwarfRegNum<[15, -2]>; -def X16 : GP8<R16, "r16">, DwarfRegNum<[16, -2]>; -def X17 : GP8<R17, "r17">, DwarfRegNum<[17, -2]>; -def X18 : GP8<R18, "r18">, DwarfRegNum<[18, -2]>; -def X19 : GP8<R19, "r19">, DwarfRegNum<[19, -2]>; -def X20 : GP8<R20, "r20">, DwarfRegNum<[20, -2]>; -def X21 : GP8<R21, "r21">, DwarfRegNum<[21, -2]>; -def X22 : GP8<R22, "r22">, DwarfRegNum<[22, -2]>; -def X23 : GP8<R23, "r23">, DwarfRegNum<[23, -2]>; -def X24 : GP8<R24, "r24">, DwarfRegNum<[24, -2]>; -def X25 : GP8<R25, "r25">, DwarfRegNum<[25, -2]>; -def X26 : GP8<R26, "r26">, DwarfRegNum<[26, -2]>; -def X27 : GP8<R27, "r27">, DwarfRegNum<[27, -2]>; -def X28 : GP8<R28, "r28">, DwarfRegNum<[28, -2]>; -def X29 : GP8<R29, "r29">, DwarfRegNum<[29, -2]>; -def X30 : GP8<R30, "r30">, DwarfRegNum<[30, -2]>; -def X31 : GP8<R31, "r31">, DwarfRegNum<[31, -2]>; +foreach Index = 0-31 in { + def X#Index : GP8<!cast<GPR>("R"#Index), "r"#Index>, + DwarfRegNum<[Index, -2]>; +} // Floating-point registers -def F0 : FPR< 0, "f0">, DwarfRegNum<[32, 32]>; -def F1 : FPR< 1, "f1">, DwarfRegNum<[33, 33]>; -def F2 : FPR< 2, "f2">, DwarfRegNum<[34, 34]>; -def F3 : FPR< 3, "f3">, DwarfRegNum<[35, 35]>; -def F4 : FPR< 4, "f4">, DwarfRegNum<[36, 36]>; -def F5 : FPR< 5, "f5">, DwarfRegNum<[37, 37]>; -def F6 : FPR< 6, "f6">, DwarfRegNum<[38, 38]>; -def F7 : FPR< 7, "f7">, DwarfRegNum<[39, 39]>; -def F8 : FPR< 8, "f8">, DwarfRegNum<[40, 40]>; -def F9 : FPR< 9, "f9">, DwarfRegNum<[41, 41]>; -def F10 : FPR<10, "f10">, DwarfRegNum<[42, 42]>; -def F11 : FPR<11, "f11">, DwarfRegNum<[43, 43]>; -def F12 : FPR<12, "f12">, DwarfRegNum<[44, 44]>; -def F13 : FPR<13, "f13">, DwarfRegNum<[45, 45]>; -def F14 : FPR<14, "f14">, DwarfRegNum<[46, 46]>; -def F15 : FPR<15, "f15">, DwarfRegNum<[47, 47]>; -def F16 : FPR<16, "f16">, DwarfRegNum<[48, 48]>; -def F17 : FPR<17, "f17">, DwarfRegNum<[49, 49]>; -def F18 : FPR<18, "f18">, DwarfRegNum<[50, 50]>; -def F19 : FPR<19, "f19">, DwarfRegNum<[51, 51]>; -def F20 : FPR<20, "f20">, DwarfRegNum<[52, 52]>; -def F21 : FPR<21, "f21">, DwarfRegNum<[53, 53]>; -def F22 : FPR<22, "f22">, DwarfRegNum<[54, 54]>; -def F23 : FPR<23, "f23">, DwarfRegNum<[55, 55]>; -def F24 : FPR<24, "f24">, DwarfRegNum<[56, 56]>; -def F25 : FPR<25, "f25">, DwarfRegNum<[57, 57]>; -def F26 : FPR<26, "f26">, DwarfRegNum<[58, 58]>; -def F27 : FPR<27, "f27">, DwarfRegNum<[59, 59]>; -def F28 : FPR<28, "f28">, DwarfRegNum<[60, 60]>; -def F29 : FPR<29, "f29">, DwarfRegNum<[61, 61]>; -def F30 : FPR<30, "f30">, DwarfRegNum<[62, 62]>; -def F31 : FPR<31, "f31">, DwarfRegNum<[63, 63]>; +foreach Index = 0-31 in { + def F#Index : FPR<Index, "f"#Index>, + 
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>; +} // Vector registers -def V0 : VR< 0, "v0">, DwarfRegNum<[77, 77]>; -def V1 : VR< 1, "v1">, DwarfRegNum<[78, 78]>; -def V2 : VR< 2, "v2">, DwarfRegNum<[79, 79]>; -def V3 : VR< 3, "v3">, DwarfRegNum<[80, 80]>; -def V4 : VR< 4, "v4">, DwarfRegNum<[81, 81]>; -def V5 : VR< 5, "v5">, DwarfRegNum<[82, 82]>; -def V6 : VR< 6, "v6">, DwarfRegNum<[83, 83]>; -def V7 : VR< 7, "v7">, DwarfRegNum<[84, 84]>; -def V8 : VR< 8, "v8">, DwarfRegNum<[85, 85]>; -def V9 : VR< 9, "v9">, DwarfRegNum<[86, 86]>; -def V10 : VR<10, "v10">, DwarfRegNum<[87, 87]>; -def V11 : VR<11, "v11">, DwarfRegNum<[88, 88]>; -def V12 : VR<12, "v12">, DwarfRegNum<[89, 89]>; -def V13 : VR<13, "v13">, DwarfRegNum<[90, 90]>; -def V14 : VR<14, "v14">, DwarfRegNum<[91, 91]>; -def V15 : VR<15, "v15">, DwarfRegNum<[92, 92]>; -def V16 : VR<16, "v16">, DwarfRegNum<[93, 93]>; -def V17 : VR<17, "v17">, DwarfRegNum<[94, 94]>; -def V18 : VR<18, "v18">, DwarfRegNum<[95, 95]>; -def V19 : VR<19, "v19">, DwarfRegNum<[96, 96]>; -def V20 : VR<20, "v20">, DwarfRegNum<[97, 97]>; -def V21 : VR<21, "v21">, DwarfRegNum<[98, 98]>; -def V22 : VR<22, "v22">, DwarfRegNum<[99, 99]>; -def V23 : VR<23, "v23">, DwarfRegNum<[100, 100]>; -def V24 : VR<24, "v24">, DwarfRegNum<[101, 101]>; -def V25 : VR<25, "v25">, DwarfRegNum<[102, 102]>; -def V26 : VR<26, "v26">, DwarfRegNum<[103, 103]>; -def V27 : VR<27, "v27">, DwarfRegNum<[104, 104]>; -def V28 : VR<28, "v28">, DwarfRegNum<[105, 105]>; -def V29 : VR<29, "v29">, DwarfRegNum<[106, 106]>; -def V30 : VR<30, "v30">, DwarfRegNum<[107, 107]>; -def V31 : VR<31, "v31">, DwarfRegNum<[108, 108]>; +foreach Index = 0-31 in { + def V#Index : VR<Index, "v"#Index>, + DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; +} // Condition register bits def CR0LT : CRBIT< 0, "0">; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 71eff4c52a..18e4c07942 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -14,7 +14,7 @@ #include "PPCSubtarget.h" #include "PPC.h" #include "PPCRegisterInfo.h" -#include "llvm/GlobalValue.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" @@ -36,6 +36,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, , Use64BitRegs(false) , IsPPC64(is64Bit) , HasAltivec(false) + , HasQPX(false) , HasFSQRT(false) , HasSTFIWX(false) , HasISEL(false) @@ -82,6 +83,12 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, // Set up darwin-specific properties. if (isDarwin()) HasLazyResolverStubs = true; + + // QPX requires a 32-byte aligned stack. Note that we need to do this if + // we're compiling for a BG/Q system regardless of whether or not QPX + // is enabled because external functions will assume this alignment. 
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 416c0f3c34..15885bd2df 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -43,7 +43,12 @@ namespace PPC {
DIR_A2,
DIR_E500mc,
DIR_E5500,
+    DIR_PWR3,
+    DIR_PWR4,
+    DIR_PWR5,
+    DIR_PWR5X,
DIR_PWR6,
+    DIR_PWR6X,
DIR_PWR7,
DIR_64
};
@@ -70,6 +75,7 @@ protected:
bool Use64BitRegs;
bool IsPPC64;
bool HasAltivec;
+  bool HasQPX;
bool HasFSQRT;
bool HasSTFIWX;
bool HasISEL;
@@ -150,6 +156,7 @@ public:
bool hasFSQRT() const { return HasFSQRT; }
bool hasSTFIWX() const { return HasSTFIWX; }
bool hasAltivec() const { return HasAltivec; }
+  bool hasQPX() const { return HasQPX; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool isBookE() const { return IsBookE; }
@@ -160,6 +167,8 @@ public:
bool isDarwin() const { return TargetTriple.isMacOSX(); }
/// isBGP - True if this is a BG/P platform.
bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; }
+  /// isBGQ - True if this is a BG/Q platform.
+  bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 22a78c5f1f..fe851c1b6f 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -43,8 +43,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
-    InstrItins(Subtarget.getInstrItineraryData()),
-    STTI(&TLInfo), VTTI(&TLInfo) {
+    InstrItins(Subtarget.getInstrItineraryData()) {

// The binutils for the BG/P are too old for CFI.
if (Subtarget.isBGP())
@@ -127,3 +126,12 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
return false;
}
+
+void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+  // Add first the target-independent BasicTTI pass, then our PPC pass. This
+  // allows the PPC pass to delegate to the target independent layer when
+  // appropriate.
+  PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+  PM.add(createPPCTargetTransformInfoPass(this));
+}
+
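Editorial note: for context on how these analysis passes get consumed, a pass simply requests the TargetTransformInfo analysis group; the PPC implementation registered above answers first and falls back to BasicTTI for anything it does not override. The following is a minimal sketch assuming the analysis-group TTI API of this era; CostAwarePass is an illustrative name, not code from this commit.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Pass.h"

namespace {
// Hypothetical consumer of the TTI analysis group.
struct CostAwarePass : public llvm::FunctionPass {
  static char ID;
  CostAwarePass() : llvm::FunctionPass(ID) {}

  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const {
    // Request the analysis group populated by addAnalysisPasses().
    AU.addRequired<llvm::TargetTransformInfo>();
    AU.setPreservesAll();
  }

  virtual bool runOnFunction(llvm::Function &F) {
    const llvm::TargetTransformInfo &TTI =
        getAnalysis<llvm::TargetTransformInfo>();
    // With the PPC implementation: 32 vector registers when Altivec is
    // available, 0 otherwise; unroll factor 5/6/2 depending on the CPU.
    unsigned VecRegs = TTI.getNumberOfRegisters(/*Vector=*/true);
    unsigned UF = TTI.getMaximumUnrollFactor();
    (void)VecRegs; (void)UF;
    return false;
  }
};
}

char CostAwarePass::ID = 0;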
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 0267ed1f23..606ccb3141 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -20,9 +20,8 @@
#include "PPCJITInfo.h"
#include "PPCSelectionDAGInfo.h"
#include "PPCSubtarget.h"
-#include "llvm/DataLayout.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"

namespace llvm {

@@ -37,8 +36,6 @@ class PPCTargetMachine : public LLVMTargetMachine {
PPCTargetLowering TLInfo;
PPCSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
-  ScalarTargetTransformImpl STTI;
-  VectorTargetTransformImpl VTTI;

public:
PPCTargetMachine(const Target &T, StringRef TT,
@@ -66,17 +63,14 @@ public:
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
-  virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const {
-    return &STTI;
-  }
-  virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const {
-    return &VTTI;
-  }

// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
virtual bool addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE);
+
+  /// \brief Register PPC analysis passes with a pass manager.
+  virtual void addAnalysisPasses(PassManagerBase &PM);
};

/// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
new file mode 100644
index 0000000000..5e9ad347d3
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -0,0 +1,236 @@
+//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// PPC target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppctti"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializePPCTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class PPCTTI : public ImmutablePass, public TargetTransformInfo {
+  const PPCTargetMachine *TM;
+  const PPCSubtarget *ST;
+  const PPCTargetLowering *TLI;
+
+  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+  /// are set if the result needs to be inserted and/or extracted from vectors.
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+  PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+    llvm_unreachable("This pass cannot be directly constructed");
+  }
+
+  PPCTTI(const PPCTargetMachine *TM)
+      : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+        TLI(TM->getTargetLowering()) {
+    initializePPCTTIPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual void initializePass() {
+    pushTTIStack(this);
+  }
+
+  virtual void finalizePass() {
+    popTTIStack();
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    TargetTransformInfo::getAnalysisUsage(AU);
+  }
+
+  /// Pass identification.
+  static char ID;
+
+  /// Provide necessary pointer adjustments for the two base classes.
+  virtual void *getAdjustedAnalysisPointer(const void *ID) {
+    if (ID == &TargetTransformInfo::ID)
+      return (TargetTransformInfo*)this;
+    return this;
+  }
+
+  /// \name Scalar TTI Implementations
+  /// @{
+  virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+
+  /// @}
+
+  /// \name Vector TTI Implementations
+  /// @{
+
+  virtual unsigned getNumberOfRegisters(bool Vector) const;
+  virtual unsigned getRegisterBitWidth(bool Vector) const;
+  virtual unsigned getMaximumUnrollFactor() const;
+  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
+  virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+                                  int Index, Type *SubTp) const;
+  virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+                                    Type *Src) const;
+  virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                      Type *CondTy) const;
+  virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+                                      unsigned Index) const;
+  virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+                                   unsigned Alignment,
+                                   unsigned AddressSpace) const;
+
+  /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
+                   "PPC Target Transform Info", true, true, false)
+char PPCTTI::ID = 0;
+
+ImmutablePass *
+llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
+  return new PPCTTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// PPC cost model.
+//
+//===----------------------------------------------------------------------===//
+
+PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
+  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+  // FIXME: PPC currently does not have custom popcnt lowering even though
+  // there is hardware support. Once this is fixed, update this function
+  // to reflect the real capabilities of the hardware.
+  return PSK_Software;
+}
+
+unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
+  if (Vector && !ST->hasAltivec())
+    return 0;
+  return 32;
+}
+
+unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
+  if (Vector) {
+    if (ST->hasAltivec()) return 128;
+    return 0;
+  }
+
+  if (ST->isPPC64())
+    return 64;
+  return 32;
+
+}
+
+unsigned PPCTTI::getMaximumUnrollFactor() const {
+  unsigned Directive = ST->getDarwinDirective();
+  // The 440 has no SIMD support, but floating-point instructions
+  // have a 5-cycle latency, so unroll by 5x for latency hiding.
+  if (Directive == PPC::DIR_440)
+    return 5;
+
+  // The A2 has no SIMD support, but floating-point instructions
+  // have a 6-cycle latency, so unroll by 6x for latency hiding.
+  if (Directive == PPC::DIR_A2)
+    return 6;
+
+  // FIXME: For lack of any better information, do no harm...
+  if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
+    return 1;
+
+  // For most things, modern systems have two execution units (and
+  // out-of-order execution).
+  return 2;
+}
+
+unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
+  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+  // Fallback to the default implementation.
+  return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty);
+}
+
+unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+                                Type *SubTp) const {
+  return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+  return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                    Type *CondTy) const {
+  return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+                                    unsigned Index) const {
+  assert(Val->isVectorTy() && "This must be a vector type");
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+
+  // Estimated cost of a load-hit-store delay. This was obtained
+  // experimentally as a minimum needed to prevent unprofitable
+  // vectorization for the paq8p benchmark. It may need to be
+  // raised further if other unprofitable cases remain.
+  unsigned LHSPenalty = 12;
+
+  // Vector element insert/extract with Altivec is very expensive,
+  // because they require store and reload with the attendant
+  // processor stall for load-hit-store. Until VSX is available,
+  // these need to be estimated as very costly.
+  if (ISD == ISD::EXTRACT_VECTOR_ELT ||
+      ISD == ISD::INSERT_VECTOR_ELT)
+    return LHSPenalty +
+           TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+
+  return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                                 unsigned AddressSpace) const {
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+         "Invalid Opcode");
+
+  // Each load/store unit costs 1.
+  unsigned Cost = LT.first * 1;
+
+  // PPC in general does not support unaligned loads and stores. They'll need
+  // to be decomposed based on the alignment factor.
+  unsigned SrcBytes = LT.second.getStoreSize();
+  if (SrcBytes && Alignment && Alignment < SrcBytes)
+    Cost *= (SrcBytes/Alignment);
+
+  return Cost;
+}
+
diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 5dc8568d83..fa44331b8a 100644
--- a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
-#include "llvm/Module.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
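Editorial note: as a closing illustration of how the two new cost hooks combine (a sketch under assumed types, not code from the patch): a 16-byte <4 x i32> load legalizes to a single v4i32 operation (LT.first == 1), so with only 4-byte alignment getMemoryOpCost returns 1 * (16 / 4) = 4, while an element extract from the same vector pays the 12-unit load-hit-store penalty on top of the target-independent base cost.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"

// Hypothetical helper showing the queries a vectorizer would make; the
// numbers quoted in the comments assume the PPC implementation above.
static void queryPPCCosts(const llvm::TargetTransformInfo &TTI,
                          llvm::LLVMContext &Ctx) {
  llvm::Type *V4i32 =
      llvm::VectorType::get(llvm::Type::getInt32Ty(Ctx), 4);

  // 16-byte vector load with 4-byte alignment: 1 * (16 / 4) = 4.
  unsigned LoadCost =
      TTI.getMemoryOpCost(llvm::Instruction::Load, V4i32,
                          /*Alignment=*/4, /*AddressSpace=*/0);

  // Element extract: LHSPenalty (12) plus the default TTI estimate.
  unsigned ExtractCost =
      TTI.getVectorInstrCost(llvm::Instruction::ExtractElement, V4i32,
                             /*Index=*/0);

  (void)LoadCost; (void)ExtractCost;
}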