diff options
-rw-r--r-- | lib/Target/ARM/ARMExpandPseudoInsts.cpp | 595 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 231 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 147 | ||||
-rw-r--r-- | lib/Target/ARM/NEONPreAllocPass.cpp | 138 |
4 files changed, 624 insertions, 487 deletions
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 3988a84eed..38535dc665 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -24,13 +24,6 @@ using namespace llvm; namespace { class ARMExpandPseudo : public MachineFunctionPass { - // Constants for register spacing in NEON load/store instructions. - enum NEONRegSpacing { - SingleSpc, - EvenDblSpc, - OddDblSpc - }; - public: static char ID; ARMExpandPseudo() : MachineFunctionPass(ID) {} @@ -48,10 +41,9 @@ namespace { void TransferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); bool ExpandMBB(MachineBasicBlock &MBB); - void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc, - bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs); - void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc, - bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs); + void ExpandVLD(MachineBasicBlock::iterator &MBBI); + void ExpandVST(MachineBasicBlock::iterator &MBBI); + void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); }; char ARMExpandPseudo::ID = 0; } @@ -73,37 +65,289 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, } } +namespace { + // Constants for register spacing in NEON load/store instructions. + // For quad-register load-lane and store-lane pseudo instructors, the + // spacing is initially assumed to be EvenDblSpc, and that is changed to + // OddDblSpc depending on the lane number operand. + enum NEONRegSpacing { + SingleSpc, + EvenDblSpc, + OddDblSpc + }; + + // Entries for NEON load/store information table. The table is sorted by + // PseudoOpc for fast binary-search lookups. + struct NEONLdStTableEntry { + unsigned PseudoOpc; + unsigned RealOpc; + bool IsLoad; + bool HasWriteBack; + NEONRegSpacing RegSpacing; + unsigned char NumRegs; // D registers loaded or stored + unsigned char RegElts; // elements per D register; used for lane ops + + // Comparison methods for binary search of the table. + bool operator<(const NEONLdStTableEntry &TE) const { + return PseudoOpc < TE.PseudoOpc; + } + friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) { + return TE.PseudoOpc < PseudoOpc; + } + friend bool ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc, + const NEONLdStTableEntry &TE) { + return PseudoOpc < TE.PseudoOpc; + } + }; +} + +static const NEONLdStTableEntry NEONLdStTable[] = { +{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 }, +{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 }, +{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 }, +{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 }, + +{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 }, +{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 }, +{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 }, +{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 }, +{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 }, +{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 }, +{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 }, +{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 }, + +{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 }, +{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 }, +{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 }, +{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 }, +{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 }, +{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 }, +{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 }, +{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 }, +{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 }, +{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 }, + +{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 }, +{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 }, +{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 }, +{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 }, +{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 }, +{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 }, + +{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 }, +{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 }, +{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 }, +{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 }, +{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 }, +{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 }, + +{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 }, +{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 }, +{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 }, +{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 }, +{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 }, +{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 }, +{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 }, +{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 }, +{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 }, +{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 }, + +{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 }, +{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 }, +{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 }, +{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 }, +{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 }, +{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 }, + +{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 }, +{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 }, +{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 }, +{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 }, +{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 }, +{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 }, + +{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 }, +{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 }, +{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 }, +{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 }, +{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 }, +{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 }, +{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 }, +{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 }, +{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 }, +{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 }, + +{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 }, +{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 }, +{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 }, +{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 }, +{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 }, +{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 }, + +{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 }, +{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 }, +{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 }, +{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 }, +{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 }, +{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 }, + +{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 }, +{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 }, +{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 }, +{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 }, + +{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 }, +{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 }, +{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 }, +{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 }, +{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 }, +{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 }, +{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 }, +{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 }, + +{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 }, +{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 }, +{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 }, +{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 }, +{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 }, +{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 }, +{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4}, +{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4}, +{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2}, +{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2}, + +{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 }, +{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 }, +{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 }, +{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 }, +{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 }, +{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 }, + +{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 }, +{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 }, +{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 }, +{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 }, +{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 }, +{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 }, + +{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 }, +{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 }, +{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 }, +{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 }, +{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 }, +{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 }, +{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4}, +{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4}, +{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2}, +{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2}, + +{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 }, +{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 }, +{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 }, +{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 }, +{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 }, +{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 }, + +{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 }, +{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 }, +{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 }, +{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 }, +{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 }, +{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 }, + +{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 }, +{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 }, +{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 }, +{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 }, +{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 }, +{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 }, +{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4}, +{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4}, +{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2}, +{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2}, + +{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 }, +{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 }, +{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 }, +{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 }, +{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 }, +{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 }, + +{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 }, +{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 }, +{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 }, +{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 }, +{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 }, +{ ARM::VST4q8oddPseudo_UPD , ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 } +}; + +/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON +/// load or store pseudo instruction. +static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) { + unsigned NumEntries = array_lengthof(NEONLdStTable); + +#ifndef NDEBUG + // Make sure the table is sorted. + static bool TableChecked = false; + if (!TableChecked) { + for (unsigned i = 0; i != NumEntries-1; ++i) + assert(NEONLdStTable[i] < NEONLdStTable[i+1] && + "NEONLdStTable is not sorted!"); + TableChecked = true; + } +#endif + + const NEONLdStTableEntry *I = + std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode); + if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode) + return I; + return NULL; +} + +/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register, +/// corresponding to the specified register spacing. Not all of the results +/// are necessarily valid, e.g., a Q register only has 2 D subregisters. +static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc, + const TargetRegisterInfo *TRI, unsigned &D0, + unsigned &D1, unsigned &D2, unsigned &D3) { + if (RegSpc == SingleSpc) { + D0 = TRI->getSubReg(Reg, ARM::dsub_0); + D1 = TRI->getSubReg(Reg, ARM::dsub_1); + D2 = TRI->getSubReg(Reg, ARM::dsub_2); + D3 = TRI->getSubReg(Reg, ARM::dsub_3); + } else if (RegSpc == EvenDblSpc) { + D0 = TRI->getSubReg(Reg, ARM::dsub_0); + D1 = TRI->getSubReg(Reg, ARM::dsub_2); + D2 = TRI->getSubReg(Reg, ARM::dsub_4); + D3 = TRI->getSubReg(Reg, ARM::dsub_6); + } else { + assert(RegSpc == OddDblSpc && "unknown register spacing"); + D0 = TRI->getSubReg(Reg, ARM::dsub_1); + D1 = TRI->getSubReg(Reg, ARM::dsub_3); + D2 = TRI->getSubReg(Reg, ARM::dsub_5); + D3 = TRI->getSubReg(Reg, ARM::dsub_7); + } +} + /// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register /// operands to real VLD instructions with D register operands. -void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI, - unsigned Opc, bool hasWriteBack, - NEONRegSpacing RegSpc, unsigned NumRegs) { +void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; MachineBasicBlock &MBB = *MI.getParent(); - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); + const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); + assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed"); + NEONRegSpacing RegSpc = TableEntry->RegSpacing; + unsigned NumRegs = TableEntry->NumRegs; + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(TableEntry->RealOpc)); unsigned OpIdx = 0; bool DstIsDead = MI.getOperand(OpIdx).isDead(); unsigned DstReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; - if (RegSpc == SingleSpc) { - D0 = TRI->getSubReg(DstReg, ARM::dsub_0); - D1 = TRI->getSubReg(DstReg, ARM::dsub_1); - D2 = TRI->getSubReg(DstReg, ARM::dsub_2); - D3 = TRI->getSubReg(DstReg, ARM::dsub_3); - } else if (RegSpc == EvenDblSpc) { - D0 = TRI->getSubReg(DstReg, ARM::dsub_0); - D1 = TRI->getSubReg(DstReg, ARM::dsub_2); - D2 = TRI->getSubReg(DstReg, ARM::dsub_4); - D3 = TRI->getSubReg(DstReg, ARM::dsub_6); - } else { - assert(RegSpc == OddDblSpc && "unknown register spacing for VLD"); - D0 = TRI->getSubReg(DstReg, ARM::dsub_1); - D1 = TRI->getSubReg(DstReg, ARM::dsub_3); - D2 = TRI->getSubReg(DstReg, ARM::dsub_5); - D3 = TRI->getSubReg(DstReg, ARM::dsub_7); - } + GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); if (NumRegs > 2) @@ -111,14 +355,14 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI, if (NumRegs > 3) MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); - if (hasWriteBack) + if (TableEntry->HasWriteBack) MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the am6offset operand. - if (hasWriteBack) + if (TableEntry->HasWriteBack) MIB.addOperand(MI.getOperand(OpIdx++)); MIB = AddDefaultPred(MIB); @@ -138,45 +382,32 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI, /// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register /// operands to real VST instructions with D register operands. -void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI, - unsigned Opc, bool hasWriteBack, - NEONRegSpacing RegSpc, unsigned NumRegs) { +void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; MachineBasicBlock &MBB = *MI.getParent(); - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); + const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); + assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed"); + NEONRegSpacing RegSpc = TableEntry->RegSpacing; + unsigned NumRegs = TableEntry->NumRegs; + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(TableEntry->RealOpc)); unsigned OpIdx = 0; - if (hasWriteBack) + if (TableEntry->HasWriteBack) MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the am6offset operand. - if (hasWriteBack) + if (TableEntry->HasWriteBack) MIB.addOperand(MI.getOperand(OpIdx++)); bool SrcIsKill = MI.getOperand(OpIdx).isKill(); unsigned SrcReg = MI.getOperand(OpIdx).getReg(); unsigned D0, D1, D2, D3; - if (RegSpc == SingleSpc) { - D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); - D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); - D2 = TRI->getSubReg(SrcReg, ARM::dsub_2); - D3 = TRI->getSubReg(SrcReg, ARM::dsub_3); - } else if (RegSpc == EvenDblSpc) { - D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); - D1 = TRI->getSubReg(SrcReg, ARM::dsub_2); - D2 = TRI->getSubReg(SrcReg, ARM::dsub_4); - D3 = TRI->getSubReg(SrcReg, ARM::dsub_6); - } else { - assert(RegSpc == OddDblSpc && "unknown register spacing for VST"); - D0 = TRI->getSubReg(SrcReg, ARM::dsub_1); - D1 = TRI->getSubReg(SrcReg, ARM::dsub_3); - D2 = TRI->getSubReg(SrcReg, ARM::dsub_5); - D3 = TRI->getSubReg(SrcReg, ARM::dsub_7); - } - + GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3); MIB.addReg(D0).addReg(D1); if (NumRegs > 2) MIB.addReg(D2); @@ -190,6 +421,85 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI, MI.eraseFromParent(); } +/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ +/// register operands to real instructions with D register operands. +void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); + assert(TableEntry && "NEONLdStTable lookup failed"); + NEONRegSpacing RegSpc = TableEntry->RegSpacing; + unsigned NumRegs = TableEntry->NumRegs; + unsigned RegElts = TableEntry->RegElts; + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(TableEntry->RealOpc)); + unsigned OpIdx = 0; + // The lane operand is always the 3rd from last operand, before the 2 + // predicate operands. + unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm(); + + // Adjust the lane and spacing as needed for Q registers. + assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane"); + if (RegSpc == EvenDblSpc && Lane >= RegElts) { + RegSpc = OddDblSpc; + Lane -= RegElts; + } + assert(Lane < RegElts && "out of range lane for VLD/VST-lane"); + + unsigned DstReg, D0, D1, D2, D3; + bool DstIsDead; + if (TableEntry->IsLoad) { + DstIsDead = MI.getOperand(OpIdx).isDead(); + DstReg = MI.getOperand(OpIdx++).getReg(); + GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); + MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 2) + MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 3) + MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); + } + + if (TableEntry->HasWriteBack) + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the addrmode6 operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + // Copy the am6offset operand. + if (TableEntry->HasWriteBack) + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Grab the super-register source. + MachineOperand MO = MI.getOperand(OpIdx++); + if (!TableEntry->IsLoad) + GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3); + + // Add the subregs as sources of the new instruction. + unsigned SrcFlags = (getUndefRegState(MO.isUndef()) | + getKillRegState(MO.isKill())); + MIB.addReg(D0, SrcFlags).addReg(D1, SrcFlags); + if (NumRegs > 2) + MIB.addReg(D2, SrcFlags); + if (NumRegs > 3) + MIB.addReg(D3, SrcFlags); + + // Add the lane number operand. + MIB.addImm(Lane); + + MIB = AddDefaultPred(MIB); + // Copy the super-register source to be an implicit source. + MO.setImplicit(true); + MIB.addOperand(MO); + if (TableEntry->IsLoad) + // Add an implicit def for the super-register. + MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); +} + bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool Modified = false; @@ -292,204 +602,169 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { } case ARM::VLD1q8Pseudo: - ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break; case ARM::VLD1q16Pseudo: - ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break; case ARM::VLD1q32Pseudo: - ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break; case ARM::VLD1q64Pseudo: - ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break; case ARM::VLD1q8Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break; case ARM::VLD1q16Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break; case ARM::VLD1q32Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break; case ARM::VLD1q64Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break; - case ARM::VLD2d8Pseudo: - ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break; case ARM::VLD2d16Pseudo: - ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break; case ARM::VLD2d32Pseudo: - ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break; case ARM::VLD2q8Pseudo: - ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break; case ARM::VLD2q16Pseudo: - ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break; case ARM::VLD2q32Pseudo: - ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break; case ARM::VLD2d8Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break; case ARM::VLD2d16Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break; case ARM::VLD2d32Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break; case ARM::VLD2q8Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break; case ARM::VLD2q16Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD2q16, true, SingleSpc, 4); break; case ARM::VLD2q32Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break; - case ARM::VLD3d8Pseudo: - ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break; case ARM::VLD3d16Pseudo: - ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break; case ARM::VLD3d32Pseudo: - ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break; case ARM::VLD1d64TPseudo: - ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break; case ARM::VLD3d8Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break; case ARM::VLD3d16Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break; case ARM::VLD3d32Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break; case ARM::VLD1d64TPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break; case ARM::VLD3q8Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break; case ARM::VLD3q16Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break; case ARM::VLD3q32Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break; case ARM::VLD3q8oddPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break; case ARM::VLD3q16oddPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break; case ARM::VLD3q32oddPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break; - case ARM::VLD4d8Pseudo: - ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break; case ARM::VLD4d16Pseudo: - ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break; case ARM::VLD4d32Pseudo: - ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break; case ARM::VLD1d64QPseudo: - ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break; case ARM::VLD4d8Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break; case ARM::VLD4d16Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break; case ARM::VLD4d32Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break; case ARM::VLD1d64QPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break; case ARM::VLD4q8Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break; case ARM::VLD4q16Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break; case ARM::VLD4q32Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break; case ARM::VLD4q8oddPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break; case ARM::VLD4q16oddPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break; case ARM::VLD4q32oddPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break; + ExpandVLD(MBBI); + break; case ARM::VST1q8Pseudo: - ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break; case ARM::VST1q16Pseudo: - ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break; case ARM::VST1q32Pseudo: - ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break; case ARM::VST1q64Pseudo: - ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break; case ARM::VST1q8Pseudo_UPD: - ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break; case ARM::VST1q16Pseudo_UPD: - ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break; case ARM::VST1q32Pseudo_UPD: - ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break; case ARM::VST1q64Pseudo_UPD: - ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break; - case ARM::VST2d8Pseudo: - ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break; case ARM::VST2d16Pseudo: - ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break; case ARM::VST2d32Pseudo: - ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break; case ARM::VST2q8Pseudo: - ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break; case ARM::VST2q16Pseudo: - ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break; case ARM::VST2q32Pseudo: - ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break; case ARM::VST2d8Pseudo_UPD: - ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break; case ARM::VST2d16Pseudo_UPD: - ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break; case ARM::VST2d32Pseudo_UPD: - ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break; case ARM::VST2q8Pseudo_UPD: - ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break; case ARM::VST2q16Pseudo_UPD: - ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break; case ARM::VST2q32Pseudo_UPD: - ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break; - case ARM::VST3d8Pseudo: - ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break; case ARM::VST3d16Pseudo: - ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break; case ARM::VST3d32Pseudo: - ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break; case ARM::VST1d64TPseudo: - ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break; case ARM::VST3d8Pseudo_UPD: - ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break; case ARM::VST3d16Pseudo_UPD: - ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break; case ARM::VST3d32Pseudo_UPD: - ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break; case ARM::VST1d64TPseudo_UPD: - ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break; case ARM::VST3q8Pseudo_UPD: - ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break; case ARM::VST3q16Pseudo_UPD: - ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break; case ARM::VST3q32Pseudo_UPD: - ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break; case ARM::VST3q8oddPseudo_UPD: - ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break; case ARM::VST3q16oddPseudo_UPD: - ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break; case ARM::VST3q32oddPseudo_UPD: - ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break; - case ARM::VST4d8Pseudo: - ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break; case ARM::VST4d16Pseudo: - ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break; case ARM::VST4d32Pseudo: - ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break; case ARM::VST1d64QPseudo: - ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break; case ARM::VST4d8Pseudo_UPD: - ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break; case ARM::VST4d16Pseudo_UPD: - ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break; case ARM::VST4d32Pseudo_UPD: - ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break; case ARM::VST1d64QPseudo_UPD: - ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break; case ARM::VST4q8Pseudo_UPD: - ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break; case ARM::VST4q16Pseudo_UPD: - ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break; case ARM::VST4q32Pseudo_UPD: - ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break; case ARM::VST4q8oddPseudo_UPD: - ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break; case ARM::VST4q16oddPseudo_UPD: - ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break; case ARM::VST4q32oddPseudo_UPD: - ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break; + ExpandVST(MBBI); + break; + + case ARM::VLD2LNd8Pseudo: + case ARM::VLD2LNd16Pseudo: + case ARM::VLD2LNd32Pseudo: + case ARM::VLD2LNq16Pseudo: + case ARM::VLD2LNq32Pseudo: + case ARM::VLD2LNd8Pseudo_UPD: + case ARM::VLD2LNd16Pseudo_UPD: + case ARM::VLD2LNd32Pseudo_UPD: + case ARM::VLD2LNq16Pseudo_UPD: + case ARM::VLD2LNq32Pseudo_UPD: + case ARM::VLD3LNd8Pseudo: + case ARM::VLD3LNd16Pseudo: + case ARM::VLD3LNd32Pseudo: + case ARM::VLD3LNq16Pseudo: + case ARM::VLD3LNq32Pseudo: + case ARM::VLD3LNd8Pseudo_UPD: + case ARM::VLD3LNd16Pseudo_UPD: + case ARM::VLD3LNd32Pseudo_UPD: + case ARM::VLD3LNq16Pseudo_UPD: + case ARM::VLD3LNq32Pseudo_UPD: + case ARM::VLD4LNd8Pseudo: + case ARM::VLD4LNd16Pseudo: + case ARM::VLD4LNd32Pseudo: + case ARM::VLD4LNq16Pseudo: + case ARM::VLD4LNq32Pseudo: + case ARM::VLD4LNd8Pseudo_UPD: + case ARM::VLD4LNd16Pseudo_UPD: + case ARM::VLD4LNd32Pseudo_UPD: + case ARM::VLD4LNq16Pseudo_UPD: + case ARM::VLD4LNq32Pseudo_UPD: + case ARM::VST2LNd8Pseudo: + case ARM::VST2LNd16Pseudo: + case ARM::VST2LNd32Pseudo: + case ARM::VST2LNq16Pseudo: + case ARM::VST2LNq32Pseudo: + case ARM::VST2LNd8Pseudo_UPD: + case ARM::VST2LNd16Pseudo_UPD: + case ARM::VST2LNd32Pseudo_UPD: + case ARM::VST2LNq16Pseudo_UPD: + case ARM::VST2LNq32Pseudo_UPD: + case ARM::VST3LNd8Pseudo: + case ARM::VST3LNd16Pseudo: + case ARM::VST3LNd32Pseudo: + case ARM::VST3LNq16Pseudo: + case ARM::VST3LNq32Pseudo: + case ARM::VST3LNd8Pseudo_UPD: + case ARM::VST3LNd16Pseudo_UPD: + case ARM::VST3LNd32Pseudo_UPD: + case ARM::VST3LNq16Pseudo_UPD: + case ARM::VST3LNq32Pseudo_UPD: + case ARM::VST4LNd8Pseudo: + case ARM::VST4LNd16Pseudo: + case ARM::VST4LNd32Pseudo: + case ARM::VST4LNq16Pseudo: + case ARM::VST4LNq32Pseudo: + case ARM::VST4LNd8Pseudo_UPD: + case ARM::VST4LNd16Pseudo_UPD: + case ARM::VST4LNd32Pseudo_UPD: + case ARM::VST4LNq16Pseudo_UPD: + case ARM::VST4LNq32Pseudo_UPD: + ExpandLaneOp(MBBI); + break; } if (ModifiedOp) diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 456311ff25..a477344f37 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -151,10 +151,9 @@ private: /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should /// be 2, 3 or 4. The opcode arrays specify the instructions used for - /// load/store of D registers and even subregs and odd subregs of Q registers. + /// load/store of D registers and Q registers. SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs, - unsigned *DOpcodes, unsigned *QOpcodes0, - unsigned *QOpcodes1); + unsigned *DOpcodes, unsigned *QOpcodes); /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be @@ -196,10 +195,6 @@ private: SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); - - // Form sequences of 8 consecutive D registers. - SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3, - SDValue V4, SDValue V5, SDValue V6, SDValue V7); }; } @@ -1015,39 +1010,6 @@ SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1, return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); } -/// OctoDRegs - Form 8 consecutive D registers. -/// -SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1, - SDValue |