diff options
author | Bob Wilson <bob.wilson@apple.com> | 2010-11-01 22:04:05 +0000 |
---|---|---|
committer | Bob Wilson <bob.wilson@apple.com> | 2010-11-01 22:04:05 +0000 |
commit | b796bbb6de19872c0c1921b8b3f05206dd33c97d (patch) | |
tree | 76e27bbc840457cb4d53aac45dc1be8219f87b5c /lib | |
parent | 1c163d2a06e94375b811f807b1667d419f5cb258 (diff) |
Add NEON VLD1-lane instructions. Partial fix for Radar 8599955.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117964 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/ARM/ARMExpandPseudoInsts.cpp | 22 |
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 43 |
-rw-r--r-- | lib/Target/ARM/ARMSchedule.td | 2 |
-rw-r--r-- | lib/Target/ARM/ARMScheduleA8.td | 12 |
-rw-r--r-- | lib/Target/ARM/ARMScheduleA9.td | 18 |
5 files changed, 93 insertions, 4 deletions
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 53d2e9df12..774324b452 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -110,6 +110,13 @@ namespace { } static const NEONLdStTableEntry NEONLdStTable[] = { +{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 }, +{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, false, EvenDblSpc, 1, 4 }, +{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 }, +{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, false, EvenDblSpc, 1, 2 }, +{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 }, +{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, false, EvenDblSpc, 1, 8 }, + { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 }, { ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 }, { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 }, @@ -476,8 +483,9 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { DstIsDead = MI.getOperand(OpIdx).isDead(); DstReg = MI.getOperand(OpIdx++).getReg(); GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); - MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); + MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 1) + MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); if (NumRegs > 2) MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); if (NumRegs > 3) @@ -502,7 +510,9 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { // Add the subregs as sources of the new instruction. 
unsigned SrcFlags = (getUndefRegState(MO.isUndef()) | getKillRegState(MO.isKill())); - MIB.addReg(D0, SrcFlags).addReg(D1, SrcFlags); + MIB.addReg(D0, SrcFlags); + if (NumRegs > 1) + MIB.addReg(D1, SrcFlags); if (NumRegs > 2) MIB.addReg(D2, SrcFlags); if (NumRegs > 3) @@ -943,6 +953,12 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { ExpandVST(MBBI); break; + case ARM::VLD1LNq8Pseudo: + case ARM::VLD1LNq16Pseudo: + case ARM::VLD1LNq32Pseudo: + case ARM::VLD1LNq8Pseudo_UPD: + case ARM::VLD1LNq16Pseudo_UPD: + case ARM::VLD1LNq32Pseudo_UPD: case ARM::VLD2LNd8Pseudo: case ARM::VLD2LNd16Pseudo: case ARM::VLD2LNd32Pseudo: diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index eadc907529..ed14814dde 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -421,6 +421,8 @@ def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>; def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>; def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>; +} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 + // Classes for VLD*LN pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. class VLDQLNPseudo<InstrItinClass itin> @@ -449,7 +451,46 @@ class VLDQQQQLNWBPseudo<InstrItinClass itin> nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; // VLD1LN : Vector Load (single element to one lane) -// FIXME: Not yet implemented. 
+class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, + PatFrag LoadOp> + : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst), + (ins addrmode6:$addr, DPR:$src, nohash_imm:$lane), + IIC_VLD1ln, "vld1", Dt, "\\{$dst[$lane]\\}, $addr", + "$src = $dst", + [(set DPR:$dst, (vector_insert (Ty DPR:$src), + (i32 (LoadOp addrmode6:$addr)), + imm:$lane))]>; +class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> { + let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), + (i32 (LoadOp addrmode6:$addr)), + imm:$lane))]; +} + +def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8>; +def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16>; +def VLD1LNd32 : VLD1LN<0b1000, {?,0,?,?}, "32", v2i32, load>; + +def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; +def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>; +def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>; + +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { + +// ...with address register writeback: +class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, + "\\{$dst[$lane]\\}, $addr$offset", + "$src = $dst, $addr.addr = $wb", []>; + +def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8">; +def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16">; +def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32">; + +def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; +def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; +def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; // VLD2LN : Vector Load (single 2-element structure to one lane) class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 61489d64f3..7f4d2bbd44 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -134,6 +134,8 @@ def 
IIC_VLD1u : InstrItinClass; def IIC_VLD1x2u : InstrItinClass; def IIC_VLD1x3u : InstrItinClass; def IIC_VLD1x4u : InstrItinClass; +def IIC_VLD1ln : InstrItinClass; +def IIC_VLD1lnu : InstrItinClass; def IIC_VLD2 : InstrItinClass; def IIC_VLD2x2 : InstrItinClass; def IIC_VLD2u : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 1f19b21e1c..b5c8def42f 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -457,6 +457,18 @@ def CortexA8Itineraries : ProcessorItineraries< InstrStage<3, [A8_LSPipe]>], [2, 2, 3, 3, 2, 1]>, // + // VLD1ln + InstrItinData<IIC_VLD1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<3, [A8_NLSPipe], 1>, + InstrStage<3, [A8_LSPipe]>], + [3, 1, 1, 1]>, + // + // VLD1lnu + InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<3, [A8_NLSPipe], 1>, + InstrStage<3, [A8_LSPipe]>], + [3, 2, 1, 1, 1, 1]>, + // // VLD2 InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<2, [A8_NLSPipe], 1>, diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 3096b0ad99..c78f59383f 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -787,6 +787,24 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<3, [A9_LSUnit]>], [2, 2, 3, 3, 2, 1]>, // + // VLD1ln + InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 1>, + InstrStage<3, [A9_LSUnit]>], + [4, 1, 1, 1]>, + // + // VLD1lnu + InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 1>, + InstrStage<3, [A9_LSUnit]>], + [4, 2, 1, 1, 1, 1]>, + // // VLD2 InstrItinData<IIC_VLD2, [InstrStage<1, 
[A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, |