diff options
author | Evan Cheng <evan.cheng@apple.com> | 2010-10-09 01:45:34 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2010-10-09 01:45:34 +0000 |
commit | 84f69e8436d522cb3a772766ba67a1d7658dfdf5 (patch) | |
tree | f023e217529142fa674132669261d86fdd90502f | |
parent | 40bb6836f64ca5c60f759896805fcc163e305a4d (diff) |
Finish vld3 and vld4.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116140 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 44 | ||||
-rw-r--r-- | lib/Target/ARM/ARMSchedule.td | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA8.td | 33 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA9.td | 32 |
4 files changed, 78 insertions, 34 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 24563a1a5e..4340a7d3c7 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -344,7 +344,7 @@ def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>; class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3, + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3u, "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>; @@ -352,9 +352,9 @@ def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">; def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">; def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">; -def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3>; -def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3>; -def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3>; +def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; +def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; +def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; // ...with double-spaced registers (non-updating versions for disassembly only): def VLD3q8 : VLD3D<0b0101, 0b0000, "8">; @@ -364,14 +364,14 @@ def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">; def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">; def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">; -def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3>; -def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3>; -def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3>; +def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; +def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; +def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; // ...alternate versions to be allocated odd register numbers: -def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3>; -def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3>; -def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3>; +def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; +def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; +def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; // VLD4 : Vector Load (multiple 4-element structures) class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -499,7 +499,7 @@ def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>; class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VLD3, "vld3", Dt, + nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; @@ -507,16 +507,16 @@ def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8">; def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">; def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">; -def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3>; -def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3>; -def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3>; +def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; +def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; +def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>; // ...with double-spaced registers: def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">; def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">; -def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3>; -def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3>; +def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>; +def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>; // ...with address register writeback: class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -524,7 +524,7 @@ class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), - IIC_VLD3, "vld3", Dt, + IIC_VLD3lnu, "vld3", Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb", []>; @@ -533,15 +533,15 @@ def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8">; def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">; def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">; -def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3>; -def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3>; -def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3>; +def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; +def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; +def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">; def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">; -def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3>; -def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3>; +def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>; +def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>; // VLD4LN : Vector Load (single 4-element structure to one lane) class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 521faa196d..73c677e0d4 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -141,6 +141,9 @@ def IIC_VLD2x2u : InstrItinClass; def IIC_VLD2ln : InstrItinClass; def IIC_VLD2lnu : InstrItinClass; def IIC_VLD3 : InstrItinClass; +def IIC_VLD3ln : InstrItinClass; +def IIC_VLD3u : InstrItinClass; +def IIC_VLD3lnu : InstrItinClass; def IIC_VLD4 : InstrItinClass; def IIC_VST : InstrItinClass; def IIC_VUNAD : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index e711839845..6c4cf8f122 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -441,39 +441,58 @@ def CortexA8Itineraries : ProcessorItineraries< [2, 2, 1]>, // // VLD2x2 - InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VLD2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<3, [A8_NLSPipe], 1>, InstrStage<3, [A8_LSPipe]>], [2, 2, 3, 3, 1]>, // // VLD2ln - InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VLD2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<3, [A8_NLSPipe], 1>, InstrStage<3, [A8_LSPipe]>], [3, 3, 1, 1, 1, 1]>, // // VLD2u - InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VLD2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_NLSPipe], 1>, InstrStage<1, [A8_LSPipe]>], [2, 2, 2, 1, 1, 1]>, // // VLD2x2u - InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<3, [A8_NLSPipe], 1>, InstrStage<3, [A8_LSPipe]>], [2, 2, 3, 3, 2, 1]>, // // VLD2lnu - InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<3, [A8_NLSPipe], 1>, InstrStage<3, [A8_LSPipe]>], [3, 3, 2, 1, 1, 1, 1, 1]>, // // VLD3 InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_NLSPipe]>, - InstrStage<1, [A8_LSPipe]>], [2, 2, 2, 1]>, + InstrStage<4, [A8_NLSPipe], 1>, + InstrStage<4, [A8_LSPipe]>], + [3, 3, 4, 1]>, + // + // VLD3ln + InstrItinData<IIC_VLD3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<5, [A8_NLSPipe], 1>, + InstrStage<5, [A8_LSPipe]>], + [4, 4, 5, 1, 1, 1, 1, 2]>, + // + // VLD3u + InstrItinData<IIC_VLD3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<4, [A8_NLSPipe], 1>, + InstrStage<4, [A8_LSPipe]>], + [3, 3, 4, 2, 1]>, + // + // VLD3lnu + InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<5, [A8_NLSPipe], 1>, + InstrStage<5, [A8_LSPipe]>], + [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>, // // VLD4 InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index c199ef7f2b..2d2bc370f5 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -822,14 +822,36 @@ def CortexA9Itineraries : ProcessorItineraries< [4, 4, 2, 1, 1, 1, 1, 1]>, // // VLD3 - // FIXME: We don't model this instruction properly InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>, - // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_NPipe]>], - [2, 2, 2, 1]>, + InstrStage<4, [A9_NPipe]>], + [4, 4, 5, 1]>, + // + // VLD3ln + InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<11, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<5, [A9_NPipe]>], + [5, 5, 6, 1, 1, 1, 1, 2]>, + // + // VLD3u + InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<4, [A9_NPipe]>], + [4, 4, 5, 2, 1]>, + // + // VLD3lnu + InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<11, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<5, [A9_NPipe]>], + [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>, // // VLD4 // FIXME: We don't model this instruction properly |