diff options
author | Evan Cheng <evan.cheng@apple.com> | 2010-10-01 20:50:58 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2010-10-01 20:50:58 +0000 |
commit | cae6a12a999ef9434f110950d453814ab41d2156 (patch) | |
tree | c1f3cf1a47464947a37ea9c0aef5424b5b6e0cb0 /lib/Target/ARM | |
parent | 45547b844d6a1cd69d6f71ec304948896c89bf43 (diff) |
NEON scheduling info fix: register-to-register vmov instructions are single-cycle instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@115344 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 20 | ||||
-rw-r--r-- | lib/Target/ARM/ARMSchedule.td | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA8.td | 8 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleA9.td | 24 |
4 files changed, 39 insertions, 15 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 1a4a8847d9..93e9e66077 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -3177,7 +3177,7 @@ def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", v2i64, v2i32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow -defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", +defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VMOVN, "vshrn", "i", NEONvshrn>; // VRSHL : Vector Rounding Shift @@ -3284,7 +3284,7 @@ class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> [(set DPR:$dst, (Ty (vnegd DPR:$src)))]>; class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src), - IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", + IIC_VSHLiQ, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (Ty (vnegq QPR:$src)))]>; // VNEG : Vector Negate (integer) @@ -3349,9 +3349,9 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, let neverHasSideEffects = 1 in { def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), - N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; + N3RegFrm, IIC_VMOV, "vmov", "$dst, $src", "", []>; def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), - N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; + N3RegFrm, IIC_VMOV, "vmov", "$dst, $src", "", []>; // Pseudo vector move instructions for QQ and QQQQ registers. This should // be expanded after register allocation is completed. 
@@ -3577,7 +3577,7 @@ class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy> : NVDupLane<op19_16, 1, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]", + IIC_VMOVQ, OpcodeStr, Dt, "$dst, $src[$lane]", [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>; @@ -3616,11 +3616,11 @@ def VDUPfdf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0, def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, (outs QPR:$dst), (ins SPR:$src), - IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "", + IIC_VMOVQ, "vdup", "32", "$dst, ${src:lane}", "", [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; // VMOVN : Vector Narrowing Move -defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, +defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN, "vmovn", "i", trunc>; // VQMOVN : Vector Saturating Narrowing Move defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, @@ -3684,7 +3684,7 @@ class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>; class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst), - (ins QPR:$src), IIC_VMOVD, + (ins QPR:$src), IIC_VMOVQ, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>; @@ -3707,7 +3707,7 @@ class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>; class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst), - (ins QPR:$src), IIC_VMOVD, + (ins QPR:$src), IIC_VMOVQ, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>; @@ -3726,7 +3726,7 @@ class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> 
[(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>; class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst), - (ins QPR:$src), IIC_VMOVD, + (ins QPR:$src), IIC_VMOVQ, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>; diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 07bd0fdf28..ec7d29aac0 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -129,6 +129,7 @@ def IIC_VUNAD : InstrItinClass; def IIC_VUNAQ : InstrItinClass; def IIC_VBIND : InstrItinClass; def IIC_VBINQ : InstrItinClass; +def IIC_VMOV : InstrItinClass; def IIC_VMOVImm : InstrItinClass; def IIC_VMOVD : InstrItinClass; def IIC_VMOVQ : InstrItinClass; @@ -137,6 +138,7 @@ def IIC_VMOVID : InstrItinClass; def IIC_VMOVISL : InstrItinClass; def IIC_VMOVSI : InstrItinClass; def IIC_VMOVDI : InstrItinClass; +def IIC_VMOVN : InstrItinClass; def IIC_VPERMD : InstrItinClass; def IIC_VPERMQ : InstrItinClass; def IIC_VPERMQ3 : InstrItinClass; diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index ff2a673c26..d2e1df13f9 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -487,6 +487,10 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<2, [A8_NPipe]>], [6, 2, 2]>, // + // Move + InstrItinData<IIC_VMOV, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [1, 1]>, + // // Move Immediate InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_NPipe]>], [3]>, @@ -521,6 +525,10 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, // + // Vector narrow move + InstrItinData<IIC_VMOVN , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 1]>, + // // 
Double-register Permute InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>, diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 02058618ad..8acc172668 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -801,6 +801,14 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<1, [A9_Pipe1]>, InstrStage<4, [A9_MUX0, A9_NPipe]>], [9, 3, 2, 1]>, + + // + // Move + InstrItinData<IIC_VMOV, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_MUX0, A9_NPipe]>], + [1,1]>, // // Move Immediate InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>, @@ -819,14 +827,12 @@ def CortexA9Itineraries : ProcessorItineraries< [2, 1]>, // // Quad-register Permute Move - // Result written in N2, but that is relative to the last cycle of multicycle, - // so we use 3 for those cases InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available - InstrStage<4, [A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, InstrStage<1, [A9_Pipe1]>, - InstrStage<2, [A9_MUX0, A9_NPipe]>], - [3, 1]>, + InstrStage<1, [A9_MUX0, A9_NPipe]>], + [2, 1]>, // // Integer to Single-precision Move InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>, @@ -869,6 +875,14 @@ def CortexA9Itineraries : ProcessorItineraries< [3, 1, 1]>, // + // Vector narrow move + InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_MUX0, A9_NPipe]>], + [3, 1]>, + // // Double-register FP Unary InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles |