aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp13
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp6
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td42
-rw-r--r--lib/Target/ARM/ARMSchedule.td2
-rw-r--r--lib/Target/ARM/ARMScheduleA8.td12
-rw-r--r--lib/Target/ARM/ARMScheduleA9.td18
6 files changed, 93 insertions, 0 deletions
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index f65be1cb17..a78ed2616b 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -172,6 +172,13 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 },
{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 },
+{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4},
+{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4},
+{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2},
+{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2},
+{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8},
+{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8},
+
{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 },
{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 },
{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 },
@@ -946,6 +953,12 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
case ARM::VLD2DUPd8Pseudo_UPD:
case ARM::VLD2DUPd16Pseudo_UPD:
case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD3DUPd8Pseudo:
+ case ARM::VLD3DUPd16Pseudo:
+ case ARM::VLD3DUPd32Pseudo:
+ case ARM::VLD3DUPd8Pseudo_UPD:
+ case ARM::VLD3DUPd16Pseudo_UPD:
+ case ARM::VLD3DUPd32Pseudo_UPD:
ExpandVLD(MBBI);
break;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index b9fbdc58a1..a3b86cb9db 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2361,6 +2361,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return SelectVLDDup(N, 2, Opcodes);
}
+ case ARMISD::VLD3DUP: {
+ unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo,
+ ARM::VLD3DUPd32Pseudo };
+ return SelectVLDDup(N, 3, Opcodes);
+ }
+
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index fd9b823412..ebb9d5d8a7 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -896,6 +896,48 @@ def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
+class VLD3DUP<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$Rn), IIC_VLD3dup,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+ let Rm = 0b1111;
+ let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
+def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
+def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
+
+def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD3DUPd8T : VLD3DUP<{0,0,1,?}, "8">;
+def VLD3DUPd16T : VLD3DUP<{0,1,1,?}, "16">;
+def VLD3DUPd32T : VLD3DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD3DUPWB<bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+ (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3dupu,
+ "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
+def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
+def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
+
+def VLD3DUPd8T_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
+def VLD3DUPd16T_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
+def VLD3DUPd32T_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+
+def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+
// VLD4DUP : Vector Load (single 4-element structure to all lanes)
// FIXME: Not yet implemented.
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 5202da4639..6300043114 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -152,6 +152,8 @@ def IIC_VLD3 : InstrItinClass;
def IIC_VLD3ln : InstrItinClass;
def IIC_VLD3u : InstrItinClass;
def IIC_VLD3lnu : InstrItinClass;
+def IIC_VLD3dup : InstrItinClass;
+def IIC_VLD3dupu : InstrItinClass;
def IIC_VLD4 : InstrItinClass;
def IIC_VLD4ln : InstrItinClass;
def IIC_VLD4u : InstrItinClass;
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index be92562a2e..1e9ec0791a 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -559,6 +559,18 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<5, [A8_LSPipe]>],
[4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
//
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
+ InstrStage<3, [A8_NLSPipe], 0>,
+ InstrStage<3, [A8_LSPipe]>],
+ [2, 2, 3, 2, 1, 1]>,
+ //
// VLD4
InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<4, [A8_NLSPipe], 0>,
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index d775a9f26f..a253b98819 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -941,6 +941,24 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<5, [A9_LSUnit]>],
[5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
//
+ // VLD3dup
+ InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
+ //
+ // VLD3dupu
+ InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1, 1]>,
+ //
// VLD4
InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,