aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/IntrinsicsARM.td134
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp9
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp18
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td72
-rw-r--r--test/CodeGen/ARM/vabd.ll8
-rw-r--r--test/CodeGen/ARM/vabs.ll8
-rw-r--r--test/CodeGen/ARM/vld1.ll40
-rw-r--r--test/CodeGen/ARM/vld2.ll16
-rw-r--r--test/CodeGen/ARM/vld3.ll16
-rw-r--r--test/CodeGen/ARM/vld4.ll16
-rw-r--r--test/CodeGen/ARM/vmax.ll8
-rw-r--r--test/CodeGen/ARM/vmin.ll8
-rw-r--r--test/CodeGen/ARM/vpmax.ll4
-rw-r--r--test/CodeGen/ARM/vpmin.ll4
-rw-r--r--test/CodeGen/ARM/vrecpe.ll8
-rw-r--r--test/CodeGen/ARM/vrsqrte.ll8
-rw-r--r--test/CodeGen/ARM/vst1.ll40
-rw-r--r--test/CodeGen/ARM/vst2.ll16
-rw-r--r--test/CodeGen/ARM/vst3.ll16
-rw-r--r--test/CodeGen/ARM/vst4.ll16
-rw-r--r--test/CodeGen/ARM/vtrn.ll32
-rw-r--r--test/CodeGen/ARM/vuzp.ll32
-rw-r--r--test/CodeGen/ARM/vzip.ll32
23 files changed, 253 insertions, 308 deletions
diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td
index d86dd08710..7b72082763 100644
--- a/include/llvm/IntrinsicsARM.td
+++ b/include/llvm/IntrinsicsARM.td
@@ -27,53 +27,42 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
// The following classes do not correspond directly to GCC builtins.
class Neon_1Arg_Intrinsic
- : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
- class Neon_1Arg_Float_Intrinsic
- : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
class Neon_1Arg_Narrow_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMExtendedElementVectorType<0>], [IntrNoMem]>;
class Neon_1Arg_Long_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMTruncatedElementVectorType<0>], [IntrNoMem]>;
class Neon_2Arg_Intrinsic
- : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem]>;
- class Neon_2Arg_Float_Intrinsic
- : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem]>;
- class Neon_2Arg_Vector_Intrinsic
: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
class Neon_2Arg_Narrow_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMExtendedElementVectorType<0>,
LLVMExtendedElementVectorType<0>],
[IntrNoMem]>;
class Neon_2Arg_Long_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMTruncatedElementVectorType<0>,
LLVMTruncatedElementVectorType<0>],
[IntrNoMem]>;
class Neon_2Arg_Wide_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMTruncatedElementVectorType<0>],
[IntrNoMem]>;
class Neon_3Arg_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
class Neon_3Arg_Long_Intrinsic
- : Intrinsic<[llvm_anyint_ty],
+ : Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMTruncatedElementVectorType<0>,
LLVMTruncatedElementVectorType<0>],
[IntrNoMem]>;
class Neon_2Result_Intrinsic
- : Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
- [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class Neon_2Result_Float_Intrinsic
- : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>],
+ : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
[LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
class Neon_CvtFxToFP_Intrinsic
: Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -141,18 +130,16 @@ let Properties = [IntrNoMem, Commutative] in {
// Vector Maximum.
def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic;
def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic;
- def int_arm_neon_vmaxf : Neon_2Arg_Float_Intrinsic;
// Vector Minimum.
def int_arm_neon_vmins : Neon_2Arg_Intrinsic;
def int_arm_neon_vminu : Neon_2Arg_Intrinsic;
- def int_arm_neon_vminf : Neon_2Arg_Float_Intrinsic;
// Vector Reciprocal Step.
- def int_arm_neon_vrecps : Neon_2Arg_Float_Intrinsic;
+ def int_arm_neon_vrecps : Neon_2Arg_Intrinsic;
// Vector Reciprocal Square Root Step.
- def int_arm_neon_vrsqrts : Neon_2Arg_Float_Intrinsic;
+ def int_arm_neon_vrsqrts : Neon_2Arg_Intrinsic;
}
// Vector Subtract.
@@ -186,7 +173,6 @@ let TargetPrefix = "arm" in {
// Vector Absolute Differences.
def int_arm_neon_vabds : Neon_2Arg_Intrinsic;
def int_arm_neon_vabdu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vabdf : Neon_2Arg_Float_Intrinsic;
def int_arm_neon_vabdls : Neon_2Arg_Long_Intrinsic;
def int_arm_neon_vabdlu : Neon_2Arg_Long_Intrinsic;
@@ -197,16 +183,16 @@ def int_arm_neon_vabals : Neon_3Arg_Long_Intrinsic;
def int_arm_neon_vabalu : Neon_3Arg_Long_Intrinsic;
// Vector Pairwise Add.
-def int_arm_neon_vpadd : Neon_2Arg_Vector_Intrinsic;
+def int_arm_neon_vpadd : Neon_2Arg_Intrinsic;
// Vector Pairwise Add Long.
// Note: This is different than the other "long" NEON intrinsics because
// the result vector has half as many elements as the source vector.
// The source and destination vector types must be specified separately.
let TargetPrefix = "arm" in {
- def int_arm_neon_vpaddls : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty],
+ def int_arm_neon_vpaddls : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
[IntrNoMem]>;
- def int_arm_neon_vpaddlu : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty],
+ def int_arm_neon_vpaddlu : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty],
[IntrNoMem]>;
}
@@ -214,21 +200,19 @@ let TargetPrefix = "arm" in {
// Note: This is similar to vpaddl but the destination vector also appears
// as the first argument.
let TargetPrefix = "arm" in {
- def int_arm_neon_vpadals : Intrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, llvm_anyint_ty],
+ def int_arm_neon_vpadals : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty],
[IntrNoMem]>;
- def int_arm_neon_vpadalu : Intrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, llvm_anyint_ty],
+ def int_arm_neon_vpadalu : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty],
[IntrNoMem]>;
}
// Vector Pairwise Maximum and Minimum.
def int_arm_neon_vpmaxs : Neon_2Arg_Intrinsic;
def int_arm_neon_vpmaxu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vpmaxf : Neon_2Arg_Float_Intrinsic;
def int_arm_neon_vpmins : Neon_2Arg_Intrinsic;
def int_arm_neon_vpminu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vpminf : Neon_2Arg_Float_Intrinsic;
// Vector Shifts:
//
@@ -283,7 +267,6 @@ def int_arm_neon_vshiftins : Neon_3Arg_Intrinsic;
// Vector Absolute Value and Saturating Absolute Value.
def int_arm_neon_vabs : Neon_1Arg_Intrinsic;
-def int_arm_neon_vabsf : Neon_1Arg_Float_Intrinsic;
def int_arm_neon_vqabs : Neon_1Arg_Intrinsic;
// Vector Saturating Negate.
@@ -298,11 +281,9 @@ def int_arm_neon_vcnt : Neon_1Arg_Intrinsic;
// Vector Reciprocal Estimate.
def int_arm_neon_vrecpe : Neon_1Arg_Intrinsic;
-def int_arm_neon_vrecpef : Neon_1Arg_Float_Intrinsic;
// Vector Reciprocal Square Root Estimate.
def int_arm_neon_vrsqrte : Neon_1Arg_Intrinsic;
-def int_arm_neon_vrsqrtef : Neon_1Arg_Float_Intrinsic;
// Vector Conversions Between Floating-point and Fixed-point.
def int_arm_neon_vcvtfp2fxs : Neon_CvtFPToFx_Intrinsic;
@@ -331,68 +312,41 @@ def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic;
def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;
// Vector Transpose.
-def int_arm_neon_vtrni : Neon_2Result_Intrinsic;
-def int_arm_neon_vtrnf : Neon_2Result_Float_Intrinsic;
+def int_arm_neon_vtrn : Neon_2Result_Intrinsic;
// Vector Interleave (vzip).
-def int_arm_neon_vzipi : Neon_2Result_Intrinsic;
-def int_arm_neon_vzipf : Neon_2Result_Float_Intrinsic;
+def int_arm_neon_vzip : Neon_2Result_Intrinsic;
// Vector Deinterleave (vuzp).
-def int_arm_neon_vuzpi : Neon_2Result_Intrinsic;
-def int_arm_neon_vuzpf : Neon_2Result_Float_Intrinsic;
+def int_arm_neon_vuzp : Neon_2Result_Intrinsic;
let TargetPrefix = "arm" in {
// De-interleaving vector loads from N-element structures.
- def int_arm_neon_vld1i : Intrinsic<[llvm_anyint_ty],
- [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_arm_neon_vld1f : Intrinsic<[llvm_anyfloat_ty],
- [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_arm_neon_vld2i : Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
- [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_arm_neon_vld2f : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>],
- [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_arm_neon_vld3i : Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>,
- LLVMMatchType<0>],
- [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_arm_neon_vld3f : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>,
- LLVMMatchType<0>],
- [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_arm_neon_vld4i : Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>],
- [llvm_ptr_ty], [IntrReadArgMem]>;
- def int_arm_neon_vld4f : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>],
- [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld1 : Intrinsic<[llvm_anyvector_ty],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
// Interleaving vector stores from N-element structures.
- def int_arm_neon_vst1i : Intrinsic<[llvm_void_ty],
- [llvm_ptr_ty, llvm_anyint_ty],
- [IntrWriteArgMem]>;
- def int_arm_neon_vst1f : Intrinsic<[llvm_void_ty],
- [llvm_ptr_ty, llvm_anyfloat_ty],
+ def int_arm_neon_vst1 : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyvector_ty],
+ [IntrWriteArgMem]>;
+ def int_arm_neon_vst2 : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<0>], [IntrWriteArgMem]>;
+ def int_arm_neon_vst3 : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<0>, LLVMMatchType<0>],
[IntrWriteArgMem]>;
- def int_arm_neon_vst2i : Intrinsic<[llvm_void_ty],
- [llvm_ptr_ty, llvm_anyint_ty,
- LLVMMatchType<0>], [IntrWriteArgMem]>;
- def int_arm_neon_vst2f : Intrinsic<[llvm_void_ty],
- [llvm_ptr_ty, llvm_anyfloat_ty,
- LLVMMatchType<0>], [IntrWriteArgMem]>;
- def int_arm_neon_vst3i : Intrinsic<[llvm_void_ty],
- [llvm_ptr_ty, llvm_anyint_ty,
- LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrWriteArgMem]>;
- def int_arm_neon_vst3f : Intrinsic<[llvm_void_ty],
- [llvm_ptr_ty, llvm_anyfloat_ty,
- LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrWriteArgMem]>;
- def int_arm_neon_vst4i : Intrinsic<[llvm_void_ty],
- [llvm_ptr_ty, llvm_anyint_ty,
- LLVMMatchType<0>, LLVMMatchType<0>,
- LLVMMatchType<0>], [IntrWriteArgMem]>;
- def int_arm_neon_vst4f : Intrinsic<[llvm_void_ty],
- [llvm_ptr_ty, llvm_anyfloat_ty,
- LLVMMatchType<0>, LLVMMatchType<0>,
- LLVMMatchType<0>], [IntrWriteArgMem]>;
+ def int_arm_neon_vst4 : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyvector_ty,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>], [IntrWriteArgMem]>;
}
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 1096e8eb01..a927da2a38 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1466,8 +1466,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
switch (IntNo) {
default: break;
- case Intrinsic::arm_neon_vtrni:
- case Intrinsic::arm_neon_vtrnf:
+ case Intrinsic::arm_neon_vtrn:
switch (VT.getSimpleVT()) {
default: return NULL;
case EVT::v8i8: Opc = ARM::VTRNd8; break;
@@ -1482,8 +1481,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
return CurDAG->getTargetNode(Opc, dl, VT, VT, N->getOperand(1),
N->getOperand(2));
- case Intrinsic::arm_neon_vuzpi:
- case Intrinsic::arm_neon_vuzpf:
+ case Intrinsic::arm_neon_vuzp:
switch (VT.getSimpleVT()) {
default: return NULL;
case EVT::v8i8: Opc = ARM::VUZPd8; break;
@@ -1498,8 +1496,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
return CurDAG->getTargetNode(Opc, dl, VT, VT, N->getOperand(1),
N->getOperand(2));
- case Intrinsic::arm_neon_vzipi:
- case Intrinsic::arm_neon_vzipf:
+ case Intrinsic::arm_neon_vzip:
switch (VT.getSimpleVT()) {
default: return NULL;
case EVT::v8i8: Opc = ARM::VZIPd8; break;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 61722d44fa..1a662d9d87 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1360,23 +1360,17 @@ SDValue
ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntNo) {
- case Intrinsic::arm_neon_vld2i:
- case Intrinsic::arm_neon_vld2f:
+ case Intrinsic::arm_neon_vld2:
return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD2D);
- case Intrinsic::arm_neon_vld3i:
- case Intrinsic::arm_neon_vld3f:
+ case Intrinsic::arm_neon_vld3:
return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD3D);
- case Intrinsic::arm_neon_vld4i:
- case Intrinsic::arm_neon_vld4f:
+ case Intrinsic::arm_neon_vld4:
return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD4D);
- case Intrinsic::arm_neon_vst2i:
- case Intrinsic::arm_neon_vst2f:
+ case Intrinsic::arm_neon_vst2:
return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST2D, 2);
- case Intrinsic::arm_neon_vst3i:
- case Intrinsic::arm_neon_vst3f:
+ case Intrinsic::arm_neon_vst3:
return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST3D, 3);
- case Intrinsic::arm_neon_vst4i:
- case Intrinsic::arm_neon_vst4f:
+ case Intrinsic::arm_neon_vst4:
return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST4D, 4);
default: return SDValue(); // Don't custom lower most intrinsics.
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 1ed3a619a0..53283e84ea 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -173,17 +173,17 @@ class VLD1Q<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
!strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"),
[(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
-def VLD1d8 : VLD1D<"vld1.8", v8i8, int_arm_neon_vld1i>;
-def VLD1d16 : VLD1D<"vld1.16", v4i16, int_arm_neon_vld1i>;
-def VLD1d32 : VLD1D<"vld1.32", v2i32, int_arm_neon_vld1i>;
-def VLD1df : VLD1D<"vld1.32", v2f32, int_arm_neon_vld1f>;
-def VLD1d64 : VLD1D<"vld1.64", v1i64, int_arm_neon_vld1i>;
-
-def VLD1q8 : VLD1Q<"vld1.8", v16i8, int_arm_neon_vld1i>;
-def VLD1q16 : VLD1Q<"vld1.16", v8i16, int_arm_neon_vld1i>;
-def VLD1q32 : VLD1Q<"vld1.32", v4i32, int_arm_neon_vld1i>;
-def VLD1qf : VLD1Q<"vld1.32", v4f32, int_arm_neon_vld1f>;
-def VLD1q64 : VLD1Q<"vld1.64", v2i64, int_arm_neon_vld1i>;
+def VLD1d8 : VLD1D<"vld1.8", v8i8, int_arm_neon_vld1>;
+def VLD1d16 : VLD1D<"vld1.16", v4i16, int_arm_neon_vld1>;
+def VLD1d32 : VLD1D<"vld1.32", v2i32, int_arm_neon_vld1>;
+def VLD1df : VLD1D<"vld1.32", v2f32, int_arm_neon_vld1>;
+def VLD1d64 : VLD1D<"vld1.64", v1i64, int_arm_neon_vld1>;
+
+def VLD1q8 : VLD1Q<"vld1.8", v16i8, int_arm_neon_vld1>;
+def VLD1q16 : VLD1Q<"vld1.16", v8i16, int_arm_neon_vld1>;
+def VLD1q32 : VLD1Q<"vld1.32", v4i32, int_arm_neon_vld1>;
+def VLD1qf : VLD1Q<"vld1.32", v4f32, int_arm_neon_vld1>;
+def VLD1q64 : VLD1Q<"vld1.64", v2i64, int_arm_neon_vld1>;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2D<string OpcodeStr>
@@ -228,17 +228,17 @@ class VST1Q<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
!strconcat(OpcodeStr, "\t${src:dregpair}, $addr"),
[(IntOp addrmode6:$addr, (Ty QPR:$src))]>;
-def VST1d8 : VST1D<"vst1.8", v8i8, int_arm_neon_vst1i>;
-def VST1d16 : VST1D<"vst1.16", v4i16, int_arm_neon_vst1i>;
-def VST1d32 : VST1D<"vst1.32", v2i32, int_arm_neon_vst1i>;
-def VST1df : VST1D<"vst1.32", v2f32, int_arm_neon_vst1f>;
-def VST1d64 : VST1D<"vst1.64", v1i64, int_arm_neon_vst1i>;
+def VST1d8 : VST1D<"vst1.8", v8i8, int_arm_neon_vst1>;
+def VST1d16 : VST1D<"vst1.16", v4i16, int_arm_neon_vst1>;
+def VST1d32 : VST1D<"vst1.32", v2i32, int_arm_neon_vst1>;
+def VST1df : VST1D<"vst1.32", v2f32, int_arm_neon_vst1>;
+def VST1d64 : VST1D<"vst1.64", v1i64, int_arm_neon_vst1>;
-def VST1q8 : VST1Q<"vst1.8", v16i8, int_arm_neon_vst1i>;
-def VST1q16 : VST1Q<"vst1.16", v8i16, int_arm_neon_vst1i>;
-def VST1q32 : VST1Q<"vst1.32", v4i32, int_arm_neon_vst1i>;
-def VST1qf : VST1Q<"vst1.32", v4f32, int_arm_neon_vst1f>;
-def VST1q64 : VST1Q<"vst1.64", v2i64, int_arm_neon_vst1i>;
+def VST1q8 : VST1Q<"vst1.8", v16i8, int_arm_neon_vst1>;
+def VST1q16 : VST1Q<"vst1.16", v8i16, int_arm_neon_vst1>;
+def VST1q32 : VST1Q<"vst1.32", v4i32, int_arm_neon_vst1>;
+def VST1qf : VST1Q<"vst1.32", v4f32, int_arm_neon_vst1>;
+def VST1q64 : VST1Q<"vst1.64", v2i64, int_arm_neon_vst1>;
// VST2 : Vector Store (multiple 2-element structures)
class VST2D<string OpcodeStr>
@@ -1223,9 +1223,9 @@ def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, "vabd.s", int_arm_neon_vabds, 0>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, "vabd.u", int_arm_neon_vabdu, 0>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v2f32, v2f32,
- int_arm_neon_vabdf, 0>;
+ int_arm_neon_vabds, 0>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v4f32, v4f32,
- int_arm_neon_vabdf, 0>;
+ int_arm_neon_vabds, 0>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, "vabdl.s", int_arm_neon_vabdls, 0>;
@@ -1245,17 +1245,17 @@ defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>;
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, "vmax.s", int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, "vmax.u", int_arm_neon_vmaxu, 1>;
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v2f32, v2f32,
- int_arm_neon_vmaxf, 1>;
+ int_arm_neon_vmaxs, 1>;
def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v4f32, v4f32,
- int_arm_neon_vmaxf, 1>;
+ int_arm_neon_vmaxs, 1>;
// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, "vmin.s", int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, "vmin.u", int_arm_neon_vminu, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v2f32, v2f32,
- int_arm_neon_vminf, 1>;
+ int_arm_neon_vmins, 1>;
def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v4f32, v4f32,
- int_arm_neon_vminf, 1>;
+ int_arm_neon_vmins, 1>;
// Vector Pairwise Operations.
@@ -1295,7 +1295,7 @@ def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, "vpmax.u16", v4i16, v4i16,
def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, "vpmax.u32", v2i32, v2i32,
int_arm_neon_vpmaxu, 0>;
def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, "vpmax.f32", v2f32, v2f32,
- int_arm_neon_vpmaxf, 0>;
+ int_arm_neon_vpmaxs, 0>;
// VPMIN : Vector Pairwise Minimum
def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, "vpmin.s8", v8i8, v8i8,
@@ -1311,7 +1311,7 @@ def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, "vpmin.u16", v4i16, v4i16,
def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, "vpmin.u32", v2i32, v2i32,
int_arm_neon_vpminu, 0>;
def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, "vpmin.f32", v2f32, v2f32,
- int_arm_neon_vpminf, 0>;
+ int_arm_neon_vpmins, 0>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
@@ -1321,9 +1321,9 @@ def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32",
def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32",
v4i32, v4i32, int_arm_neon_vrecpe>;
def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32",
- v2f32, v2f32, int_arm_neon_vrecpef>;
+ v2f32, v2f32, int_arm_neon_vrecpe>;
def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32",
- v4f32, v4f32, int_arm_neon_vrecpef>;
+ v4f32, v4f32, int_arm_neon_vrecpe>;
// VRECPS : Vector Reciprocal Step
def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v2f32, v2f32,
@@ -1337,9 +1337,9 @@ def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32",
def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32",
v4i32, v4i32, int_arm_neon_vrsqrte>;
def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32",
- v2f32, v2f32, int_arm_neon_vrsqrtef>;
+ v2f32, v2f32, int_arm_neon_vrsqrte>;
def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32",
- v4f32, v4f32, int_arm_neon_vrsqrtef>;
+ v4f32, v4f32, int_arm_neon_vrsqrte>;
// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v2f32, v2f32,
@@ -1480,9 +1480,9 @@ defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>;
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, "vabs.s",
int_arm_neon_vabs>;
def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
- v2f32, v2f32, int_arm_neon_vabsf>;
+ v2f32, v2f32, int_arm_neon_vabs>;
def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
- v4f32, v4f32, int_arm_neon_vabsf>;
+ v4f32, v4f32, int_arm_neon_vabs>;
// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, "vqabs.s",
@@ -2017,7 +2017,7 @@ def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in
def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
- v2f32, v2f32, int_arm_neon_vabsf>;
+ v2f32, v2f32, int_arm_neon_vabs>;
def : N2VDIntsPat<fabs, VABSfd_sfp>;
// Vector Negate used for single-precision FP
diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll
index c0497f9134..e764840154 100644
--- a/test/CodeGen/ARM/vabd.ll
+++ b/test/CodeGen/ARM/vabd.ll
@@ -59,7 +59,7 @@ define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vabd.f32
%tmp1 = load <2 x float>* %A
%tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vabdf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ %tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -122,7 +122,7 @@ define <4 x float> @vabdQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vabd.f32
%tmp1 = load <4 x float>* %A
%tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm.neon.vabdf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ %tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -134,7 +134,7 @@ declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnon
declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vabdf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@@ -144,4 +144,4 @@ declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind read
declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vabdf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vabs.ll b/test/CodeGen/ARM/vabs.ll
index a7979eec52..1195f087ef 100644
--- a/test/CodeGen/ARM/vabs.ll
+++ b/test/CodeGen/ARM/vabs.ll
@@ -28,7 +28,7 @@ define <2 x float> @vabsf32(<2 x float>* %A) nounwind {
;CHECK: vabsf32:
;CHECK: vabs.f32
%tmp1 = load <2 x float>* %A
- %tmp2 = call <2 x float> @llvm.arm.neon.vabsf.v2f32(<2 x float> %tmp1)
+ %tmp2 = call <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float> %tmp1)
ret <2 x float> %tmp2
}
@@ -60,17 +60,17 @@ define <4 x float> @vabsQf32(<4 x float>* %A) nounwind {
;CHECK: vabsQf32:
;CHECK: vabs.f32
%tmp1 = load <4 x float>* %A
- %tmp2 = call <4 x float> @llvm.arm.neon.vabsf.v4f32(<4 x float> %tmp1)
+ %tmp2 = call <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float> %tmp1)
ret <4 x float> %tmp2
}
declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vabsf.v2f32(<2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vabsf.v4f32(<4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index d5191338c9..81f1bdec9e 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -3,81 +3,81 @@
define <8 x i8> @vld1i8(i8* %A) nounwind {
;CHECK: vld1i8:
;CHECK: vld1.8
- %tmp1 = call <8 x i8> @llvm.arm.neon.vld1i.v8i8(i8* %A)
+ %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A)
ret <8 x i8> %tmp1
}
define <4 x i16> @vld1i16(i16* %A) nounwind {
;CHECK: vld1i16:
;CHECK: vld1.16
- %tmp1 = call <4 x i16> @llvm.arm.neon.vld1i.v4i16(i16* %A)
+ %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i16* %A)
ret <4 x i16> %tmp1
}
define <2 x i32> @vld1i32(i32* %A) nounwind {
;CHECK: vld1i32:
;CHECK: vld1.32
- %tmp1 = call <2 x i32> @llvm.arm.neon.vld1i.v2i32(i32* %A)
+ %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i32* %A)
ret <2 x i32> %tmp1
}
define <2 x float> @vld1f(float* %A) nounwind {
;CHECK: vld1f:
;CHECK: vld1.32
- %tmp1 = call <2 x float> @llvm.arm.neon.vld1f.v2f32(float* %A)
+ %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(float* %A)
ret <2 x float> %tmp1
}
define <1 x i64> @vld1i64(i64* %A) nounwind {
;CHECK: vld1i64:
;CHECK: vld1.64
- %tmp1 = call <1 x i64> @llvm.arm.neon.vld1i.v1i64(i64* %A)
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i64* %A)
ret <1 x i64> %tmp1
}
define <16 x i8> @vld1Qi8(i8* %A) nounwind {
;CHECK: vld1Qi8:
;CHECK: vld1.8
- %tmp1 = call <16 x i8> @llvm.arm.neon.vld1i.v16i8(i8* %A)
+ %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A)
ret <16 x i8> %tmp1
}
define <8 x i16> @vld1Qi16(i16* %A) nounwind {
;CHECK: vld1Qi16:
;CHECK: vld1.16
- %tmp1 = call <8 x i16> @llvm.arm.neon.vld1i.v8i16(i16* %A)
+ %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i16* %A)
ret <8 x i16> %tmp1
}
define <4 x i32> @vld1Qi32(i32* %A) nounwind {
;CHECK: vld1Qi32:
;CHECK: vld1.32
- %tmp1 = call <4 x i32> @llvm.arm.neon.vld1i.v4i32(i32* %A)
+ %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i32* %A)
ret <4 x i32> %tmp1
}
define <4 x float> @vld1Qf(float* %A) nounwind {
;CHECK: vld1Qf:
;CHECK: vld1.32
- %tmp1 = call <4 x float> @llvm.arm.neon.vld1f.v4f32(float* %A)
+ %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(float* %A)
ret <4 x float> %tmp1
}
define <2 x i64> @vld1Qi64(i64* %A) nounwind {
;CHECK: vld1Qi64:
;CHECK: vld1.64
- %tmp1 = call <2 x i64> @llvm.arm.neon.vld1i.v2i64(i64* %A)
+ %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i64* %A)
ret <2 x i64> %tmp1
}
-declare <8 x i8> @llvm.arm.neon.vld1i.v8i8(i8*) nounwind readonly
-declare <4 x i16> @llvm.arm.neon.vld1i.v4i16(i8*) nounwind readonly
-declare <2 x i32> @llvm.arm.neon.vld1i.v2i32(i8*) nounwind readonly
-declare <2 x float> @llvm.arm.neon.vld1f.v2f32(i8*) nounwind readonly
-declare <1 x i64> @llvm.arm.neon.vld1i.v1i64(i8*) nounwind readonly
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*) nounwind readonly
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*) nounwind readonly
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*) nounwind readonly
+declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*) nounwind readonly
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*) nounwind readonly
-declare <16 x i8> @llvm.arm.neon.vld1i.v16i8(i8*) nounwind readonly
-declare <8 x i16> @llvm.arm.neon.vld1i.v8i16(i8*) nounwind readonly
-declare <4 x i32> @llvm.arm.neon.vld1i.v4i32(i8*) nounwind readonly
-declare <4 x float> @llvm.arm.neon.vld1f.v4f32(i8*) nounwind readonly
-declare <2 x i64> @llvm.arm.neon.vld1i.v2i64(i8*) nounwind readonly
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*) nounwind readonly
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
index 2c16ac19d8..168b62b9ab 100644
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -8,7 +8,7 @@
define <8 x i8> @vld2i8(i8* %A) nounwind {
;CHECK: vld2i8:
;CHECK: vld2.8
- %tmp1 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2i.v8i8(i8* %A)
+ %tmp1 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2.v8i8(i8* %A)
%tmp2 = extractvalue %struct.__builtin_neon_v8qi2 %tmp1, 0
%tmp3 = extractvalue %struct.__builtin_neon_v8qi2 %tmp1, 1
%tmp4 = add <8