author    Bob Wilson <bob.wilson@apple.com>  2009-07-29 16:39:22 +0000
committer Bob Wilson <bob.wilson@apple.com>  2009-07-29 16:39:22 +0000
commit    b7d0c90c449882c8ec697c8989244dba2dc917ae (patch)
tree      c25f7337ecd0ae0dee2ff100be597a2e38d165ec
parent    09b1366f3f310b5648aa8cd72ed16b9f19b4c68d (diff)
Change Neon VLDn intrinsics to return multiple values instead of really wide
vectors. Likewise, change VSTn intrinsics to take separate arguments for each
vector in a multi-vector struct. Adjust tests accordingly.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77468 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  include/llvm/IntrinsicsARM.td    60
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td   52
-rw-r--r--  test/CodeGen/ARM/vld1.ll         40
-rw-r--r--  test/CodeGen/ARM/vst1.ll         40
4 files changed, 114 insertions(+), 78 deletions(-)
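For orientation, below is a minimal sketch of IR that would use the new multi-result VLD2/VST2 forms, extrapolated from the intrinsic signatures added to IntrinsicsARM.td in this patch. Only VLD1/VST1 get instruction patterns and tests here; the vld2i/vst2i call forms, the .v8i8 type-mangling suffix, and the aggregate return syntax shown are assumptions for illustration, not code from the patch.

; Sketch only (not part of this patch): the new vld2/vst2 intrinsic shapes.
define <8 x i8> @vld2_demo(i8* %A) nounwind {
  ; vld2 now returns two d-register-sized vectors as an aggregate instead of
  ; one concatenated <16 x i8> wide vector.
  %vals = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2i.v8i8(i8* %A)
  %lo = extractvalue { <8 x i8>, <8 x i8> } %vals, 0
  %hi = extractvalue { <8 x i8>, <8 x i8> } %vals, 1
  %sum = add <8 x i8> %lo, %hi
  ret <8 x i8> %sum
}

define void @vst2_demo(i8* %A, <8 x i8> %lo, <8 x i8> %hi) nounwind {
  ; vst2 now takes one operand per vector in the 2-register struct instead of
  ; a single wide vector operand.
  call void @llvm.arm.neon.vst2i.v8i8(i8* %A, <8 x i8> %lo, <8 x i8> %hi)
  ret void
}

declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2i.v8i8(i8*) nounwind
declare void @llvm.arm.neon.vst2i.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind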
diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td
index e16797ae70..efe5bff437 100644
--- a/include/llvm/IntrinsicsARM.td
+++ b/include/llvm/IntrinsicsARM.td
@@ -291,20 +291,56 @@ def int_arm_neon_vmovlu : Neon_1Arg_Long_Intrinsic;
let TargetPrefix = "arm" in {
// De-interleaving vector loads from N-element structures.
- def int_arm_neon_vldi : Intrinsic<[llvm_anyint_ty],
- [llvm_ptr_ty, llvm_i32_ty],
- [IntrReadArgMem]>;
- def int_arm_neon_vldf : Intrinsic<[llvm_anyfloat_ty],
- [llvm_ptr_ty, llvm_i32_ty],
- [IntrReadArgMem]>;
+ def int_arm_neon_vld1i : Intrinsic<[llvm_anyint_ty],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld1f : Intrinsic<[llvm_anyfloat_ty],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld2i : Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld2f : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld3i : Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld3f : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld4i : Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
+ def int_arm_neon_vld4f : Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [llvm_ptr_ty], [IntrReadArgMem]>;
// Interleaving vector stores from N-element structures.
- def int_arm_neon_vsti : Intrinsic<[llvm_void_ty],
- [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
- [IntrWriteArgMem]>;
- def int_arm_neon_vstf : Intrinsic<[llvm_void_ty],
- [llvm_ptr_ty, llvm_anyfloat_ty,llvm_i32_ty],
- [IntrWriteArgMem]>;
+ def int_arm_neon_vst1i : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyint_ty],
+ [IntrWriteArgMem]>;
+ def int_arm_neon_vst1f : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyfloat_ty],
+ [IntrWriteArgMem]>;
+ def int_arm_neon_vst2i : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyint_ty,
+ LLVMMatchType<0>], [IntrWriteArgMem]>;
+ def int_arm_neon_vst2f : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyfloat_ty,
+ LLVMMatchType<0>], [IntrWriteArgMem]>;
+ def int_arm_neon_vst3i : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyint_ty,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrWriteArgMem]>;
+ def int_arm_neon_vst3f : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyfloat_ty,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrWriteArgMem]>;
+ def int_arm_neon_vst4i : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyint_ty,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>], [IntrWriteArgMem]>;
+ def int_arm_neon_vst4f : Intrinsic<[llvm_void_ty],
+ [llvm_ptr_ty, llvm_anyfloat_ty,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>], [IntrWriteArgMem]>;
// Vector Table Lookup
def int_arm_neon_vtbl : Intrinsic<[llvm_v8i8_ty],
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 9415b40e76..8641d6274a 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -135,45 +135,45 @@ def VSTRQ : NI<(outs), (ins QPR:$src, GPR:$addr),
class VLD1D<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
: NLdSt<(outs DPR:$dst), (ins addrmode6:$addr),
!strconcat(OpcodeStr, "\t${dst:dregsingle}, $addr"),
- [(set DPR:$dst, (Ty (IntOp addrmode6:$addr, 1)))]>;
+ [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
class VLD1Q<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
: NLdSt<(outs QPR:$dst), (ins addrmode6:$addr),
!strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"),
- [(set QPR:$dst, (Ty (IntOp addrmode6:$addr, 1)))]>;
+ [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>;
-def VLD1d8 : VLD1D<"vld1.8", v8i8, int_arm_neon_vldi>;
-def VLD1d16 : VLD1D<"vld1.16", v4i16, int_arm_neon_vldi>;
-def VLD1d32 : VLD1D<"vld1.32", v2i32, int_arm_neon_vldi>;
-def VLD1df : VLD1D<"vld1.32", v2f32, int_arm_neon_vldf>;
-def VLD1d64 : VLD1D<"vld1.64", v1i64, int_arm_neon_vldi>;
+def VLD1d8 : VLD1D<"vld1.8", v8i8, int_arm_neon_vld1i>;
+def VLD1d16 : VLD1D<"vld1.16", v4i16, int_arm_neon_vld1i>;
+def VLD1d32 : VLD1D<"vld1.32", v2i32, int_arm_neon_vld1i>;
+def VLD1df : VLD1D<"vld1.32", v2f32, int_arm_neon_vld1f>;
+def VLD1d64 : VLD1D<"vld1.64", v1i64, int_arm_neon_vld1i>;
-def VLD1q8 : VLD1Q<"vld1.8", v16i8, int_arm_neon_vldi>;
-def VLD1q16 : VLD1Q<"vld1.16", v8i16, int_arm_neon_vldi>;
-def VLD1q32 : VLD1Q<"vld1.32", v4i32, int_arm_neon_vldi>;
-def VLD1qf : VLD1Q<"vld1.32", v4f32, int_arm_neon_vldf>;
-def VLD1q64 : VLD1Q<"vld1.64", v2i64, int_arm_neon_vldi>;
+def VLD1q8 : VLD1Q<"vld1.8", v16i8, int_arm_neon_vld1i>;
+def VLD1q16 : VLD1Q<"vld1.16", v8i16, int_arm_neon_vld1i>;
+def VLD1q32 : VLD1Q<"vld1.32", v4i32, int_arm_neon_vld1i>;
+def VLD1qf : VLD1Q<"vld1.32", v4f32, int_arm_neon_vld1f>;
+def VLD1q64 : VLD1Q<"vld1.64", v2i64, int_arm_neon_vld1i>;
// VST1 : Vector Store (multiple single elements)
class VST1D<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
: NLdSt<(outs), (ins addrmode6:$addr, DPR:$src),
!strconcat(OpcodeStr, "\t${src:dregsingle}, $addr"),
- [(IntOp addrmode6:$addr, (Ty DPR:$src), 1)]>;
+ [(IntOp addrmode6:$addr, (Ty DPR:$src))]>;
class VST1Q<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
: NLdSt<(outs), (ins addrmode6:$addr, QPR:$src),
!strconcat(OpcodeStr, "\t${src:dregpair}, $addr"),
- [(IntOp addrmode6:$addr, (Ty QPR:$src), 1)]>;
-
-def VST1d8 : VST1D<"vst1.8", v8i8, int_arm_neon_vsti>;
-def VST1d16 : VST1D<"vst1.16", v4i16, int_arm_neon_vsti>;
-def VST1d32 : VST1D<"vst1.32", v2i32, int_arm_neon_vsti>;
-def VST1df : VST1D<"vst1.32", v2f32, int_arm_neon_vstf>;
-def VST1d64 : VST1D<"vst1.64", v1i64, int_arm_neon_vsti>;
-
-def VST1q8 : VST1Q<"vst1.8", v16i8, int_arm_neon_vsti>;
-def VST1q16 : VST1Q<"vst1.16", v8i16, int_arm_neon_vsti>;
-def VST1q32 : VST1Q<"vst1.32", v4i32, int_arm_neon_vsti>;
-def VST1qf : VST1Q<"vst1.32", v4f32, int_arm_neon_vstf>;
-def VST1q64 : VST1Q<"vst1.64", v2i64, int_arm_neon_vsti>;
+ [(IntOp addrmode6:$addr, (Ty QPR:$src))]>;
+
+def VST1d8 : VST1D<"vst1.8", v8i8, int_arm_neon_vst1i>;
+def VST1d16 : VST1D<"vst1.16", v4i16, int_arm_neon_vst1i>;
+def VST1d32 : VST1D<"vst1.32", v2i32, int_arm_neon_vst1i>;
+def VST1df : VST1D<"vst1.32", v2f32, int_arm_neon_vst1f>;
+def VST1d64 : VST1D<"vst1.64", v1i64, int_arm_neon_vst1i>;
+
+def VST1q8 : VST1Q<"vst1.8", v16i8, int_arm_neon_vst1i>;
+def VST1q16 : VST1Q<"vst1.16", v8i16, int_arm_neon_vst1i>;
+def VST1q32 : VST1Q<"vst1.32", v4i32, int_arm_neon_vst1i>;
+def VST1qf : VST1Q<"vst1.32", v4f32, int_arm_neon_vst1f>;
+def VST1q64 : VST1Q<"vst1.64", v2i64, int_arm_neon_vst1i>;
//===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index 161cb71d53..fc925f6068 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -5,63 +5,63 @@
; RUN: grep {vld1\\.64} %t | count 2
define <8 x i8> @vld1i8(i8* %A) nounwind {
- %tmp1 = call <8 x i8> @llvm.arm.neon.vldi.v8i8(i8* %A, i32 1)
+ %tmp1 = call <8 x i8> @llvm.arm.neon.vld1i.v8i8(i8* %A)
ret <8 x i8> %tmp1
}
define <4 x i16> @vld1i16(i16* %A) nounwind {
- %tmp1 = call <4 x i16> @llvm.arm.neon.vldi.v4i16(i16* %A, i32 1)
+ %tmp1 = call <4 x i16> @llvm.arm.neon.vld1i.v4i16(i16* %A)
ret <4 x i16> %tmp1
}
define <2 x i32> @vld1i32(i32* %A) nounwind {
- %tmp1 = call <2 x i32> @llvm.arm.neon.vldi.v2i32(i32* %A, i32 1)
+ %tmp1 = call <2 x i32> @llvm.arm.neon.vld1i.v2i32(i32* %A)
ret <2 x i32> %tmp1
}
define <2 x float> @vld1f(float* %A) nounwind {
- %tmp1 = call <2 x float> @llvm.arm.neon.vldf.v2f32(float* %A, i32 1)
+ %tmp1 = call <2 x float> @llvm.arm.neon.vld1f.v2f32(float* %A)
ret <2 x float> %tmp1
}
define <1 x i64> @vld1i64(i64* %A) nounwind {
- %tmp1 = call <1 x i64> @llvm.arm.neon.vldi.v1i64(i64* %A, i32 1)
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vld1i.v1i64(i64* %A)
ret <1 x i64> %tmp1
}
define <16 x i8> @vld1Qi8(i8* %A) nounwind {
- %tmp1 = call <16 x i8> @llvm.arm.neon.vldi.v16i8(i8* %A, i32 1)
+ %tmp1 = call <16 x i8> @llvm.arm.neon.vld1i.v16i8(i8* %A)
ret <16 x i8> %tmp1
}
define <8 x i16> @vld1Qi16(i16* %A) nounwind {
- %tmp1 = call <8 x i16> @llvm.arm.neon.vldi.v8i16(i16* %A, i32 1)
+ %tmp1 = call <8 x i16> @llvm.arm.neon.vld1i.v8i16(i16* %A)
ret <8 x i16> %tmp1
}
define <4 x i32> @vld1Qi32(i32* %A) nounwind {
- %tmp1 = call <4 x i32> @llvm.arm.neon.vldi.v4i32(i32* %A, i32 1)
+ %tmp1 = call <4 x i32> @llvm.arm.neon.vld1i.v4i32(i32* %A)
ret <4 x i32> %tmp1
}
define <4 x float> @vld1Qf(float* %A) nounwind {
- %tmp1 = call <4 x float> @llvm.arm.neon.vldf.v4f32(float* %A, i32 1)
+ %tmp1 = call <4 x float> @llvm.arm.neon.vld1f.v4f32(float* %A)
ret <4 x float> %tmp1
}
define <2 x i64> @vld1Qi64(i64* %A) nounwind {
- %tmp1 = call <2 x i64> @llvm.arm.neon.vldi.v2i64(i64* %A, i32 1)
+ %tmp1 = call <2 x i64> @llvm.arm.neon.vld1i.v2i64(i64* %A)
ret <2 x i64> %tmp1
}
-declare <8 x i8> @llvm.arm.neon.vldi.v8i8(i8*, i32) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vldi.v4i16(i16*, i32) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vldi.v2i32(i32*, i32) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vldf.v2f32(float*, i32) nounwind readnone
-declare <1 x i64> @llvm.arm.neon.vldi.v1i64(i64*, i32) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vld1i.v8i8(i8*) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vld1i.v4i16(i16*) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vld1i.v2i32(i32*) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vld1f.v2f32(float*) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vld1i.v1i64(i64*) nounwind readnone
-declare <16 x i8> @llvm.arm.neon.vldi.v16i8(i8*, i32) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vldi.v8i16(i16*, i32) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vldi.v4i32(i32*, i32) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vldf.v4f32(float*, i32) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vldi.v2i64(i64*, i32) nounwind readnone
+declare <16 x i8> @llvm.arm.neon.vld1i.v16i8(i8*) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vld1i.v8i16(i16*) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vld1i.v4i32(i32*) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vld1f.v4f32(float*) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vld1i.v2i64(i64*) nounwind readnone
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
index 70a05fa803..c4099a50ef 100644
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -6,72 +6,72 @@
define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
%tmp1 = load <8 x i8>* %B
- call void @llvm.arm.neon.vsti.v8i8(i8* %A, <8 x i8> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1i.v8i8(i8* %A, <8 x i8> %tmp1)
ret void
}
define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vsti.v4i16(i16* %A, <4 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1i.v4i16(i16* %A, <4 x i16> %tmp1)
ret void
}
define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vsti.v2i32(i32* %A, <2 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1i.v2i32(i32* %A, <2 x i32> %tmp1)
ret void
}
define void @vst1f(float* %A, <2 x float>* %B) nounwind {
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vstf.v2f32(float* %A, <2 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1f.v2f32(float* %A, <2 x float> %tmp1)
ret void
}
define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
%tmp1 = load <1 x i64>* %B
- call void @llvm.arm.neon.vsti.v1i64(i64* %A, <1 x i64> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1i.v1i64(i64* %A, <1 x i64> %tmp1)
ret void
}
define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
%tmp1 = load <16 x i8>* %B
- call void @llvm.arm.neon.vsti.v16i8(i8* %A, <16 x i8> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1i.v16i8(i8* %A, <16 x i8> %tmp1)
ret void
}
define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vsti.v8i16(i16* %A, <8 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1i.v8i16(i16* %A, <8 x i16> %tmp1)
ret void
}
define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vsti.v4i32(i32* %A, <4 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1i.v4i32(i32* %A, <4 x i32> %tmp1)
ret void
}
define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vstf.v4f32(float* %A, <4 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1f.v4f32(float* %A, <4 x float> %tmp1)
ret void
}
define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
%tmp1 = load <2 x i64>* %B
- call void @llvm.arm.neon.vsti.v2i64(i64* %A, <2 x i64> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1i.v2i64(i64* %A, <2 x i64> %tmp1)
ret void
}
-declare void @llvm.arm.neon.vsti.v8i8(i8*, <8 x i8>, i32) nounwind readnone
-declare void @llvm.arm.neon.vsti.v4i16(i16*, <4 x i16>, i32) nounwind readnone
-declare void @llvm.arm.neon.vsti.v2i32(i32*, <2 x i32>, i32) nounwind readnone
-declare void @llvm.arm.neon.vstf.v2f32(float*, <2 x float>, i32) nounwind readnone
-declare void @llvm.arm.neon.vsti.v1i64(i64*, <1 x i64>, i32) nounwind readnone
+declare void @llvm.arm.neon.vst1i.v8i8(i8*, <8 x i8>) nounwind readnone
+declare void @llvm.arm.neon.vst1i.v4i16(i16*, <4 x i16>) nounwind readnone
+declare void @llvm.arm.neon.vst1i.v2i32(i32*, <2 x i32>) nounwind readnone
+declare void @llvm.arm.neon.vst1f.v2f32(float*, <2 x float>) nounwind readnone
+declare void @llvm.arm.neon.vst1i.v1i64(i64*, <1 x i64>) nounwind readnone
-declare void @llvm.arm.neon.vsti.v16i8(i8*, <16 x i8>, i32) nounwind readnone
-declare void @llvm.arm.neon.vsti.v8i16(i16*, <8 x i16>, i32) nounwind readnone
-declare void @llvm.arm.neon.vsti.v4i32(i32*, <4 x i32>, i32) nounwind readnone
-declare void @llvm.arm.neon.vstf.v4f32(float*, <4 x float>, i32) nounwind readnone
-declare void @llvm.arm.neon.vsti.v2i64(i64*, <2 x i64>, i32) nounwind readnone
+declare void @llvm.arm.neon.vst1i.v16i8(i8*, <16 x i8>) nounwind readnone
+declare void @llvm.arm.neon.vst1i.v8i16(i16*, <8 x i16>) nounwind readnone
+declare void @llvm.arm.neon.vst1i.v4i32(i32*, <4 x i32>) nounwind readnone
+declare void @llvm.arm.neon.vst1f.v4f32(float*, <4 x float>) nounwind readnone
+declare void @llvm.arm.neon.vst1i.v2i64(i64*, <2 x i64>) nounwind readnone