aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGen/ARM/neon_div.ll
diff options
context:
space:
mode:
authorNate Begeman <natebegeman@mac.com>2011-02-11 20:53:29 +0000
committerNate Begeman <natebegeman@mac.com>2011-02-11 20:53:29 +0000
commit7973f350b78e0bef8567f441f3255c846f5432ac (patch)
tree40751dd16f0ba35b84f0751caea1768024c95224 /test/CodeGen/ARM/neon_div.ll
parentf94fdb6f571c2cdf8390ead558ba40b267df1172 (diff)
Implement sdiv & udiv for <4 x i16> and <8 x i8> NEON vector types.
This avoids moving each element to the integer register file and calling __divsi3 etc. on it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@125402 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/ARM/neon_div.ll')
-rw-r--r--test/CodeGen/ARM/neon_div.ll48
1 files changed, 48 insertions, 0 deletions
diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll
new file mode 100644
index 0000000000..e337970790
--- /dev/null
+++ b/test/CodeGen/ARM/neon_div.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrecpe.f32
+;CHECK: vrecpe.f32
+;CHECK: vmovn.i32
+;CHECK: vmovn.i32
+;CHECK: vmovn.i16
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = sdiv <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrecpe.f32
+;CHECK: vrecps.f32
+;CHECK: vrecpe.f32
+;CHECK: vrecps.f32
+;CHECK: vmovn.i32
+;CHECK: vmovn.i32
+;CHECK: vqmovun.s16
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = udiv <8 x i8> %tmp1, %tmp2
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sdivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrecpe.f32
+;CHECK: vrecps.f32
+;CHECK: vmovn.i32
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = sdiv <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}
+
+define <4 x i16> @udivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrecpe.f32
+;CHECK: vrecps.f32
+;CHECK: vrecps.f32
+;CHECK: vmovn.i32
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = udiv <4 x i16> %tmp1, %tmp2
+ ret <4 x i16> %tmp3
+}