diff options
author | Nate Begeman <natebegeman@mac.com> | 2011-02-11 20:53:29 +0000 |
---|---|---|
committer | Nate Begeman <natebegeman@mac.com> | 2011-02-11 20:53:29 +0000 |
commit | 7973f350b78e0bef8567f441f3255c846f5432ac (patch) | |
tree | 40751dd16f0ba35b84f0751caea1768024c95224 /test/CodeGen/ARM/neon_div.ll | |
parent | f94fdb6f571c2cdf8390ead558ba40b267df1172 (diff) |
Implement sdiv & udiv for <4 x i16> and <8 x i8> NEON vector types.
This avoids moving each element to the integer register file and calling __divsi3 etc. on it.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@125402 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/ARM/neon_div.ll')
-rw-r--r-- | test/CodeGen/ARM/neon_div.ll | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll new file mode 100644 index 0000000000..e337970790 --- /dev/null +++ b/test/CodeGen/ARM/neon_div.ll @@ -0,0 +1,48 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrecpe.f32 +;CHECK: vrecpe.f32 +;CHECK: vmovn.i32 +;CHECK: vmovn.i32 +;CHECK: vmovn.i16 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = sdiv <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrecpe.f32 +;CHECK: vrecps.f32 +;CHECK: vrecpe.f32 +;CHECK: vrecps.f32 +;CHECK: vmovn.i32 +;CHECK: vmovn.i32 +;CHECK: vqmovun.s16 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = udiv <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @sdivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrecpe.f32 +;CHECK: vrecps.f32 +;CHECK: vmovn.i32 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = sdiv <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} + +define <4 x i16> @udivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrecpe.f32 +;CHECK: vrecps.f32 +;CHECK: vrecps.f32 +;CHECK: vmovn.i32 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = udiv <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} |