author     Jim Grosbach <grosbach@apple.com>    2012-09-21 00:18:20 +0000
committer  Jim Grosbach <grosbach@apple.com>    2012-09-21 00:18:20 +0000
commit     ced674e470bb949d0adce90d79563789439333a8 (patch)
tree       2882d1ce74924c07a1228e276526e1f3b0d23857 /test
parent     e6e2d8cd90ceb5190aa646dc06584027f7d492d0 (diff)
ARM: Use a dedicated intrinsic for vector bitwise select.
The expression-based expansion too often results in IR-level optimizations
splitting the intermediate values into separate basic blocks, preventing
the formation of the VBSL instruction as the code author intended. In
particular, LICM would often hoist part of the computation out of a loop.
rdar://11011471
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164340 91177308-0d34-0410-b5e6-96231b3b80d8
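For context, the difference between the two lowerings can be sketched in LLVM IR. The open-coded form below mirrors the and/xor/or pattern visible in the existing v_bslQi64 test in this file; the function and value names are illustrative only, not taken from the commit:

; Open-coded bitwise select: mid-level passes (e.g. LICM) may hoist or
; split these intermediate values across basic blocks, after which the
; backend can no longer fold them into a single VBSL.
define <8 x i8> @bsl_expanded(<8 x i8> %mask, <8 x i8> %a, <8 x i8> %b) nounwind readnone {
  %sel1 = and <8 x i8> %mask, %a    ; bits taken from %a where mask is 1
  %notm = xor <8 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %sel2 = and <8 x i8> %notm, %b    ; bits taken from %b where mask is 0
  %res  = or <8 x i8> %sel1, %sel2
  ret <8 x i8> %res
}

; Dedicated intrinsic: a single opaque call that optimizers cannot
; pull apart, so instruction selection reliably emits one VBSL.
define <8 x i8> @bsl_intrinsic(<8 x i8> %mask, <8 x i8> %a, <8 x i8> %b) nounwind readnone {
  %res = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %mask, <8 x i8> %a, <8 x i8> %b)
  ret <8 x i8> %res
}

declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone

Because the intrinsic is opaque to mid-level passes, LICM can at most hoist the whole call, never just the xor/and pieces, so the backend still sees a single selectable node.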
Diffstat (limited to 'test')
-rw-r--r--  test/CodeGen/ARM/vbsl.ll  49
1 file changed, 49 insertions, 0 deletions
diff --git a/test/CodeGen/ARM/vbsl.ll b/test/CodeGen/ARM/vbsl.ll
index 9f3bb4e103..8ca2fd26b6 100644
--- a/test/CodeGen/ARM/vbsl.ll
+++ b/test/CodeGen/ARM/vbsl.ll
@@ -103,3 +103,52 @@ define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwin
   %tmp7 = or <2 x i64> %tmp4, %tmp6
   ret <2 x i64> %tmp7
 }
+
+define <8 x i8> @f1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind readnone optsize ssp {
+; CHECK: f1:
+; CHECK: vbsl
+  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind
+  ret <8 x i8> %vbsl.i
+}
+
+define <4 x i16> @f2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
+; CHECK: f2:
+; CHECK: vbsl
+  %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind
+  ret <4 x i16> %vbsl3.i
+}
+
+define <2 x i32> @f3(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
+; CHECK: f3:
+; CHECK: vbsl
+  %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind
+  ret <2 x i32> %vbsl3.i
+}
+
+define <16 x i8> @g1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind readnone optsize ssp {
+; CHECK: g1:
+; CHECK: vbsl
+  %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind
+  ret <16 x i8> %vbsl.i
+}
+
+define <8 x i16> @g2(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone optsize ssp {
+; CHECK: g2:
+; CHECK: vbsl
+  %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind
+  ret <8 x i16> %vbsl3.i
+}
+
+define <4 x i32> @g3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
+; CHECK: g3:
+; CHECK: vbsl
+  %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind
+  ret <4 x i32> %vbsl3.i
+}
+
+declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone