diff options
author:    Eli Bendersky <eliben@chromium.org>  2012-11-15 14:29:27 -0800
committer: Eli Bendersky <eliben@chromium.org>  2012-11-15 14:29:27 -0800
commit: bb0a8c95795b473f21730c6357b24a7285226527 (patch)
tree:   139470a2000caad937cd91d3273fafc67587e5cd /test
parent: 923f52fb3f6670e843ffe0b8da2f2bad898d752c (diff)
Sandboxing of VLD instructions
BUG=nativeclient:3124
Review URL: https://codereview.chromium.org/11413019
Diffstat (limited to 'test')
-rw-r--r--  test/NaCl/ARM/neon-vld1-sandboxing.ll    |  92
-rw-r--r--  test/NaCl/ARM/neon-vld2-sandboxing.ll    | 102
-rw-r--r--  test/NaCl/ARM/neon-vld3-sandboxing.ll    |  79
-rw-r--r--  test/NaCl/ARM/neon-vld4-sandboxing.ll    |  80
-rw-r--r--  test/NaCl/ARM/neon-vlddup-sandboxing.ll  | 151
-rw-r--r--  test/NaCl/ARM/neon-vldlane-sandboxing.ll | 319
-rw-r--r--  test/NaCl/ARM/neon-vst1-sandboxing.ll    |  12
-rw-r--r--  test/NaCl/ARM/neon-vst2-sandboxing.ll    |   8
-rw-r--r--  test/NaCl/ARM/neon-vst3-sandboxing.ll    |   2
-rw-r--r--  test/NaCl/ARM/neon-vst4-sandboxing.ll    |   2
-rw-r--r--  test/NaCl/ARM/neon-vstlane-sandboxing.ll |  32
11 files changed, 868 insertions, 11 deletions
diff --git a/test/NaCl/ARM/neon-vld1-sandboxing.ll b/test/NaCl/ARM/neon-vld1-sandboxing.ll new file mode 100644 index 0000000000..52395a559f --- /dev/null +++ b/test/NaCl/ARM/neon-vld1-sandboxing.ll @@ -0,0 +1,92 @@ +; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-load -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define <8 x i8> @vld1i8(i8* %A) nounwind { + %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.8 {{{d[0-9]+}}}, [r0, :64] + ret <8 x i8> %tmp1 +} + +define <4 x i16> @vld1i16(i16* %A) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.16 {{{d[0-9]+}}}, [r0] + ret <4 x i16> %tmp1 +} + +define <2 x i32> @vld1i32(i32* %A) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.32 {{{d[0-9]+}}}, [r0] + ret <2 x i32> %tmp1 +} + +; Insert useless arguments here just for the sake of moving +; %A further down the rN chain (testing how sandboxing detects +; the correct register and not just the default r0) +define <1 x i64> @vld1i64(i32 %foo, i32 %bar, i32 %baz, + i64* %A) nounwind { + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0, i32 1) +; CHECK: bic r3, r3, #3221225472 +; CHECK-NEXT: vld1.64 {{{d[0-9]+}}}, [r3] + ret <1 x i64> %tmp1 +} + +define <16 x i8> @vld1Qi8(i8* %A) nounwind { + %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64] + ret <16 x i8> %tmp1 +} + +define <8 x i16> @vld1Qi16(i16* %A) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 32) +; CHECK: bic r0, r0, #3221225472 +; 
CHECK-NEXT: vld1.16 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0, :128] + ret <8 x i16> %tmp1 +} + +define <4 x i32> @vld1Qi32(i32* %A) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0] + ret <4 x i32> %tmp1 +} + +define <2 x i64> @vld1Qi64(i64* %A) nounwind { + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0] + ret <2 x i64> %tmp1 +} + +declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly +declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) nounwind readonly +declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) nounwind readonly +declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly +declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly + +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly +declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly +declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly + +define <4 x i16> @vld1i16_update(i16** %ptr) nounwind { + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.16 {{{d[0-9]+}}}, [r1]! 
+ %tmp2 = getelementptr i16* %A, i32 4 + store i16* %tmp2, i16** %ptr + ret <4 x i16> %tmp1 +} + diff --git a/test/NaCl/ARM/neon-vld2-sandboxing.ll b/test/NaCl/ARM/neon-vld2-sandboxing.ll new file mode 100644 index 0000000000..ffec745e5f --- /dev/null +++ b/test/NaCl/ARM/neon-vld2-sandboxing.ll @@ -0,0 +1,102 @@ +; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-load -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } +%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> } + +%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> } + +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*, i32) nounwind readonly + +declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly + +define <8 x i8> @vld2i8(i8* %A) nounwind { + %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8) + %tmp2 = extractvalue 
%struct.__neon_int8x8x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1 + %tmp4 = add <8 x i8> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64] + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vld2i16(i16* %A) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 32) + %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1 + %tmp4 = add <4 x i16> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :128] + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vld2i32(i32* %A) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1 + %tmp4 = add <2 x i32> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] + ret <2 x i32> %tmp4 +} + +define <16 x i8> @vld2Qi8(i8* %A) nounwind { + %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8) + %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 + %tmp4 = add <16 x i8> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64] + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vld2Qi16(i16* %A) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 16) + %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1 + %tmp4 = add <8 x i16> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :128] + ret <8 x i16> 
%tmp4 +} + +define <4 x i32> @vld2Qi32(i32* %A) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 64) + %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1 + %tmp4 = add <4 x i32> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :256] + ret <4 x i32> %tmp4 +} + +;Check for a post-increment updating load with register increment. +define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind { + %A = load i8** %ptr + %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16) + %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 + %tmp4 = add <16 x i8> %tmp2, %tmp3 +; CHECK: bic r2, r2, #3221225472 +; CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r2, :128], r1 + %tmp5 = getelementptr i8* %A, i32 %inc + store i8* %tmp5, i8** %ptr + ret <16 x i8> %tmp4 +} diff --git a/test/NaCl/ARM/neon-vld3-sandboxing.ll b/test/NaCl/ARM/neon-vld3-sandboxing.ll new file mode 100644 index 0000000000..49e38b9c77 --- /dev/null +++ b/test/NaCl/ARM/neon-vld3-sandboxing.ll @@ -0,0 +1,79 @@ +; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> } + +%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x3_t = type 
{ <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> } + +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*, i32) nounwind readonly + +declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*, i32) nounwind readonly + +define <8 x i8> @vld3i8(i32 %foobar, i32 %ba, i8* %A) nounwind { + %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 32) + %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 + %tmp4 = add <8 x i8> %tmp2, %tmp3 +; CHECK: bic r2, r2, #3221225472 +; CHECK-NEXT: vld3.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r2, :64] + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vld3i16(i16* %A) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2 + %tmp4 = add <4 x i16> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.16 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0] + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vld3i32(i32* %A) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0, i32 1) + %tmp2 = 
extractvalue %struct.__neon_int32x2x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2 + %tmp4 = add <2 x i32> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.32 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0] + ret <2 x i32> %tmp4 +} + +define <1 x i64> @vld3i64(i64* %A) nounwind { + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16) + %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64] + ret <1 x i64> %tmp4 +} + + +define <16 x i8> @vld3Qi8(i8* %A) nounwind { + %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 32) + %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2 + %tmp4 = add <16 x i8> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64]! 
+ ret <16 x i8> %tmp4 +} + diff --git a/test/NaCl/ARM/neon-vld4-sandboxing.ll b/test/NaCl/ARM/neon-vld4-sandboxing.ll new file mode 100644 index 0000000000..14d903c09e --- /dev/null +++ b/test/NaCl/ARM/neon-vld4-sandboxing.ll @@ -0,0 +1,80 @@ +; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } + +%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> } + +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*, i32) nounwind readonly + +declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*, i32) nounwind readonly + +define <8 x i8> 
@vld4i8(i8* %A) nounwind { + %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 8) + %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2 + %tmp4 = add <8 x i8> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64] + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vld4i16(i16* %A) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 16) + %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2 + %tmp4 = add <4 x i16> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.16 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :128] + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vld4i32(i32* %A) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 32) + %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2 + %tmp4 = add <2 x i32> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.32 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256] + ret <2 x i32> %tmp4 +} + +define <1 x i64> @vld4i64(i64* %A) nounwind { + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64) + %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256] + ret <1 x i64> %tmp4 +} + +define <16 x i8> @vld4Qi8(i8* %A) nounwind { + %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 64) + %tmp2 = extractvalue %struct.__neon_int8x16x4_t 
%tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2 + %tmp4 = add <16 x i8> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256]! +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256] + ret <16 x i8> %tmp4 +} + diff --git a/test/NaCl/ARM/neon-vlddup-sandboxing.ll b/test/NaCl/ARM/neon-vlddup-sandboxing.ll new file mode 100644 index 0000000000..cd77ace644 --- /dev/null +++ b/test/NaCl/ARM/neon-vlddup-sandboxing.ll @@ -0,0 +1,151 @@ +; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int4x16x2_t = type { <4 x i16>, <4 x i16> } +%struct.__neon_int2x32x2_t = type { <2 x i32>, <2 x i32> } + +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly + +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } + +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly + +%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } + +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind 
readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly + +define <8 x i8> @vld1dupi8(i32 %foo, i32 %bar, + i8* %A) nounwind { + %tmp1 = load i8* %A, align 8 + %tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0 + %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer +; CHECK: bic r2, r2, #3221225472 +; CHECK-NEXT: vld1.8 {{{d[0-9]+\[\]}}}, [r2] + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vld1dupi16(i16* %A) nounwind { + %tmp1 = load i16* %A, align 8 + %tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0 + %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.16 {{{d[0-9]+\[\]}}}, [r0, :16] + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vld1dupi32(i32* %A) nounwind { + %tmp1 = load i32* %A, align 8 + %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0 + %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.32 {{{d[0-9]+\[\]}}}, [r0, :32] + ret <2 x i32> %tmp3 +} + +define <16 x i8> @vld1dupQi8(i8* %A) nounwind { + %tmp1 = load i8* %A, align 8 + %tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0 + %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.8 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0] + ret <16 x i8> %tmp3 +} + +define <8 x i8> @vld2dupi8(i8* %A) nounwind { + %tmp0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld2.8 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0] + %tmp1 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 0 + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 1 + %tmp4 = 
shufflevector <8 x i8> %tmp3, <8 x i8> undef, <8 x i32> zeroinitializer + %tmp5 = add <8 x i8> %tmp2, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vld2dupi16(i8* %A) nounwind { + %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld2.16 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0] + %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1 + %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp5 = add <4 x i16> %tmp2, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @vld2dupi32(i8* %A) nounwind { + %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld2.32 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0, :64] + %tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0 + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 1 + %tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp5 = add <2 x i32> %tmp2, %tmp4 + ret <2 x i32> %tmp5 +} + +define <4 x i16> @vld3dupi16(i8* %A) nounwind { + %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.16 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0] + %tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 1 + %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer 
+ %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 2 + %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp7 = add <4 x i16> %tmp2, %tmp4 + %tmp8 = add <4 x i16> %tmp7, %tmp6 + ret <4 x i16> %tmp8 +} + +define <2 x i32> @vld4dupi32(i8* %A) nounwind { + %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.32 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0, :64] + %tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0 + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 1 + %tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 2 + %tmp6 = shufflevector <2 x i32> %tmp5, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp7 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 3 + %tmp8 = shufflevector <2 x i32> %tmp7, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp9 = add <2 x i32> %tmp2, %tmp4 + %tmp10 = add <2 x i32> %tmp6, %tmp8 + %tmp11 = add <2 x i32> %tmp9, %tmp10 + ret <2 x i32> %tmp11 +} + +;Check for a post-increment updating load. +define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind { + %A = load i16** %ptr + %A2 = bitcast i16* %A to i8* + %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld4.16 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r1]! 
+ %tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 1 + %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 2 + %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp7 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 3 + %tmp8 = shufflevector <4 x i16> %tmp7, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp9 = add <4 x i16> %tmp2, %tmp4 + %tmp10 = add <4 x i16> %tmp6, %tmp8 + %tmp11 = add <4 x i16> %tmp9, %tmp10 + %tmp12 = getelementptr i16* %A, i32 4 + store i16* %tmp12, i16** %ptr + ret <4 x i16> %tmp11 +} diff --git a/test/NaCl/ARM/neon-vldlane-sandboxing.ll b/test/NaCl/ARM/neon-vldlane-sandboxing.ll new file mode 100644 index 0000000000..716da93298 --- /dev/null +++ b/test/NaCl/ARM/neon-vldlane-sandboxing.ll @@ -0,0 +1,319 @@ +; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } + +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> } + +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x2_t 
@llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly + +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly + +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } + +%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> } + +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly + +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly + +%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } 
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } + +%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> } + +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly + +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly + +define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + %tmp2 = load i8* %A, align 8 + %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.8 {{{d[0-9]+\[[0-9]\]}}}, [r0] + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp1 = load <4 x i16>* %B + %tmp2 = load i16* %A, align 8 + %tmp3 = insertelement <4 x i16> %tmp1, i16 
%tmp2, i32 2 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.16 {{{d[0-9]+\[[0-9]\]}}}, [r0, :16] + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind { + %tmp1 = load <2 x i32>* %B + %tmp2 = load i32* %A, align 8 + %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.32 {{{d[0-9]+\[[0-9]\]}}}, [r0, :32] + ret <2 x i32> %tmp3 +} + +define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind { + %tmp1 = load <16 x i8>* %B + %tmp2 = load i8* %A, align 8 + %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.8 {{{d[0-9]+\[[0-9]\]}}}, [r0] + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp1 = load <8 x i16>* %B + %tmp2 = load i16* %A, align 8 + %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.16 {{{d[0-9]+\[[0-9]\]}}}, [r0, :16] + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp1 = load <4 x i32>* %B + %tmp2 = load i32* %A, align 8 + %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.32 {{{d[0-9]+\[[0-9]\]}}}, [r0, :32] + ret <4 x i32> %tmp3 +} + +define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4) + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 + %tmp5 = add <8 x i8> %tmp3, %tmp4 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld2.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :16] + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + %tmp2 = call 
%struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1 + %tmp5 = add <4 x i16> %tmp3, %tmp4 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld2.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :32] + ret <4 x i16> %tmp5 +} + +define <2 x i32> @vld2lanei32(i32 %foo, i32 %bar, i32 %baz, + i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 + %tmp5 = add <2 x i32> %tmp3, %tmp4 +; CHECK: bic r3, r3, #3221225472 +; CHECK-NEXT: vld2.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r3] + ret <2 x i32> %tmp5 +} + +define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 + %tmp5 = add <8 x i16> %tmp3, %tmp4 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld2.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16) + %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1 + %tmp5 = add <4 x i32> %tmp3, %tmp4 +; CHECK: bic r0, r0, #3221225472 +; 
CHECK-NEXT: vld2.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :64] + ret <4 x i32> %tmp5 +} + +define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 + %tmp6 = add <8 x i8> %tmp3, %tmp4 + %tmp7 = add <8 x i8> %tmp5, %tmp6 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <8 x i8> %tmp7 +} + +define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2 + %tmp6 = add <4 x i16> %tmp3, %tmp4 + %tmp7 = add <4 x i16> %tmp5, %tmp6 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <4 x i16> %tmp7 +} + +define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2 + %tmp6 = add <2 x i32> %tmp3, %tmp4 + %tmp7 = add <2 x i32> %tmp5, %tmp6 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: 
vld3.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <2 x i32> %tmp7 +} + +define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2 + %tmp6 = add <8 x i16> %tmp3, %tmp4 + %tmp7 = add <8 x i16> %tmp5, %tmp6 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <8 x i16> %tmp7 +} + +define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2 + %tmp6 = add <4 x i32> %tmp3, %tmp4 + %tmp7 = add <4 x i32> %tmp5, %tmp6 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <4 x i32> %tmp7 +} + +define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 + %tmp7 = add <8 x i8> %tmp3, %tmp4 + %tmp8 
= add <8 x i8> %tmp5, %tmp6 + %tmp9 = add <8 x i8> %tmp7, %tmp8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :32] + ret <8 x i8> %tmp9 +} + +define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4) + %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3 + %tmp7 = add <4 x i16> %tmp3, %tmp4 + %tmp8 = add <4 x i16> %tmp5, %tmp6 + %tmp9 = add <4 x i16> %tmp7, %tmp8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <4 x i16> %tmp9 +} + +define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3 + %tmp7 = add <2 x i32> %tmp3, %tmp4 + %tmp8 = add <2 x i32> %tmp5, %tmp6 + %tmp9 = add <2 x i32> %tmp7, %tmp8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :64] + ret <2 x i32> %tmp9 +} + +define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = 
load <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16) + %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3 + %tmp7 = add <8 x i16> %tmp3, %tmp4 + %tmp8 = add <8 x i16> %tmp5, %tmp6 + %tmp9 = add <8 x i16> %tmp7, %tmp8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :64] + ret <8 x i16> %tmp9 +} + +define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3 + %tmp7 = add <4 x i32> %tmp3, %tmp4 + %tmp8 = add <4 x i32> %tmp5, %tmp6 + %tmp9 = add <4 x i32> %tmp7, %tmp8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <4 x i32> %tmp9 +} + diff --git a/test/NaCl/ARM/neon-vst1-sandboxing.ll b/test/NaCl/ARM/neon-vst1-sandboxing.ll index 8fd580bb49..ec5712ee94 100644 --- a/test/NaCl/ARM/neon-vst1-sandboxing.ll +++ b/test/NaCl/ARM/neon-vst1-sandboxing.ll @@ -47,15 +47,19 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind { define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind { %tmp1 = load <16 x i8>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] 
call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8) ; CHECK: bic r0, r0, #3221225472 -; CHECK-NEXT: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [r0, :64] +; CHECK-NEXT: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64] ret void } define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind { %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32) ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst1.16 {{{d[0-9]+, d[0-9]+}}}, [r0, :128] @@ -65,6 +69,8 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind { define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind { %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1) ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0] @@ -74,6 +80,8 @@ define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind { define void @vst1Qf(float* %A, <4 x float>* %B) nounwind { %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1) ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0] @@ -83,6 +91,8 @@ define void @vst1Qf(float* %A, <4 x float>* %B) nounwind { define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind { %tmp0 = bitcast i64* %A to i8* %tmp1 = load <2 x i64>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1) ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0] diff --git a/test/NaCl/ARM/neon-vst2-sandboxing.ll 
b/test/NaCl/ARM/neon-vst2-sandboxing.ll index e87373c174..431f68612c 100644 --- a/test/NaCl/ARM/neon-vst2-sandboxing.ll +++ b/test/NaCl/ARM/neon-vst2-sandboxing.ll @@ -38,6 +38,8 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind { define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind { %tmp1 = load <16 x i8>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8) ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst2.8 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :64] @@ -47,6 +49,8 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind { define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind { %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16) ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst2.16 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :128] @@ -56,6 +60,8 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind { define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind { %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64) ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst2.32 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :256] @@ -65,6 +71,8 @@ define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind { define void @vst2Qf(float* %A, <4 x float>* %B) nounwind { %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst2.32 {{{d[0-9]+, 
d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0] diff --git a/test/NaCl/ARM/neon-vst3-sandboxing.ll b/test/NaCl/ARM/neon-vst3-sandboxing.ll index b496c0c592..95f85bbeb6 100644 --- a/test/NaCl/ARM/neon-vst3-sandboxing.ll +++ b/test/NaCl/ARM/neon-vst3-sandboxing.ll @@ -32,6 +32,8 @@ define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind { %A = load i16** %ptr %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ; CHECK: bic r1, r1, #3221225472 ; CHECK-NEXT: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! diff --git a/test/NaCl/ARM/neon-vst4-sandboxing.ll b/test/NaCl/ARM/neon-vst4-sandboxing.ll index 032f194231..2b0eb31b3d 100644 --- a/test/NaCl/ARM/neon-vst4-sandboxing.ll +++ b/test/NaCl/ARM/neon-vst4-sandboxing.ll @@ -32,6 +32,8 @@ define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind { %A = load float** %ptr %tmp0 = bitcast float* %A to i8* %tmp1 = load <4 x float>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) ; CHECK: bic r1, r1, #3221225472 ; CHECK-NEXT: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! 
diff --git a/test/NaCl/ARM/neon-vstlane-sandboxing.ll b/test/NaCl/ARM/neon-vstlane-sandboxing.ll index 5b4dc63a14..8da70115f9 100644 --- a/test/NaCl/ARM/neon-vstlane-sandboxing.ll +++ b/test/NaCl/ARM/neon-vstlane-sandboxing.ll @@ -3,8 +3,8 @@ define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind { %tmp1 = load <8 x i8>* %B - %tmp2 = extractelement <8 x i8> %tmp1, i32 3 - store i8 %tmp2, i8* %A, align 8 + %tmp2 = extractelement <8 x i8> %tmp1, i32 3 + store i8 %tmp2, i8* %A, align 8 ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst1.8 {d{{[0-9]+}}[3]}, [r0] ret void @@ -12,8 +12,8 @@ define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind { define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind { %tmp1 = load <4 x i16>* %B - %tmp2 = extractelement <4 x i16> %tmp1, i32 2 - store i16 %tmp2, i16* %A, align 8 + %tmp2 = extractelement <4 x i16> %tmp1, i32 2 + store i16 %tmp2, i16* %A, align 8 ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst1.16 {d{{[0-9]+}}[2]}, [r0, :16] ret void @@ -21,8 +21,8 @@ define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind { define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind { %tmp1 = load <2 x i32>* %B - %tmp2 = extractelement <2 x i32> %tmp1, i32 1 - store i32 %tmp2, i32* %A, align 8 + %tmp2 = extractelement <2 x i32> %tmp1, i32 1 + store i32 %tmp2, i32* %A, align 8 ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst1.32 {d{{[0-9]+}}[1]}, [r0, :32] ret void @@ -30,8 +30,10 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind { define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind { %tmp1 = load <16 x i8>* %B - %tmp2 = extractelement <16 x i8> %tmp1, i32 9 - store i8 %tmp2, i8* %A, align 8 +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + %tmp2 = extractelement <16 x i8> %tmp1, i32 9 + store i8 %tmp2, i8* %A, align 8 ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst1.8 {d{{[0-9]+}}[1]}, [r0] ret void @@ -39,8 +41,10 @@ define void @vst1laneQi8(i8* %A, <16 x i8>* %B) 
nounwind { define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind { %tmp1 = load <8 x i16>* %B - %tmp2 = extractelement <8 x i16> %tmp1, i32 5 - store i16 %tmp2, i16* %A, align 8 +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + %tmp2 = extractelement <8 x i16> %tmp1, i32 5 + store i16 %tmp2, i16* %A, align 8 ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst1.16 {d{{[0-9]+}}[1]}, [r0, :16] ret void @@ -75,6 +79,8 @@ define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind { define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind { %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1) ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] @@ -84,6 +90,8 @@ define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind { define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind { %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16) ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst2.32 {d{{[0-9]+}}[0], d{{[0-9]+}}[0]}, [r0, :64] @@ -145,6 +153,8 @@ define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind { define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind { %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16) ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst4.16 {d{{[0-9]+}}[3], d{{[0-9]+}}[3], d{{[0-9]+}}[3], d{{[0-9]+}}[3]}, [r0, :64] @@ -154,6 +164,8 @@ define 
void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind { define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind { %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) ; CHECK: bic r0, r0, #3221225472 ; CHECK-NEXT: vst4.32 {d{{[0-9]+}}[0], d{{[0-9]+}}[0], d{{[0-9]+}}[0], d{{[0-9]+}}[0]}, [r0] |