aboutsummaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorEli Bendersky <eliben@chromium.org>2012-11-15 14:29:27 -0800
committerEli Bendersky <eliben@chromium.org>2012-11-15 14:29:27 -0800
commitbb0a8c95795b473f21730c6357b24a7285226527 (patch)
tree139470a2000caad937cd91d3273fafc67587e5cd /test
parent923f52fb3f6670e843ffe0b8da2f2bad898d752c (diff)
Sandboxing of VLD instructions
BUG=nativeclient:3124 Review URL: https://codereview.chromium.org/11413019
Diffstat (limited to 'test')
-rw-r--r--test/NaCl/ARM/neon-vld1-sandboxing.ll92
-rw-r--r--test/NaCl/ARM/neon-vld2-sandboxing.ll102
-rw-r--r--test/NaCl/ARM/neon-vld3-sandboxing.ll79
-rw-r--r--test/NaCl/ARM/neon-vld4-sandboxing.ll80
-rw-r--r--test/NaCl/ARM/neon-vlddup-sandboxing.ll151
-rw-r--r--test/NaCl/ARM/neon-vldlane-sandboxing.ll319
-rw-r--r--test/NaCl/ARM/neon-vst1-sandboxing.ll12
-rw-r--r--test/NaCl/ARM/neon-vst2-sandboxing.ll8
-rw-r--r--test/NaCl/ARM/neon-vst3-sandboxing.ll2
-rw-r--r--test/NaCl/ARM/neon-vst4-sandboxing.ll2
-rw-r--r--test/NaCl/ARM/neon-vstlane-sandboxing.ll32
11 files changed, 868 insertions, 11 deletions
diff --git a/test/NaCl/ARM/neon-vld1-sandboxing.ll b/test/NaCl/ARM/neon-vld1-sandboxing.ll
new file mode 100644
index 0000000000..52395a559f
--- /dev/null
+++ b/test/NaCl/ARM/neon-vld1-sandboxing.ll
@@ -0,0 +1,92 @@
+; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-load -sfi-store -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+define <8 x i8> @vld1i8(i8* %A) nounwind {
+ %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.8 {{{d[0-9]+}}}, [r0, :64]
+ ret <8 x i8> %tmp1
+}
+
+define <4 x i16> @vld1i16(i16* %A) nounwind {
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.16 {{{d[0-9]+}}}, [r0]
+ ret <4 x i16> %tmp1
+}
+
+define <2 x i32> @vld1i32(i32* %A) nounwind {
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.32 {{{d[0-9]+}}}, [r0]
+ ret <2 x i32> %tmp1
+}
+
+; Insert useless arguments here just for the sake of moving
+; %A further down the rN chain (testing how sandboxing detects
+; the correct register and not just the default r0)
+define <1 x i64> @vld1i64(i32 %foo, i32 %bar, i32 %baz,
+ i64* %A) nounwind {
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0, i32 1)
+; CHECK: bic r3, r3, #3221225472
+; CHECK-NEXT: vld1.64 {{{d[0-9]+}}}, [r3]
+ ret <1 x i64> %tmp1
+}
+
+define <16 x i8> @vld1Qi8(i8* %A) nounwind {
+ %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64]
+ ret <16 x i8> %tmp1
+}
+
+define <8 x i16> @vld1Qi16(i16* %A) nounwind {
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 32)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.16 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0, :128]
+ ret <8 x i16> %tmp1
+}
+
+define <4 x i32> @vld1Qi32(i32* %A) nounwind {
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0, i32 1)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
+ ret <4 x i32> %tmp1
+}
+
+define <2 x i64> @vld1Qi64(i64* %A) nounwind {
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0, i32 1)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
+ ret <2 x i64> %tmp1
+}
+
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) nounwind readonly
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) nounwind readonly
+declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
+
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly
+
+define <4 x i16> @vld1i16_update(i16** %ptr) nounwind {
+ %A = load i16** %ptr
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.16 {{{d[0-9]+}}}, [r1]!
+ %tmp2 = getelementptr i16* %A, i32 4
+ store i16* %tmp2, i16** %ptr
+ ret <4 x i16> %tmp1
+}
+
diff --git a/test/NaCl/ARM/neon-vld2-sandboxing.ll b/test/NaCl/ARM/neon-vld2-sandboxing.ll
new file mode 100644
index 0000000000..ffec745e5f
--- /dev/null
+++ b/test/NaCl/ARM/neon-vld2-sandboxing.ll
@@ -0,0 +1,102 @@
+; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-load -sfi-store -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
+%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*, i32) nounwind readonly
+declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*, i32) nounwind readonly
+
+declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly
+
+define <8 x i8> @vld2i8(i8* %A) nounwind {
+ %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8)
+ %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64]
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld2i16(i16* %A) nounwind {
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 32)
+ %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
+ %tmp4 = add <4 x i16> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :128]
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld2i32(i32* %A) nounwind {
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0, i32 1)
+ %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1
+ %tmp4 = add <2 x i32> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+ ret <2 x i32> %tmp4
+}
+
+define <16 x i8> @vld2Qi8(i8* %A) nounwind {
+ %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8)
+ %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64]
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vld2Qi16(i16* %A) nounwind {
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 16)
+ %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1
+ %tmp4 = add <8 x i16> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :128]
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vld2Qi32(i32* %A) nounwind {
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 64)
+ %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1
+ %tmp4 = add <4 x i32> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :256]
+ ret <4 x i32> %tmp4
+}
+
+;Check for a post-increment updating load with register increment.
+define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
+ %A = load i8** %ptr
+ %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16)
+ %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+; CHECK: bic r2, r2, #3221225472
+; CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r2, :128], r1
+ %tmp5 = getelementptr i8* %A, i32 %inc
+ store i8* %tmp5, i8** %ptr
+ ret <16 x i8> %tmp4
+}
diff --git a/test/NaCl/ARM/neon-vld3-sandboxing.ll b/test/NaCl/ARM/neon-vld3-sandboxing.ll
new file mode 100644
index 0000000000..49e38b9c77
--- /dev/null
+++ b/test/NaCl/ARM/neon-vld3-sandboxing.ll
@@ -0,0 +1,79 @@
+; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
+%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*, i32) nounwind readonly
+declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*, i32) nounwind readonly
+
+declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*, i32) nounwind readonly
+
+define <8 x i8> @vld3i8(i32 %foobar, i32 %ba, i8* %A) nounwind {
+ %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 32)
+ %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+; CHECK: bic r2, r2, #3221225472
+; CHECK-NEXT: vld3.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r2, :64]
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld3i16(i16* %A) nounwind {
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
+ %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
+ %tmp4 = add <4 x i16> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.16 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0]
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld3i32(i32* %A) nounwind {
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0, i32 1)
+ %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2
+ %tmp4 = add <2 x i32> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.32 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0]
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @vld3i64(i64* %A) nounwind {
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16)
+ %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64]
+ ret <1 x i64> %tmp4
+}
+
+
+define <16 x i8> @vld3Qi8(i8* %A) nounwind {
+ %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 32)
+ %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64]!
+ ret <16 x i8> %tmp4
+}
+
diff --git a/test/NaCl/ARM/neon-vld4-sandboxing.ll b/test/NaCl/ARM/neon-vld4-sandboxing.ll
new file mode 100644
index 0000000000..14d903c09e
--- /dev/null
+++ b/test/NaCl/ARM/neon-vld4-sandboxing.ll
@@ -0,0 +1,80 @@
+; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
+%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
+
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*, i32) nounwind readonly
+declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*, i32) nounwind readonly
+
+declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*, i32) nounwind readonly
+
+define <8 x i8> @vld4i8(i8* %A) nounwind {
+ %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 8)
+ %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
+ %tmp4 = add <8 x i8> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64]
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld4i16(i16* %A) nounwind {
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 16)
+ %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2
+ %tmp4 = add <4 x i16> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.16 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :128]
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld4i32(i32* %A) nounwind {
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 32)
+ %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
+ %tmp4 = add <2 x i32> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.32 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256]
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @vld4i64(i64* %A) nounwind {
+ %tmp0 = bitcast i64* %A to i8*
+ %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64)
+ %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
+ %tmp4 = add <1 x i64> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256]
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vld4Qi8(i8* %A) nounwind {
+ %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 64)
+ %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0
+ %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
+ %tmp4 = add <16 x i8> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256]!
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256]
+ ret <16 x i8> %tmp4
+}
+
diff --git a/test/NaCl/ARM/neon-vlddup-sandboxing.ll b/test/NaCl/ARM/neon-vlddup-sandboxing.ll
new file mode 100644
index 0000000000..cd77ace644
--- /dev/null
+++ b/test/NaCl/ARM/neon-vlddup-sandboxing.ll
@@ -0,0 +1,151 @@
+; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int4x16x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int2x32x2_t = type { <2 x i32>, <2 x i32> }
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+
+define <8 x i8> @vld1dupi8(i32 %foo, i32 %bar,
+ i8* %A) nounwind {
+ %tmp1 = load i8* %A, align 8
+ %tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0
+ %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer
+; CHECK: bic r2, r2, #3221225472
+; CHECK-NEXT: vld1.8 {{{d[0-9]+\[\]}}}, [r2]
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vld1dupi16(i16* %A) nounwind {
+ %tmp1 = load i16* %A, align 8
+ %tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.16 {{{d[0-9]+\[\]}}}, [r0, :16]
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vld1dupi32(i32* %A) nounwind {
+ %tmp1 = load i32* %A, align 8
+ %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
+ %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.32 {{{d[0-9]+\[\]}}}, [r0, :32]
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
+ %tmp1 = load i8* %A, align 8
+ %tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0
+ %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.8 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0]
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @vld2dupi8(i8* %A) nounwind {
+ %tmp0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld2.8 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0]
+ %tmp1 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 0
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 1
+ %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <8 x i32> zeroinitializer
+ %tmp5 = add <8 x i8> %tmp2, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vld2dupi16(i8* %A) nounwind {
+ %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld2.16 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0]
+ %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
+ %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp5 = add <4 x i16> %tmp2, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vld2dupi32(i8* %A) nounwind {
+ %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld2.32 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0, :64]
+ %tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 1
+ %tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer
+ %tmp5 = add <2 x i32> %tmp2, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <4 x i16> @vld3dupi16(i8* %A) nounwind {
+ %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.16 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0]
+ %tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 1
+ %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 2
+ %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp7 = add <4 x i16> %tmp2, %tmp4
+ %tmp8 = add <4 x i16> %tmp7, %tmp6
+ ret <4 x i16> %tmp8
+}
+
+define <2 x i32> @vld4dupi32(i8* %A) nounwind {
+ %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.32 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0, :64]
+ %tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 1
+ %tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer
+ %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 2
+ %tmp6 = shufflevector <2 x i32> %tmp5, <2 x i32> undef, <2 x i32> zeroinitializer
+ %tmp7 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 3
+ %tmp8 = shufflevector <2 x i32> %tmp7, <2 x i32> undef, <2 x i32> zeroinitializer
+ %tmp9 = add <2 x i32> %tmp2, %tmp4
+ %tmp10 = add <2 x i32> %tmp6, %tmp8
+ %tmp11 = add <2 x i32> %tmp9, %tmp10
+ ret <2 x i32> %tmp11
+}
+
+;Check for a post-increment updating load.
+define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
+ %A = load i16** %ptr
+ %A2 = bitcast i16* %A to i8*
+ %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld4.16 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r1]!
+ %tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 1
+ %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 2
+ %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp7 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 3
+ %tmp8 = shufflevector <4 x i16> %tmp7, <4 x i16> undef, <4 x i32> zeroinitializer
+ %tmp9 = add <4 x i16> %tmp2, %tmp4
+ %tmp10 = add <4 x i16> %tmp6, %tmp8
+ %tmp11 = add <4 x i16> %tmp9, %tmp10
+ %tmp12 = getelementptr i16* %A, i32 4
+ store i16* %tmp12, i16** %ptr
+ ret <4 x i16> %tmp11
+}
diff --git a/test/NaCl/ARM/neon-vldlane-sandboxing.ll b/test/NaCl/ARM/neon-vldlane-sandboxing.ll
new file mode 100644
index 0000000000..716da93298
--- /dev/null
+++ b/test/NaCl/ARM/neon-vldlane-sandboxing.ll
@@ -0,0 +1,319 @@
+; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
+
+%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
+
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
+
+define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = load i8* %A, align 8
+ %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.8 {{{d[0-9]+\[[0-9]\]}}}, [r0]
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = load i16* %A, align 8
+ %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.16 {{{d[0-9]+\[[0-9]\]}}}, [r0, :16]
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = load i32* %A, align 8
+ %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.32 {{{d[0-9]+\[[0-9]\]}}}, [r0, :32]
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
+ %tmp1 = load <16 x i8>* %B
+ %tmp2 = load i8* %A, align 8
+ %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.8 {{{d[0-9]+\[[0-9]\]}}}, [r0]
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = load i16* %A, align 8
+ %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.16 {{{d[0-9]+\[[0-9]\]}}}, [r0, :16]
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = load i32* %A, align 8
+ %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.32 {{{d[0-9]+\[[0-9]\]}}}, [r0, :32]
+ ret <4 x i32> %tmp3
+}
+
+define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld2.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :16]
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
+ %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld2.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :32]
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vld2lanei32(i32 %foo, i32 %bar, i32 %baz,
+ i32* %A, <2 x i32>* %B) nounwind {
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+; CHECK: bic r3, r3, #3221225472
+; CHECK-NEXT: vld2.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r3]
+ ret <2 x i32> %tmp5
+}
+
+define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld2.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
+ %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld2.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :64]
+ ret <4 x i32> %tmp5
+}
+
+define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+ %tmp6 = add <8 x i8> %tmp3, %tmp4
+ %tmp7 = add <8 x i8> %tmp5, %tmp6
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+ ret <8 x i8> %tmp7
+}
+
+define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
+ %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
+ %tmp6 = add <4 x i16> %tmp3, %tmp4
+ %tmp7 = add <4 x i16> %tmp5, %tmp6
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+ ret <4 x i16> %tmp7
+}
+
+define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
+ %tmp6 = add <2 x i32> %tmp3, %tmp4
+ %tmp7 = add <2 x i32> %tmp5, %tmp6
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+ ret <2 x i32> %tmp7
+}
+
+define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
+ %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
+ %tmp6 = add <8 x i16> %tmp3, %tmp4
+ %tmp7 = add <8 x i16> %tmp5, %tmp6
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+ ret <8 x i16> %tmp7
+}
+
+define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
+ %tmp6 = add <4 x i32> %tmp3, %tmp4
+ %tmp7 = add <4 x i32> %tmp5, %tmp6
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+ ret <4 x i32> %tmp7
+}
+
+define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
+ %tmp1 = load <8 x i8>* %B
+ %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+ %tmp7 = add <8 x i8> %tmp3, %tmp4
+ %tmp8 = add <8 x i8> %tmp5, %tmp6
+ %tmp9 = add <8 x i8> %tmp7, %tmp8
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :32]
+ ret <8 x i8> %tmp9
+}
+
+define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = load <4 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
+ %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
+ %tmp7 = add <4 x i16> %tmp3, %tmp4
+ %tmp8 = add <4 x i16> %tmp5, %tmp6
+ %tmp9 = add <4 x i16> %tmp7, %tmp8
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+ ret <4 x i16> %tmp9
+}
+
+define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
+ %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
+ %tmp7 = add <2 x i32> %tmp3, %tmp4
+ %tmp8 = add <2 x i32> %tmp5, %tmp6
+ %tmp9 = add <2 x i32> %tmp7, %tmp8
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :64]
+ ret <2 x i32> %tmp9
+}
+
+define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+ %tmp0 = bitcast i16* %A to i8*
+ %tmp1 = load <8 x i16>* %B
+ %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
+ %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
+ %tmp7 = add <8 x i16> %tmp3, %tmp4
+ %tmp8 = add <8 x i16> %tmp5, %tmp6
+ %tmp9 = add <8 x i16> %tmp7, %tmp8
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :64]
+ ret <8 x i16> %tmp9
+}
+
+define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+ %tmp0 = bitcast i32* %A to i8*
+ %tmp1 = load <4 x i32>* %B
+ %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
+ %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
+ %tmp7 = add <4 x i32> %tmp3, %tmp4
+ %tmp8 = add <4 x i32> %tmp5, %tmp6
+ %tmp9 = add <4 x i32> %tmp7, %tmp8
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+ ret <4 x i32> %tmp9
+}
+
diff --git a/test/NaCl/ARM/neon-vst1-sandboxing.ll b/test/NaCl/ARM/neon-vst1-sandboxing.ll
index 8fd580bb49..ec5712ee94 100644
--- a/test/NaCl/ARM/neon-vst1-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vst1-sandboxing.ll
@@ -47,15 +47,19 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
%tmp1 = load <16 x i8>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8)
; CHECK: bic r0, r0, #3221225472
-; CHECK-NEXT: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [r0, :64]
+; CHECK-NEXT: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64]
ret void
}
define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32)
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst1.16 {{{d[0-9]+, d[0-9]+}}}, [r0, :128]
@@ -65,6 +69,8 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1)
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0]
@@ -74,6 +80,8 @@ define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1)
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0]
@@ -83,6 +91,8 @@ define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <2 x i64>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1)
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0]
diff --git a/test/NaCl/ARM/neon-vst2-sandboxing.ll b/test/NaCl/ARM/neon-vst2-sandboxing.ll
index e87373c174..431f68612c 100644
--- a/test/NaCl/ARM/neon-vst2-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vst2-sandboxing.ll
@@ -38,6 +38,8 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind {
define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
%tmp1 = load <16 x i8>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8)
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst2.8 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :64]
@@ -47,6 +49,8 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16)
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst2.16 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :128]
@@ -56,6 +60,8 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64)
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst2.32 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :256]
@@ -65,6 +71,8 @@ define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst2.32 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0]
diff --git a/test/NaCl/ARM/neon-vst3-sandboxing.ll b/test/NaCl/ARM/neon-vst3-sandboxing.ll
index b496c0c592..95f85bbeb6 100644
--- a/test/NaCl/ARM/neon-vst3-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vst3-sandboxing.ll
@@ -32,6 +32,8 @@ define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind {
%A = load i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
; CHECK: bic r1, r1, #3221225472
; CHECK-NEXT: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
diff --git a/test/NaCl/ARM/neon-vst4-sandboxing.ll b/test/NaCl/ARM/neon-vst4-sandboxing.ll
index 032f194231..2b0eb31b3d 100644
--- a/test/NaCl/ARM/neon-vst4-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vst4-sandboxing.ll
@@ -32,6 +32,8 @@ define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind {
%A = load float** %ptr
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
; CHECK: bic r1, r1, #3221225472
; CHECK-NEXT: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
diff --git a/test/NaCl/ARM/neon-vstlane-sandboxing.ll b/test/NaCl/ARM/neon-vstlane-sandboxing.ll
index 5b4dc63a14..8da70115f9 100644
--- a/test/NaCl/ARM/neon-vstlane-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vstlane-sandboxing.ll
@@ -3,8 +3,8 @@
define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
%tmp1 = load <8 x i8>* %B
- %tmp2 = extractelement <8 x i8> %tmp1, i32 3
- store i8 %tmp2, i8* %A, align 8
+ %tmp2 = extractelement <8 x i8> %tmp1, i32 3
+ store i8 %tmp2, i8* %A, align 8
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst1.8 {d{{[0-9]+}}[3]}, [r0]
ret void
@@ -12,8 +12,8 @@ define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
%tmp1 = load <4 x i16>* %B
- %tmp2 = extractelement <4 x i16> %tmp1, i32 2
- store i16 %tmp2, i16* %A, align 8
+ %tmp2 = extractelement <4 x i16> %tmp1, i32 2
+ store i16 %tmp2, i16* %A, align 8
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst1.16 {d{{[0-9]+}}[2]}, [r0, :16]
ret void
@@ -21,8 +21,8 @@ define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
%tmp1 = load <2 x i32>* %B
- %tmp2 = extractelement <2 x i32> %tmp1, i32 1
- store i32 %tmp2, i32* %A, align 8
+ %tmp2 = extractelement <2 x i32> %tmp1, i32 1
+ store i32 %tmp2, i32* %A, align 8
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst1.32 {d{{[0-9]+}}[1]}, [r0, :32]
ret void
@@ -30,8 +30,10 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
%tmp1 = load <16 x i8>* %B
- %tmp2 = extractelement <16 x i8> %tmp1, i32 9
- store i8 %tmp2, i8* %A, align 8
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+ %tmp2 = extractelement <16 x i8> %tmp1, i32 9
+ store i8 %tmp2, i8* %A, align 8
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst1.8 {d{{[0-9]+}}[1]}, [r0]
ret void
@@ -39,8 +41,10 @@ define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
%tmp1 = load <8 x i16>* %B
- %tmp2 = extractelement <8 x i16> %tmp1, i32 5
- store i16 %tmp2, i16* %A, align 8
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+ %tmp2 = extractelement <8 x i16> %tmp1, i32 5
+ store i16 %tmp2, i16* %A, align 8
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst1.16 {d{{[0-9]+}}[1]}, [r0, :16]
ret void
@@ -75,6 +79,8 @@ define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0]
@@ -84,6 +90,8 @@ define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst2.32 {d{{[0-9]+}}[0], d{{[0-9]+}}[0]}, [r0, :64]
@@ -145,6 +153,8 @@ define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst4.16 {d{{[0-9]+}}[3], d{{[0-9]+}}[3], d{{[0-9]+}}[3], d{{[0-9]+}}[3]}, [r0, :64]
@@ -154,6 +164,8 @@ define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
; CHECK: bic r0, r0, #3221225472
; CHECK-NEXT: vst4.32 {d{{[0-9]+}}[0], d{{[0-9]+}}[0], d{{[0-9]+}}[0], d{{[0-9]+}}[0]}, [r0]