-rw-r--r-- | lib/Target/ARM/ARMNaClRewritePass.cpp    | 272
-rw-r--r-- | test/NaCl/ARM/neon-vld1-sandboxing.ll    |  92
-rw-r--r-- | test/NaCl/ARM/neon-vld2-sandboxing.ll    | 102
-rw-r--r-- | test/NaCl/ARM/neon-vld3-sandboxing.ll    |  79
-rw-r--r-- | test/NaCl/ARM/neon-vld4-sandboxing.ll    |  80
-rw-r--r-- | test/NaCl/ARM/neon-vlddup-sandboxing.ll  | 151
-rw-r--r-- | test/NaCl/ARM/neon-vldlane-sandboxing.ll | 319
-rw-r--r-- | test/NaCl/ARM/neon-vst1-sandboxing.ll    |  12
-rw-r--r-- | test/NaCl/ARM/neon-vst2-sandboxing.ll    |   8
-rw-r--r-- | test/NaCl/ARM/neon-vst3-sandboxing.ll    |   2
-rw-r--r-- | test/NaCl/ARM/neon-vst4-sandboxing.ll    |   2
-rw-r--r-- | test/NaCl/ARM/neon-vstlane-sandboxing.ll |  32
12 files changed, 1140 insertions, 11 deletions
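
This change teaches ARMNaClRewritePass to treat NEON vector loads (VLD1-VLD4 and their lane, dup, and writeback variants) as dangerous memory references: for each opcode it records which machine operand holds the base address, so the rewriter can mask that register before the load executes. A minimal standalone C++ sketch of the pattern, assuming nothing beyond what the diff shows (AddrOperandIndex and SandboxAddress are illustrative stand-ins, not the pass's real API; the opcode names and indices are taken from the cases added below):

    #include <cassert>
    #include <cstdint>

    // Hypothetical mirror of the switch added to the pass: writeback variants
    // ("wb_fixed", "wb_register", "_UPD") define an extra register, so their
    // address operand sits one slot later than in the plain forms.
    enum Opcode { VLD1d8T, VLD1d8Twb_fixed, VLD4q32, VLD4q32_UPD };

    static int AddrOperandIndex(Opcode Op) {
      switch (Op) {
      case VLD1d8T:         return 1;  // plain load: address is operand 1
      case VLD1d8Twb_fixed: return 2;  // same load with writeback
      case VLD4q32:         return 4;
      case VLD4q32_UPD:     return 5;
      }
      return -1;
    }

    // What the emitted "bic rN, rN, #3221225472" computes: clear address
    // bits 31 and 30 so the access stays inside the low 1 GiB.
    static std::uint32_t SandboxAddress(std::uint32_t Addr) {
      return Addr & ~3221225472u;
    }

    int main() {
      assert(AddrOperandIndex(VLD1d8Twb_fixed) == AddrOperandIndex(VLD1d8T) + 1);
      assert(SandboxAddress(0xDEADBEEFu) == 0x1EADBEEFu);
      return 0;
    }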
diff --git a/lib/Target/ARM/ARMNaClRewritePass.cpp b/lib/Target/ARM/ARMNaClRewritePass.cpp
index f7f64601d7..c54afe742c 100644
--- a/lib/Target/ARM/ARMNaClRewritePass.cpp
+++ b/lib/Target/ARM/ARMNaClRewritePass.cpp
@@ -617,6 +617,278 @@ static bool IsDangerousStore(const MachineInstr &MI, int *AddrIdx) {
     break;
 
   //
+  // NEON loads
+  //
+
+  // VLD1
+  case ARM::VLD1d8:
+  case ARM::VLD1d16:
+  case ARM::VLD1d32:
+  case ARM::VLD1d64:
+  case ARM::VLD1q8:
+  case ARM::VLD1q16:
+  case ARM::VLD1q32:
+  case ARM::VLD1q64:
+  case ARM::VLD1d8wb_fixed:
+  case ARM::VLD1d16wb_fixed:
+  case ARM::VLD1d32wb_fixed:
+  case ARM::VLD1d64wb_fixed:
+  case ARM::VLD1q8wb_fixed:
+  case ARM::VLD1q16wb_fixed:
+  case ARM::VLD1q32wb_fixed:
+  case ARM::VLD1q64wb_fixed:
+  case ARM::VLD1d8wb_register:
+  case ARM::VLD1d16wb_register:
+  case ARM::VLD1d32wb_register:
+  case ARM::VLD1d64wb_register:
+  case ARM::VLD1q8wb_register:
+  case ARM::VLD1q16wb_register:
+  case ARM::VLD1q32wb_register:
+  case ARM::VLD1q64wb_register:
+
+  // VLD1T
+  case ARM::VLD1d8T:
+  case ARM::VLD1d16T:
+  case ARM::VLD1d32T:
+  case ARM::VLD1d64T:
+    *AddrIdx = 1;
+    break;
+  case ARM::VLD1d8Twb_fixed:
+  case ARM::VLD1d16Twb_fixed:
+  case ARM::VLD1d32Twb_fixed:
+  case ARM::VLD1d64Twb_fixed:
+  case ARM::VLD1d8Twb_register:
+  case ARM::VLD1d16Twb_register:
+  case ARM::VLD1d32Twb_register:
+  case ARM::VLD1d64Twb_register:
+    *AddrIdx = 2;
+    break;
+
+  // VLD1Q
+  case ARM::VLD1d8Q:
+  case ARM::VLD1d16Q:
+  case ARM::VLD1d32Q:
+  case ARM::VLD1d64Q:
+    *AddrIdx = 1;
+    break;
+  case ARM::VLD1d8Qwb_fixed:
+  case ARM::VLD1d16Qwb_fixed:
+  case ARM::VLD1d32Qwb_fixed:
+  case ARM::VLD1d64Qwb_fixed:
+  case ARM::VLD1d8Qwb_register:
+  case ARM::VLD1d16Qwb_register:
+  case ARM::VLD1d32Qwb_register:
+  case ARM::VLD1d64Qwb_register:
+    *AddrIdx = 2;
+    break;
+
+  // VLD1LN
+  case ARM::VLD1LNd8:
+  case ARM::VLD1LNd16:
+  case ARM::VLD1LNd32:
+  case ARM::VLD1LNd8_UPD:
+  case ARM::VLD1LNd16_UPD:
+  case ARM::VLD1LNd32_UPD:
+
+  // VLD1DUP
+  case ARM::VLD1DUPd8:
+  case ARM::VLD1DUPd16:
+  case ARM::VLD1DUPd32:
+  case ARM::VLD1DUPq8:
+  case ARM::VLD1DUPq16:
+  case ARM::VLD1DUPq32:
+  case ARM::VLD1DUPd8wb_fixed:
+  case ARM::VLD1DUPd16wb_fixed:
+  case ARM::VLD1DUPd32wb_fixed:
+  case ARM::VLD1DUPq8wb_fixed:
+  case ARM::VLD1DUPq16wb_fixed:
+  case ARM::VLD1DUPq32wb_fixed:
+  case ARM::VLD1DUPd8wb_register:
+  case ARM::VLD1DUPd16wb_register:
+  case ARM::VLD1DUPd32wb_register:
+  case ARM::VLD1DUPq8wb_register:
+  case ARM::VLD1DUPq16wb_register:
+  case ARM::VLD1DUPq32wb_register:
+
+  // VLD2
+  case ARM::VLD2d8:
+  case ARM::VLD2d16:
+  case ARM::VLD2d32:
+  case ARM::VLD2b8:
+  case ARM::VLD2b16:
+  case ARM::VLD2b32:
+  case ARM::VLD2q8:
+  case ARM::VLD2q16:
+  case ARM::VLD2q32:
+    *AddrIdx = 1;
+    break;
+
+  case ARM::VLD2d8wb_fixed:
+  case ARM::VLD2d16wb_fixed:
+  case ARM::VLD2d32wb_fixed:
+  case ARM::VLD2b8wb_fixed:
+  case ARM::VLD2b16wb_fixed:
+  case ARM::VLD2b32wb_fixed:
+  case ARM::VLD2q8wb_fixed:
+  case ARM::VLD2q16wb_fixed:
+  case ARM::VLD2q32wb_fixed:
+  case ARM::VLD2d8wb_register:
+  case ARM::VLD2d16wb_register:
+  case ARM::VLD2d32wb_register:
+  case ARM::VLD2b8wb_register:
+  case ARM::VLD2b16wb_register:
+  case ARM::VLD2b32wb_register:
+  case ARM::VLD2q8wb_register:
+  case ARM::VLD2q16wb_register:
+  case ARM::VLD2q32wb_register:
+    *AddrIdx = 2;
+    break;
+
+  // VLD2LN
+  case ARM::VLD2LNd8:
+  case ARM::VLD2LNd16:
+  case ARM::VLD2LNd32:
+  case ARM::VLD2LNq16:
+  case ARM::VLD2LNq32:
+    *AddrIdx = 2;
+    break;
+
+  case ARM::VLD2LNd8_UPD:
+  case ARM::VLD2LNd16_UPD:
+  case ARM::VLD2LNd32_UPD:
+  case ARM::VLD2LNq16_UPD:
+  case ARM::VLD2LNq32_UPD:
+    *AddrIdx = 3;
+    break;
+
+  // VLD2DUP
+  case ARM::VLD2DUPd8:
+  case ARM::VLD2DUPd16:
+  case ARM::VLD2DUPd32:
+  case ARM::VLD2DUPd8x2:
+  case ARM::VLD2DUPd16x2:
+  case ARM::VLD2DUPd32x2:
+    *AddrIdx = 1;
+    break;
+
+  case ARM::VLD2DUPd8wb_fixed:
+  case ARM::VLD2DUPd16wb_fixed:
+  case ARM::VLD2DUPd32wb_fixed:
+  case ARM::VLD2DUPd8wb_register:
+  case ARM::VLD2DUPd16wb_register:
+  case ARM::VLD2DUPd32wb_register:
+  case ARM::VLD2DUPd8x2wb_fixed:
+  case ARM::VLD2DUPd16x2wb_fixed:
+  case ARM::VLD2DUPd32x2wb_fixed:
+  case ARM::VLD2DUPd8x2wb_register:
+  case ARM::VLD2DUPd16x2wb_register:
+  case ARM::VLD2DUPd32x2wb_register:
+    *AddrIdx = 2;
+    break;
+
+  // VLD3
+  case ARM::VLD3d8:
+  case ARM::VLD3d16:
+  case ARM::VLD3d32:
+  case ARM::VLD3q8:
+  case ARM::VLD3q16:
+  case ARM::VLD3q32:
+  case ARM::VLD3d8_UPD:
+  case ARM::VLD3d16_UPD:
+  case ARM::VLD3d32_UPD:
+  case ARM::VLD3q8_UPD:
+  case ARM::VLD3q16_UPD:
+  case ARM::VLD3q32_UPD:
+
+  // VLD3LN
+  case ARM::VLD3LNd8:
+  case ARM::VLD3LNd16:
+  case ARM::VLD3LNd32:
+  case ARM::VLD3LNq16:
+  case ARM::VLD3LNq32:
+    *AddrIdx = 3;
+    break;
+
+  case ARM::VLD3LNd8_UPD:
+  case ARM::VLD3LNd16_UPD:
+  case ARM::VLD3LNd32_UPD:
+  case ARM::VLD3LNq16_UPD:
+  case ARM::VLD3LNq32_UPD:
+    *AddrIdx = 4;
+    break;
+
+  // VLD3DUP
+  case ARM::VLD3DUPd8:
+  case ARM::VLD3DUPd16:
+  case ARM::VLD3DUPd32:
+  case ARM::VLD3DUPq8:
+  case ARM::VLD3DUPq16:
+  case ARM::VLD3DUPq32:
+    *AddrIdx = 3;
+    break;
+
+  case ARM::VLD3DUPd8_UPD:
+  case ARM::VLD3DUPd16_UPD:
+  case ARM::VLD3DUPd32_UPD:
+  case ARM::VLD3DUPq8_UPD:
+  case ARM::VLD3DUPq16_UPD:
+  case ARM::VLD3DUPq32_UPD:
+    *AddrIdx = 4;
+    break;
+
+  // VLD4
+  case ARM::VLD4d8:
+  case ARM::VLD4d16:
+  case ARM::VLD4d32:
+  case ARM::VLD4q8:
+  case ARM::VLD4q16:
+  case ARM::VLD4q32:
+    *AddrIdx = 4;
+    break;
+
+  case ARM::VLD4d8_UPD:
+  case ARM::VLD4d16_UPD:
+  case ARM::VLD4d32_UPD:
+  case ARM::VLD4q8_UPD:
+  case ARM::VLD4q16_UPD:
+  case ARM::VLD4q32_UPD:
+    *AddrIdx = 5;
+    break;
+
+  // VLD4LN
+  case ARM::VLD4LNd8:
+  case ARM::VLD4LNd16:
+  case ARM::VLD4LNd32:
+  case ARM::VLD4LNq16:
+  case ARM::VLD4LNq32:
+    *AddrIdx = 4;
+    break;
+
+  case ARM::VLD4LNd8_UPD:
+  case ARM::VLD4LNd16_UPD:
+  case ARM::VLD4LNd32_UPD:
+  case ARM::VLD4LNq16_UPD:
+  case ARM::VLD4LNq32_UPD:
+    *AddrIdx = 5;
+    break;
+
+  case ARM::VLD4DUPd8:
+  case ARM::VLD4DUPd16:
+  case ARM::VLD4DUPd32:
+  case ARM::VLD4DUPq16:
+  case ARM::VLD4DUPq32:
+    *AddrIdx = 4;
+    break;
+
+  case ARM::VLD4DUPd8_UPD:
+  case ARM::VLD4DUPd16_UPD:
+  case ARM::VLD4DUPd32_UPD:
+  case ARM::VLD4DUPq16_UPD:
+  case ARM::VLD4DUPq32_UPD:
+    *AddrIdx = 5;
+    break;
+
+  //
   // NEON stores
   //
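
Every test that follows checks the same two-instruction pattern: a bic that clears the top two bits of the base register, immediately followed (CHECK-NEXT in most files) by the NEON load from that register. For intuition, a small standalone check of the arithmetic behind the magic constant; note that the 1 GiB sandbox size is inferred from the mask itself, not stated anywhere in this commit:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      const std::uint32_t kMask = 3221225472u; // the "bic rN, rN, #3221225472" immediate
      assert(kMask == 0xC0000000u);            // i.e. address bits 31 and 30
      // Clearing those bits confines any 32-bit address to [0, 0x40000000).
      for (std::uint32_t a : {0u, 0xDEADBEEFu, 0xFFFFFFFFu})
        assert((a & ~kMask) < 0x40000000u);
      return 0;
    }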
diff --git a/test/NaCl/ARM/neon-vld1-sandboxing.ll b/test/NaCl/ARM/neon-vld1-sandboxing.ll
new file mode 100644
index 0000000000..52395a559f
--- /dev/null
+++ b/test/NaCl/ARM/neon-vld1-sandboxing.ll
@@ -0,0 +1,92 @@
+; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-load -sfi-store -filetype=obj %s -o - \
+; RUN:   | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+define <8 x i8> @vld1i8(i8* %A) nounwind {
+  %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.8 {{{d[0-9]+}}}, [r0, :64]
+  ret <8 x i8> %tmp1
+}
+
+define <4 x i16> @vld1i16(i16* %A) nounwind {
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.16 {{{d[0-9]+}}}, [r0]
+  ret <4 x i16> %tmp1
+}
+
+define <2 x i32> @vld1i32(i32* %A) nounwind {
+  %tmp0 = bitcast i32* %A to i8*
+  %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.32 {{{d[0-9]+}}}, [r0]
+  ret <2 x i32> %tmp1
+}
+
+; Insert useless arguments here just for the sake of moving
+; %A further down the rN chain (testing how sandboxing detects
+; the correct register and not just the default r0)
+define <1 x i64> @vld1i64(i32 %foo, i32 %bar, i32 %baz,
+                          i64* %A) nounwind {
+  %tmp0 = bitcast i64* %A to i8*
+  %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0, i32 1)
+; CHECK: bic r3, r3, #3221225472
+; CHECK-NEXT: vld1.64 {{{d[0-9]+}}}, [r3]
+  ret <1 x i64> %tmp1
+}
+
+define <16 x i8> @vld1Qi8(i8* %A) nounwind {
+  %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64]
+  ret <16 x i8> %tmp1
+}
+
+define <8 x i16> @vld1Qi16(i16* %A) nounwind {
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 32)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.16 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0, :128]
+  ret <8 x i16> %tmp1
+}
+
+define <4 x i32> @vld1Qi32(i32* %A) nounwind {
+  %tmp0 = bitcast i32* %A to i8*
+  %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0, i32 1)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
+  ret <4 x i32> %tmp1
+}
+
+define <2 x i64> @vld1Qi64(i64* %A) nounwind {
+  %tmp0 = bitcast i64* %A to i8*
+  %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0, i32 1)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
+  ret <2 x i64> %tmp1
+}
+
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) nounwind readonly
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) nounwind readonly
+declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
+
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly
+
+define <4 x i16> @vld1i16_update(i16** %ptr) nounwind {
+  %A = load i16** %ptr
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.16 {{{d[0-9]+}}}, [r1]!
+  %tmp2 = getelementptr i16* %A, i32 4
+  store i16* %tmp2, i16** %ptr
+  ret <4 x i16> %tmp1
+}
+
diff --git a/test/NaCl/ARM/neon-vld2-sandboxing.ll b/test/NaCl/ARM/neon-vld2-sandboxing.ll
new file mode 100644
index 0000000000..ffec745e5f
--- /dev/null
+++ b/test/NaCl/ARM/neon-vld2-sandboxing.ll
@@ -0,0 +1,102 @@
+; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-load -sfi-store -filetype=obj %s -o - \
+; RUN:   | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
+%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*, i32) nounwind readonly
+declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*, i32) nounwind readonly
+
+declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly
+
+define <8 x i8> @vld2i8(i8* %A) nounwind {
+  %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8)
+  %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
+  %tmp4 = add <8 x i8> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64]
+  ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld2i16(i16* %A) nounwind {
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 32)
+  %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
+  %tmp4 = add <4 x i16> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :128]
+  ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld2i32(i32* %A) nounwind {
+  %tmp0 = bitcast i32* %A to i8*
+  %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0, i32 1)
+  %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1
+  %tmp4 = add <2 x i32> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+  ret <2 x i32> %tmp4
+}
+
+define <16 x i8> @vld2Qi8(i8* %A) nounwind {
+  %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8)
+  %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
+  %tmp4 = add <16 x i8> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64]
+  ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @vld2Qi16(i16* %A) nounwind {
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 16)
+  %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1
+  %tmp4 = add <8 x i16> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :128]
+  ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vld2Qi32(i32* %A) nounwind {
+  %tmp0 = bitcast i32* %A to i8*
+  %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 64)
+  %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1
+  %tmp4 = add <4 x i32> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :256]
+  ret <4 x i32> %tmp4
+}
+
+;Check for a post-increment updating load with register increment.
+define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
+  %A = load i8** %ptr
+  %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16)
+  %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
+  %tmp4 = add <16 x i8> %tmp2, %tmp3
+; CHECK: bic r2, r2, #3221225472
+; CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r2, :128], r1
+  %tmp5 = getelementptr i8* %A, i32 %inc
+  store i8* %tmp5, i8** %ptr
+  ret <16 x i8> %tmp4
+}
diff --git a/test/NaCl/ARM/neon-vld3-sandboxing.ll b/test/NaCl/ARM/neon-vld3-sandboxing.ll
new file mode 100644
index 0000000000..49e38b9c77
--- /dev/null
+++ b/test/NaCl/ARM/neon-vld3-sandboxing.ll
@@ -0,0 +1,79 @@
+; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \
+; RUN:   | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
+%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*, i32) nounwind readonly
+declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*, i32) nounwind readonly
+
+declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*, i32) nounwind readonly
+
+define <8 x i8> @vld3i8(i32 %foobar, i32 %ba, i8* %A) nounwind {
+  %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 32)
+  %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
+  %tmp4 = add <8 x i8> %tmp2, %tmp3
+; CHECK: bic r2, r2, #3221225472
+; CHECK-NEXT: vld3.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r2, :64]
+  ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld3i16(i16* %A) nounwind {
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
+  %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
+  %tmp4 = add <4 x i16> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.16 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0]
+  ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld3i32(i32* %A) nounwind {
+  %tmp0 = bitcast i32* %A to i8*
+  %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0, i32 1)
+  %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2
+  %tmp4 = add <2 x i32> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.32 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0]
+  ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @vld3i64(i64* %A) nounwind {
+  %tmp0 = bitcast i64* %A to i8*
+  %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16)
+  %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
+  %tmp4 = add <1 x i64> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64]
+  ret <1 x i64> %tmp4
+}
+
+
+define <16 x i8> @vld3Qi8(i8* %A) nounwind {
+  %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 32)
+  %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
+  %tmp4 = add <16 x i8> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64]!
+  ret <16 x i8> %tmp4
+}
+
diff --git a/test/NaCl/ARM/neon-vld4-sandboxing.ll b/test/NaCl/ARM/neon-vld4-sandboxing.ll
new file mode 100644
index 0000000000..14d903c09e
--- /dev/null
+++ b/test/NaCl/ARM/neon-vld4-sandboxing.ll
@@ -0,0 +1,80 @@
+; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \
+; RUN:   | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
+%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
+
+%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
+%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
+
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*, i32) nounwind readonly
+declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*, i32) nounwind readonly
+
+declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*, i32) nounwind readonly
+
+define <8 x i8> @vld4i8(i8* %A) nounwind {
+  %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 8)
+  %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
+  %tmp4 = add <8 x i8> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64]
+  ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @vld4i16(i16* %A) nounwind {
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 16)
+  %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2
+  %tmp4 = add <4 x i16> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.16 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :128]
+  ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @vld4i32(i32* %A) nounwind {
+  %tmp0 = bitcast i32* %A to i8*
+  %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 32)
+  %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
+  %tmp4 = add <2 x i32> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.32 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256]
+  ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @vld4i64(i64* %A) nounwind {
+  %tmp0 = bitcast i64* %A to i8*
+  %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64)
+  %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
+  %tmp4 = add <1 x i64> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256]
  ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @vld4Qi8(i8* %A) nounwind {
+  %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 64)
+  %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0
+  %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
+  %tmp4 = add <16 x i8> %tmp2, %tmp3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256]!
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256]
+  ret <16 x i8> %tmp4
+}
+
diff --git a/test/NaCl/ARM/neon-vlddup-sandboxing.ll b/test/NaCl/ARM/neon-vlddup-sandboxing.ll
new file mode 100644
index 0000000000..cd77ace644
--- /dev/null
+++ b/test/NaCl/ARM/neon-vlddup-sandboxing.ll
@@ -0,0 +1,151 @@
+; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \
+; RUN:   | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int4x16x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int2x32x2_t = type { <2 x i32>, <2 x i32> }
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+
+define <8 x i8> @vld1dupi8(i32 %foo, i32 %bar,
+                           i8* %A) nounwind {
+  %tmp1 = load i8* %A, align 8
+  %tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0
+  %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer
+; CHECK: bic r2, r2, #3221225472
+; CHECK-NEXT: vld1.8 {{{d[0-9]+\[\]}}}, [r2]
+  ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vld1dupi16(i16* %A) nounwind {
+  %tmp1 = load i16* %A, align 8
+  %tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.16 {{{d[0-9]+\[\]}}}, [r0, :16]
+  ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vld1dupi32(i32* %A) nounwind {
+  %tmp1 = load i32* %A, align 8
+  %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
+  %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.32 {{{d[0-9]+\[\]}}}, [r0, :32]
+  ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
+  %tmp1 = load i8* %A, align 8
+  %tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0
+  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.8 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0]
+  ret <16 x i8> %tmp3
+}
+
+define <8 x i8> @vld2dupi8(i8* %A) nounwind {
+  %tmp0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld2.8 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0]
+  %tmp1 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 0
+  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
+  %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 1
+  %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <8 x i32> zeroinitializer
+  %tmp5 = add <8 x i8> %tmp2, %tmp4
+  ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vld2dupi16(i8* %A) nounwind {
+  %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld2.16 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0]
+  %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
+  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+  %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
+  %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+  %tmp5 = add <4 x i16> %tmp2, %tmp4
+  ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vld2dupi32(i8* %A) nounwind {
+  %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld2.32 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0, :64]
+  %tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0
+  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
+  %tmp3 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 1
+  %tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer
+  %tmp5 = add <2 x i32> %tmp2, %tmp4
+  ret <2 x i32> %tmp5
+}
+
+define <4 x i16> @vld3dupi16(i8* %A) nounwind {
+  %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.16 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0]
+  %tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0
+  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+  %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 1
+  %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+  %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 2
+  %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer
+  %tmp7 = add <4 x i16> %tmp2, %tmp4
+  %tmp8 = add <4 x i16> %tmp7, %tmp6
+  ret <4 x i16> %tmp8
+}
+
+define <2 x i32> @vld4dupi32(i8* %A) nounwind {
+  %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.32 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0, :64]
+  %tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0
+  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
+  %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 1
+  %tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer
+  %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 2
+  %tmp6 = shufflevector <2 x i32> %tmp5, <2 x i32> undef, <2 x i32> zeroinitializer
+  %tmp7 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 3
+  %tmp8 = shufflevector <2 x i32> %tmp7, <2 x i32> undef, <2 x i32> zeroinitializer
+  %tmp9 = add <2 x i32> %tmp2, %tmp4
+  %tmp10 = add <2 x i32> %tmp6, %tmp8
+  %tmp11 = add <2 x i32> %tmp9, %tmp10
+  ret <2 x i32> %tmp11
+}
+
+;Check for a post-increment updating load.
+define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
+  %A = load i16** %ptr
+  %A2 = bitcast i16* %A to i8*
+  %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld4.16 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r1]!
+  %tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0
+  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
+  %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 1
+  %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
+  %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 2
+  %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer
+  %tmp7 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 3
+  %tmp8 = shufflevector <4 x i16> %tmp7, <4 x i16> undef, <4 x i32> zeroinitializer
+  %tmp9 = add <4 x i16> %tmp2, %tmp4
+  %tmp10 = add <4 x i16> %tmp6, %tmp8
+  %tmp11 = add <4 x i16> %tmp9, %tmp10
+  %tmp12 = getelementptr i16* %A, i32 4
+  store i16* %tmp12, i16** %ptr
+  ret <4 x i16> %tmp11
+}
diff --git a/test/NaCl/ARM/neon-vldlane-sandboxing.ll b/test/NaCl/ARM/neon-vldlane-sandboxing.ll
new file mode 100644
index 0000000000..716da93298
--- /dev/null
+++ b/test/NaCl/ARM/neon-vldlane-sandboxing.ll
@@ -0,0 +1,319 @@
+; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \
+; RUN:   | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
+
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly
+
+%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
+
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
+
+%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }
+
+%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
+%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
+%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
+
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly
+
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
+
+define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
+  %tmp1 = load <8 x i8>* %B
+  %tmp2 = load i8* %A, align 8
+  %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.8 {{{d[0-9]+\[[0-9]\]}}}, [r0]
+  ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
+  %tmp1 = load <4 x i16>* %B
+  %tmp2 = load i16* %A, align 8
+  %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.16 {{{d[0-9]+\[[0-9]\]}}}, [r0, :16]
+  ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
+  %tmp1 = load <2 x i32>* %B
+  %tmp2 = load i32* %A, align 8
+  %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.32 {{{d[0-9]+\[[0-9]\]}}}, [r0, :32]
+  ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
+  %tmp1 = load <16 x i8>* %B
+  %tmp2 = load i8* %A, align 8
+  %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.8 {{{d[0-9]+\[[0-9]\]}}}, [r0]
+  ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+  %tmp1 = load <8 x i16>* %B
+  %tmp2 = load i16* %A, align 8
+  %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.16 {{{d[0-9]+\[[0-9]\]}}}, [r0, :16]
+  ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+  %tmp1 = load <4 x i32>* %B
+  %tmp2 = load i32* %A, align 8
+  %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld1.32 {{{d[0-9]+\[[0-9]\]}}}, [r0, :32]
+  ret <4 x i32> %tmp3
+}
+
+define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
+  %tmp1 = load <8 x i8>* %B
+  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
+  %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+  %tmp5 = add <8 x i8> %tmp3, %tmp4
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld2.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :16]
+  ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = load <4 x i16>* %B
+  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
+  %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
+  %tmp5 = add <4 x i16> %tmp3, %tmp4
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld2.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :32]
+  ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vld2lanei32(i32 %foo, i32 %bar, i32 %baz,
+                              i32* %A, <2 x i32>* %B) nounwind {
+  %tmp0 = bitcast i32* %A to i8*
+  %tmp1 = load <2 x i32>* %B
+  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+  %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
+  %tmp5 = add <2 x i32> %tmp3, %tmp4
+; CHECK: bic r3, r3, #3221225472
+; CHECK-NEXT: vld2.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r3]
+  ret <2 x i32> %tmp5
+}
+
+define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = load <8 x i16>* %B
+  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
+  %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
+  %tmp5 = add <8 x i16> %tmp3, %tmp4
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld2.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+  ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+  %tmp0 = bitcast i32* %A to i8*
+  %tmp1 = load <4 x i32>* %B
+  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
+  %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
+  %tmp5 = add <4 x i32> %tmp3, %tmp4
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld2.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :64]
+  ret <4 x i32> %tmp5
+}
+
+define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
+  %tmp1 = load <8 x i8>* %B
+  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
+  %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+  %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+  %tmp6 = add <8 x i8> %tmp3, %tmp4
+  %tmp7 = add <8 x i8> %tmp5, %tmp6
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+  ret <8 x i8> %tmp7
+}
+
+define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = load <4 x i16>* %B
+  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
+  %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
+  %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
+  %tmp6 = add <4 x i16> %tmp3, %tmp4
+  %tmp7 = add <4 x i16> %tmp5, %tmp6
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+  ret <4 x i16> %tmp7
+}
+
+define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
+  %tmp0 = bitcast i32* %A to i8*
+  %tmp1 = load <2 x i32>* %B
+  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+  %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
+  %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
+  %tmp6 = add <2 x i32> %tmp3, %tmp4
+  %tmp7 = add <2 x i32> %tmp5, %tmp6
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+  ret <2 x i32> %tmp7
+}
+
+define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = load <8 x i16>* %B
+  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
+  %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
+  %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
+  %tmp6 = add <8 x i16> %tmp3, %tmp4
+  %tmp7 = add <8 x i16> %tmp5, %tmp6
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+  ret <8 x i16> %tmp7
+}
+
+define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+  %tmp0 = bitcast i32* %A to i8*
+  %tmp1 = load <4 x i32>* %B
+  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1)
+  %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
+  %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
+  %tmp6 = add <4 x i32> %tmp3, %tmp4
+  %tmp7 = add <4 x i32> %tmp5, %tmp6
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld3.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+  ret <4 x i32> %tmp7
+}
+
+define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
+  %tmp1 = load <8 x i8>* %B
+  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
+  %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+  %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+  %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+  %tmp7 = add <8 x i8> %tmp3, %tmp4
+  %tmp8 = add <8 x i8> %tmp5, %tmp6
+  %tmp9 = add <8 x i8> %tmp7, %tmp8
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :32]
+  ret <8 x i8> %tmp9
+}
+
+define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = load <4 x i16>* %B
+  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
+  %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
+  %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
+  %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3
+  %tmp7 = add <4 x i16> %tmp3, %tmp4
+  %tmp8 = add <4 x i16> %tmp5, %tmp6
+  %tmp9 = add <4 x i16> %tmp7, %tmp8
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+  ret <4 x i16> %tmp9
+}
+
+define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
+  %tmp0 = bitcast i32* %A to i8*
+  %tmp1 = load <2 x i32>* %B
+  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
+  %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
+  %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
+  %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3
+  %tmp7 = add <2 x i32> %tmp3, %tmp4
+  %tmp8 = add <2 x i32> %tmp5, %tmp6
+  %tmp9 = add <2 x i32> %tmp7, %tmp8
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :64]
+  ret <2 x i32> %tmp9
+}
+
+define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+  %tmp0 = bitcast i16* %A to i8*
+  %tmp1 = load <8 x i16>* %B
+  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
+  %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
+  %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
+  %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3
+  %tmp7 = add <8 x i16> %tmp3, %tmp4
+  %tmp8 = add <8 x i16> %tmp5, %tmp6
+  %tmp9 = add <8 x i16> %tmp7, %tmp8
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :64]
+  ret <8 x i16> %tmp9
+}
+
+define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+  %tmp0 = bitcast i32* %A to i8*
+  %tmp1 = load <4 x i32>* %B
+  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
+  %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
+  %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
+  %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
+  %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3
+  %tmp7 = add <4 x i32> %tmp3, %tmp4
+  %tmp8 = add <4 x i32> %tmp5, %tmp6
+  %tmp9 = add <4 x i32> %tmp7, %tmp8
+; CHECK: bic r0, r0, #3221225472
+; CHECK-NEXT: vld4.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0]
+  ret <4 x i32> %tmp9
+}
+
diff --git a/test/NaCl/ARM/neon-vst1-sandboxing.ll b/test/NaCl/ARM/neon-vst1-sandboxing.ll
index 8fd580bb49..ec5712ee94 100644
--- a/test/NaCl/ARM/neon-vst1-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vst1-sandboxing.ll
@@ -47,15 +47,19 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
 
 define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
   %tmp1 = load <16 x i8>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8)
 ; CHECK: bic r0, r0, #3221225472
-; CHECK-NEXT: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [r0, :64]
+; CHECK-NEXT: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64]
   ret void
 }
 
 define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
   %tmp0 = bitcast i16* %A to i8*
   %tmp1 = load <8 x i16>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32)
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst1.16 {{{d[0-9]+, d[0-9]+}}}, [r0, :128]
@@ -65,6 +69,8 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
 define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
   %tmp0 = bitcast i32* %A to i8*
   %tmp1 = load <4 x i32>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1)
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0]
@@ -74,6 +80,8 @@ define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
 define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
   %tmp0 = bitcast float* %A to i8*
   %tmp1 = load <4 x float>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1)
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0]
@@ -83,6 +91,8 @@ define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
 define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
   %tmp0 = bitcast i64* %A to i8*
   %tmp1 = load <2 x i64>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1)
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0]
diff --git a/test/NaCl/ARM/neon-vst2-sandboxing.ll b/test/NaCl/ARM/neon-vst2-sandboxing.ll
index e87373c174..431f68612c 100644
--- a/test/NaCl/ARM/neon-vst2-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vst2-sandboxing.ll
@@ -38,6 +38,8 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind {
 
 define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
   %tmp1 = load <16 x i8>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8)
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst2.8 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :64]
@@ -47,6 +49,8 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
 define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
   %tmp0 = bitcast i16* %A to i8*
   %tmp1 = load <8 x i16>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16)
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst2.16 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :128]
@@ -56,6 +60,8 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
 define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
   %tmp0 = bitcast i32* %A to i8*
   %tmp1 = load <4 x i32>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64)
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst2.32 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :256]
@@ -65,6 +71,8 @@ define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
 define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
   %tmp0 = bitcast float* %A to i8*
   %tmp1 = load <4 x float>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst2.32 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0]
diff --git a/test/NaCl/ARM/neon-vst3-sandboxing.ll b/test/NaCl/ARM/neon-vst3-sandboxing.ll
index b496c0c592..95f85bbeb6 100644
--- a/test/NaCl/ARM/neon-vst3-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vst3-sandboxing.ll
@@ -32,6 +32,8 @@ define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind {
   %A = load i16** %ptr
   %tmp0 = bitcast i16* %A to i8*
   %tmp1 = load <8 x i16>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
 ; CHECK: bic r1, r1, #3221225472
 ; CHECK-NEXT: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
diff --git a/test/NaCl/ARM/neon-vst4-sandboxing.ll b/test/NaCl/ARM/neon-vst4-sandboxing.ll
index 032f194231..2b0eb31b3d 100644
--- a/test/NaCl/ARM/neon-vst4-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vst4-sandboxing.ll
@@ -32,6 +32,8 @@ define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind {
   %A = load float** %ptr
   %tmp0 = bitcast float* %A to i8*
   %tmp1 = load <4 x float>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
 ; CHECK: bic r1, r1, #3221225472
 ; CHECK-NEXT: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
diff --git a/test/NaCl/ARM/neon-vstlane-sandboxing.ll b/test/NaCl/ARM/neon-vstlane-sandboxing.ll
index 5b4dc63a14..8da70115f9 100644
--- a/test/NaCl/ARM/neon-vstlane-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vstlane-sandboxing.ll
@@ -3,8 +3,8 @@
 
 define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
   %tmp1 = load <8 x i8>* %B
-  %tmp2 = extractelement <8 x i8> %tmp1, i32 3
-  store i8 %tmp2, i8* %A, align 8
+  %tmp2 = extractelement <8 x i8> %tmp1, i32 3
+  store i8 %tmp2, i8* %A, align 8
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst1.8 {d{{[0-9]+}}[3]}, [r0]
   ret void
@@ -12,8 +12,8 @@ define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
 
 define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
   %tmp1 = load <4 x i16>* %B
-  %tmp2 = extractelement <4 x i16> %tmp1, i32 2
-  store i16 %tmp2, i16* %A, align 8
+  %tmp2 = extractelement <4 x i16> %tmp1, i32 2
+  store i16 %tmp2, i16* %A, align 8
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst1.16 {d{{[0-9]+}}[2]}, [r0, :16]
   ret void
@@ -21,8 +21,8 @@ define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
 
 define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
   %tmp1 = load <2 x i32>* %B
-  %tmp2 = extractelement <2 x i32> %tmp1, i32 1
-  store i32 %tmp2, i32* %A, align 8
+  %tmp2 = extractelement <2 x i32> %tmp1, i32 1
+  store i32 %tmp2, i32* %A, align 8
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst1.32 {d{{[0-9]+}}[1]}, [r0, :32]
   ret void
@@ -30,8 +30,10 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
 
 define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
   %tmp1 = load <16 x i8>* %B
-  %tmp2 = extractelement <16 x i8> %tmp1, i32 9
-  store i8 %tmp2, i8* %A, align 8
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+  %tmp2 = extractelement <16 x i8> %tmp1, i32 9
+  store i8 %tmp2, i8* %A, align 8
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst1.8 {d{{[0-9]+}}[1]}, [r0]
   ret void
@@ -39,8 +41,10 @@ define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
 
 define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
   %tmp1 = load <8 x i16>* %B
-  %tmp2 = extractelement <8 x i16> %tmp1, i32 5
-  store i16 %tmp2, i16* %A, align 8
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+  %tmp2 = extractelement <8 x i16> %tmp1, i32 5
+  store i16 %tmp2, i16* %A, align 8
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst1.16 {d{{[0-9]+}}[1]}, [r0, :16]
   ret void
@@ -75,6 +79,8 @@ define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
 define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
   %tmp0 = bitcast i16* %A to i8*
   %tmp1 = load <8 x i16>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0]
@@ -84,6 +90,8 @@ define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
   %tmp0 = bitcast i32* %A to i8*
   %tmp1 = load <4 x i32>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst2.32 {d{{[0-9]+}}[0], d{{[0-9]+}}[0]}, [r0, :64]
@@ -145,6 +153,8 @@ define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
 define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
   %tmp0 = bitcast i16* %A to i8*
   %tmp1 = load <8 x i16>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst4.16 {d{{[0-9]+}}[3], d{{[0-9]+}}[3], d{{[0-9]+}}[3], d{{[0-9]+}}[3]}, [r0, :64]
@@ -154,6 +164,8 @@ define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
   %tmp0 = bitcast i32* %A to i8*
   %tmp1 = load <4 x i32>* %B
+; CHECK: bic r1, r1, #3221225472
+; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
 ; CHECK: bic r0, r0, #3221225472
 ; CHECK-NEXT: vst4.32 {d{{[0-9]+}}[0], d{{[0-9]+}}[0], d{{[0-9]+}}[0], d{{[0-9]+}}[0]}, [r0]