diff options
Diffstat (limited to 'test/CodeGen/NVPTX')
-rw-r--r-- | test/CodeGen/NVPTX/annotations.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/arithmetic-fp-sm10.ll | 72 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/arithmetic-int.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/calling-conv.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/compare-int.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/convert-fp.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/convert-int-sm10.ll | 55 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/intrinsic-old.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/intrinsics.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/ld-addrspace.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/nvvm-reflect.ll | 34 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/sched1.ll | 31 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/sched2.ll | 32 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/sm-version-10.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/sm-version-11.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/sm-version-12.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/sm-version-13.ll | 6 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/st-addrspace.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/tuple-literal.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/vector-args.ll | 27 | ||||
-rw-r--r-- | test/CodeGen/NVPTX/vector-loads.ll | 16 |
21 files changed, 133 insertions, 178 deletions
diff --git a/test/CodeGen/NVPTX/annotations.ll b/test/CodeGen/NVPTX/annotations.ll index d93f688ef1..39d52d3826 100644 --- a/test/CodeGen/NVPTX/annotations.ll +++ b/test/CodeGen/NVPTX/annotations.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll b/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll deleted file mode 100644 index 73c77f56bc..0000000000 --- a/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll +++ /dev/null @@ -1,72 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s - -;; These tests should run for all targets - -;;===-- Basic instruction selection tests ---------------------------------===;; - - -;;; f64 - -define double @fadd_f64(double %a, double %b) { -; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}} -; CHECK: ret - %ret = fadd double %a, %b - ret double %ret -} - -define double @fsub_f64(double %a, double %b) { -; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}} -; CHECK: ret - %ret = fsub double %a, %b - ret double %ret -} - -define double @fmul_f64(double %a, double %b) { -; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}} -; CHECK: ret - %ret = fmul double %a, %b - ret double %ret -} - -define double @fdiv_f64(double %a, double %b) { -; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}} -; CHECK: ret - %ret = fdiv double %a, %b - ret double %ret -} - -;; PTX does not have a floating-point rem instruction - - -;;; f32 - -define float @fadd_f32(float %a, float %b) { -; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret - %ret = fadd float %a, %b - ret float %ret -} - -define float @fsub_f32(float %a, float %b) { -; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret - %ret = fsub float %a, %b - ret float %ret -} - -define float @fmul_f32(float %a, float %b) { -; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret - %ret = fmul float %a, %b - ret float %ret -} - -define float @fdiv_f32(float %a, float %b) { -; CHECK: div.full.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret - %ret = fdiv float %a, %b - ret float %ret -} - -;; PTX does not have a floating-point rem instruction diff --git a/test/CodeGen/NVPTX/arithmetic-int.ll b/test/CodeGen/NVPTX/arithmetic-int.ll index 529f84900a..8d73b7e6c4 100644 --- a/test/CodeGen/NVPTX/arithmetic-int.ll +++ b/test/CodeGen/NVPTX/arithmetic-int.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/calling-conv.ll b/test/CodeGen/NVPTX/calling-conv.ll index 968203e5f7..190a1462ad 100644 --- a/test/CodeGen/NVPTX/calling-conv.ll +++ b/test/CodeGen/NVPTX/calling-conv.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/compare-int.ll b/test/CodeGen/NVPTX/compare-int.ll index 12fc754821..16af0a336d 100644 --- a/test/CodeGen/NVPTX/compare-int.ll +++ b/test/CodeGen/NVPTX/compare-int.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/convert-fp.ll b/test/CodeGen/NVPTX/convert-fp.ll index 21c84379b0..1882121fa7 100644 --- a/test/CodeGen/NVPTX/convert-fp.ll +++ b/test/CodeGen/NVPTX/convert-fp.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/convert-int-sm10.ll b/test/CodeGen/NVPTX/convert-int-sm10.ll deleted file mode 100644 index 20716f982e..0000000000 --- a/test/CodeGen/NVPTX/convert-int-sm10.ll +++ /dev/null @@ -1,55 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s - - -; i16 - -define i16 @cvt_i16_i32(i32 %x) { -; CHECK: cvt.u16.u32 %rs{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: ret - %a = trunc i32 %x to i16 - ret i16 %a -} - -define i16 @cvt_i16_i64(i64 %x) { -; CHECK: cvt.u16.u64 %rs{{[0-9]+}}, %rl{{[0-9]+}} -; CHECK: ret - %a = trunc i64 %x to i16 - ret i16 %a -} - - - -; i32 - -define i32 @cvt_i32_i16(i16 %x) { -; CHECK: cvt.u32.u16 %r{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: ret - %a = zext i16 %x to i32 - ret i32 %a -} - -define i32 @cvt_i32_i64(i64 %x) { -; CHECK: cvt.u32.u64 %r{{[0-9]+}}, %rl{{[0-9]+}} -; CHECK: ret - %a = trunc i64 %x to i32 - ret i32 %a -} - - - -; i64 - -define i64 @cvt_i64_i16(i16 %x) { -; CHECK: cvt.u64.u16 %rl{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: ret - %a = zext i16 %x to i64 - ret i64 %a -} - -define i64 @cvt_i64_i32(i32 %x) { -; CHECK: cvt.u64.u32 %rl{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: ret - %a = zext i32 %x to i64 - ret i64 %a -} diff --git a/test/CodeGen/NVPTX/intrinsic-old.ll b/test/CodeGen/NVPTX/intrinsic-old.ll index 1c9879c417..53a28f3337 100644 --- a/test/CodeGen/NVPTX/intrinsic-old.ll +++ b/test/CodeGen/NVPTX/intrinsic-old.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/intrinsics.ll b/test/CodeGen/NVPTX/intrinsics.ll index afab60ca96..8b0357be87 100644 --- a/test/CodeGen/NVPTX/intrinsics.ll +++ b/test/CodeGen/NVPTX/intrinsics.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll index d1f5093df2..3265868d3c 100644 --- a/test/CodeGen/NVPTX/ld-addrspace.ll +++ b/test/CodeGen/NVPTX/ld-addrspace.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 diff --git a/test/CodeGen/NVPTX/nvvm-reflect.ll b/test/CodeGen/NVPTX/nvvm-reflect.ll new file mode 100644 index 0000000000..0d02194651 --- /dev/null +++ b/test/CodeGen/NVPTX/nvvm-reflect.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=0 -O2 | FileCheck %s --check-prefix=USE_MUL_0 +; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=1 -O2 | FileCheck %s --check-prefix=USE_MUL_1 + +@str = private addrspace(4) unnamed_addr constant [8 x i8] c"USE_MUL\00" + +declare i32 @__nvvm_reflect(i8*) +declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*) + +define float @foo(float %a, float %b) { +; USE_MUL_0: define float @foo +; USE_MUL_0-NOT: call i32 @__nvvm_reflect +; USE_MUL_1: define float @foo +; USE_MUL_1-NOT: call i32 @__nvvm_reflect + %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0)) + %reflect = tail call i32 @__nvvm_reflect(i8* %ptr) + %cmp = icmp ugt i32 %reflect, 0 + br i1 %cmp, label %use_mul, label %use_add + +use_mul: +; USE_MUL_1: fmul float %a, %b +; USE_MUL_0-NOT: fadd float %a, %b + %ret1 = fmul float %a, %b + br label %exit + +use_add: +; USE_MUL_0: fadd float %a, %b +; USE_MUL_1-NOT: fmul float %a, %b + %ret2 = fadd float %a, %b + br label %exit + +exit: + %ret = phi float [%ret1, %use_mul], [%ret2, %use_add] + ret float %ret +} diff --git a/test/CodeGen/NVPTX/sched1.ll b/test/CodeGen/NVPTX/sched1.ll new file mode 100644 index 0000000000..03ab635e73 --- /dev/null +++ b/test/CodeGen/NVPTX/sched1.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +; Ensure source scheduling is working + +define void @foo(i32* %a) { +; CHECK: .func foo +; CHECK: ld.u32 +; CHECK-NEXT: ld.u32 +; CHECK-NEXT: ld.u32 +; CHECK-NEXT: ld.u32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 + %ptr0 = getelementptr i32* %a, i32 0 + %val0 = load i32* %ptr0 + %ptr1 = getelementptr i32* %a, i32 1 + %val1 = load i32* %ptr1 + %ptr2 = getelementptr i32* %a, i32 2 + %val2 = load i32* %ptr2 + %ptr3 = getelementptr i32* %a, i32 3 + %val3 = load i32* %ptr3 + + %t0 = add i32 %val0, %val1 + %t1 = add i32 %t0, %val2 + %t2 = add i32 %t1, %val3 + + store i32 %t2, i32* %a + + ret void +} + diff --git a/test/CodeGen/NVPTX/sched2.ll b/test/CodeGen/NVPTX/sched2.ll new file mode 100644 index 0000000000..71a9a4963f --- /dev/null +++ b/test/CodeGen/NVPTX/sched2.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +define void @foo(<2 x i32>* %a) { +; CHECK: .func foo +; CHECK: ld.v2.u32 +; CHECK-NEXT: ld.v2.u32 +; CHECK-NEXT: ld.v2.u32 +; CHECK-NEXT: ld.v2.u32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 + %ptr0 = getelementptr <2 x i32>* %a, i32 0 + %val0 = load <2 x i32>* %ptr0 + %ptr1 = getelementptr <2 x i32>* %a, i32 1 + %val1 = load <2 x i32>* %ptr1 + %ptr2 = getelementptr <2 x i32>* %a, i32 2 + %val2 = load <2 x i32>* %ptr2 + %ptr3 = getelementptr <2 x i32>* %a, i32 3 + %val3 = load <2 x i32>* %ptr3 + + %t0 = add <2 x i32> %val0, %val1 + %t1 = add <2 x i32> %t0, %val2 + %t2 = add <2 x i32> %t1, %val3 + + store <2 x i32> %t2, <2 x i32>* %a + + ret void +} + diff --git a/test/CodeGen/NVPTX/sm-version-10.ll b/test/CodeGen/NVPTX/sm-version-10.ll deleted file mode 100644 index 9324a37809..0000000000 --- a/test/CodeGen/NVPTX/sm-version-10.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s - - -; CHECK: .target sm_10 - diff --git a/test/CodeGen/NVPTX/sm-version-11.ll b/test/CodeGen/NVPTX/sm-version-11.ll deleted file mode 100644 index 9033a4eba5..0000000000 --- a/test/CodeGen/NVPTX/sm-version-11.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_11 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_11 | FileCheck %s - - -; CHECK: .target sm_11 - diff --git a/test/CodeGen/NVPTX/sm-version-12.ll b/test/CodeGen/NVPTX/sm-version-12.ll deleted file mode 100644 index d8ee85c901..0000000000 --- a/test/CodeGen/NVPTX/sm-version-12.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_12 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_12 | FileCheck %s - - -; CHECK: .target sm_12 - diff --git a/test/CodeGen/NVPTX/sm-version-13.ll b/test/CodeGen/NVPTX/sm-version-13.ll deleted file mode 100644 index ad67d642ce..0000000000 --- a/test/CodeGen/NVPTX/sm-version-13.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_13 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_13 | FileCheck %s - - -; CHECK: .target sm_13 - diff --git a/test/CodeGen/NVPTX/st-addrspace.ll b/test/CodeGen/NVPTX/st-addrspace.ll index 54e04ae610..0b26d802df 100644 --- a/test/CodeGen/NVPTX/st-addrspace.ll +++ b/test/CodeGen/NVPTX/st-addrspace.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 diff --git a/test/CodeGen/NVPTX/tuple-literal.ll b/test/CodeGen/NVPTX/tuple-literal.ll index 5c0cb2c15c..2b1f2c4b66 100644 --- a/test/CodeGen/NVPTX/tuple-literal.ll +++ b/test/CodeGen/NVPTX/tuple-literal.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_13 +; RUN: llc < %s -march=nvptx -mcpu=sm_20 define ptx_device void @test_function({i8, i8}*) { ret void diff --git a/test/CodeGen/NVPTX/vector-args.ll b/test/CodeGen/NVPTX/vector-args.ll new file mode 100644 index 0000000000..80deae4693 --- /dev/null +++ b/test/CodeGen/NVPTX/vector-args.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + + +define float @foo(<2 x float> %a) { +; CHECK: .func (.param .b32 func_retval0) foo +; CHECK: .param .align 8 .b8 foo_param_0[8] +; CHECK: ld.param.f32 %f{{[0-9]+}} +; CHECK: ld.param.f32 %f{{[0-9]+}} + %t1 = fmul <2 x float> %a, %a + %t2 = extractelement <2 x float> %t1, i32 0 + %t3 = extractelement <2 x float> %t1, i32 1 + %t4 = fadd float %t2, %t3 + ret float %t4 +} + + +define float @bar(<4 x float> %a) { +; CHECK: .func (.param .b32 func_retval0) bar +; CHECK: .param .align 16 .b8 bar_param_0[16] +; CHECK: ld.param.f32 %f{{[0-9]+}} +; CHECK: ld.param.f32 %f{{[0-9]+}} + %t1 = fmul <4 x float> %a, %a + %t2 = extractelement <4 x float> %t1, i32 0 + %t3 = extractelement <4 x float> %t1, i32 1 + %t4 = fadd float %t2, %t3 + ret float %t4 +} diff --git a/test/CodeGen/NVPTX/vector-loads.ll b/test/CodeGen/NVPTX/vector-loads.ll index f5a1795e3c..58882bf166 100644 --- a/test/CodeGen/NVPTX/vector-loads.ll +++ b/test/CodeGen/NVPTX/vector-loads.ll @@ -9,7 +9,7 @@ define void @foo(<2 x float>* %a) { ; CHECK: .func foo -; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}]; +; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}} %t1 = load <2 x float>* %a %t2 = fmul <2 x float> %t1, %t1 store <2 x float> %t2, <2 x float>* %a @@ -18,7 +18,7 @@ define void @foo(<2 x float>* %a) { define void @foo2(<4 x float>* %a) { ; CHECK: .func foo2 -; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}]; +; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} %t1 = load <4 x float>* %a %t2 = fmul <4 x float> %t1, %t1 store <4 x float> %t2, <4 x float>* %a @@ -27,8 +27,8 @@ define void @foo2(<4 x float>* %a) { define void @foo3(<8 x float>* %a) { ; CHECK: .func foo3 -; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}]; -; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}+16]; +; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} +; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} %t1 = load <8 x float>* %a %t2 = fmul <8 x float> %t1, %t1 store <8 x float> %t2, <8 x float>* %a @@ -39,7 +39,7 @@ define void @foo3(<8 x float>* %a) { define void @foo4(<2 x i32>* %a) { ; CHECK: .func foo4 -; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}]; +; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}} %t1 = load <2 x i32>* %a %t2 = mul <2 x i32> %t1, %t1 store <2 x i32> %t2, <2 x i32>* %a @@ -48,7 +48,7 @@ define void @foo4(<2 x i32>* %a) { define void @foo5(<4 x i32>* %a) { ; CHECK: .func foo5 -; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}]; +; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} %t1 = load <4 x i32>* %a %t2 = mul <4 x i32> %t1, %t1 store <4 x i32> %t2, <4 x i32>* %a @@ -57,8 +57,8 @@ define void @foo5(<4 x i32>* %a) { define void @foo6(<8 x i32>* %a) { ; CHECK: .func foo6 -; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}]; -; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}+16]; +; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} +; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} %t1 = load <8 x i32>* %a %t2 = mul <8 x i32> %t1, %t1 store <8 x i32> %t2, <8 x i32>* %a |