diff options
| author | Dan Bailey <dan@dneg.com> | 2011-06-24 19:27:10 +0000 |
|---|---|---|
| committer | Dan Bailey <dan@dneg.com> | 2011-06-24 19:27:10 +0000 |
| commit | b05a8a8f02ee3ec78eb6171f2f3078fe2ed4ff7e (patch) | |
| tree | bc6dd6097b870992b67a120696a07e386636702b /test/CodeGen/PTX | |
| parent | ef01edf1e938ef89e598ec558c50ceb2681c5ac4 (diff) | |
PTX: Add support for i8 type and introduce associated .b8 registers
The i8 type is required for boolean values, but i8 values can only be used with the ld, st and mov instructions. The i1 type continues to be used for predicates.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133814 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/PTX')
| -rw-r--r-- | test/CodeGen/PTX/cvt.ll | 83 |
| -rw-r--r-- | test/CodeGen/PTX/ld.ll | 77 |
| -rw-r--r-- | test/CodeGen/PTX/mov.ll | 12 |
| -rw-r--r-- | test/CodeGen/PTX/st.ll | 78 |
4 files changed, 250 insertions, 0 deletions
diff --git a/test/CodeGen/PTX/cvt.ll b/test/CodeGen/PTX/cvt.ll index 18f7ef365b..dbabbf8938 100644 --- a/test/CodeGen/PTX/cvt.ll +++ b/test/CodeGen/PTX/cvt.ll @@ -3,6 +3,17 @@ ; preds ; (note: we convert back to i32 to return) +define ptx_device i32 @cvt_pred_i8(i8 %x, i1 %y) { +; CHECK: setp.gt.b8 p[[P0:[0-9]+]], rq{{[0-9]+}}, 0 +; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}}; +; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]]; +; CHECK-NEXT: ret; + %a = trunc i8 %x to i1 + %b = and i1 %a, %y + %c = zext i1 %b to i32 + ret i32 %c +} + define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) { ; CHECK: setp.gt.b16 p[[P0:[0-9]+]], rh{{[0-9]+}}, 0 ; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}}; @@ -58,6 +69,43 @@ define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) { ret i32 %c } +; i8 + +define ptx_device i8 @cvt_i8_preds(i1 %x) { +; CHECK: selp.u8 rq{{[0-9]+}}, 1, 0, p{{[0-9]+}}; +; CHECK-NEXT: ret; + %a = zext i1 %x to i8 + ret i8 %a +} + +define ptx_device i8 @cvt_i8_i32(i32 %x) { +; CHECK: cvt.u8.u32 rq{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: ret; + %a = trunc i32 %x to i8 + ret i8 %a +} + +define ptx_device i8 @cvt_i8_i64(i64 %x) { +; CHECK: cvt.u8.u64 rq{{[0-9]+}}, rd{{[0-9]+}}; +; CHECK-NEXT: ret; + %a = trunc i64 %x to i8 + ret i8 %a +} + +define ptx_device i8 @cvt_i8_f32(float %x) { +; CHECK: cvt.rzi.u8.f32 rq{{[0-9]+}}, r{{[0-9]+}}; +; CHECK-NEXT: ret; + %a = fptoui float %x to i8 + ret i8 %a +} + +define ptx_device i8 @cvt_i8_f64(double %x) { +; CHECK: cvt.rzi.u8.f64 rq{{[0-9]+}}, rd{{[0-9]+}}; +; CHECK-NEXT: ret; + %a = fptoui double %x to i8 + ret i8 %a +} + ; i16 define ptx_device i16 @cvt_i16_preds(i1 %x) { @@ -67,6 +115,13 @@ define ptx_device i16 @cvt_i16_preds(i1 %x) { ret i16 %a } +define ptx_device i16 @cvt_i16_i8(i8 %x) { +; CHECK: cvt.u16.u8 rh{{[0-9]+}}, rq{{[0-9]+}}; +; CHECK-NEXT: ret; + %a = zext i8 %x to i16 + ret i16 %a +} + define ptx_device i16 @cvt_i16_i32(i32 %x) { ; CHECK: cvt.u16.u32 rh{{[0-9]+}}, r{{[0-9]+}}; ; 
CHECK-NEXT: ret; @@ -104,6 +159,13 @@ define ptx_device i32 @cvt_i32_preds(i1 %x) { ret i32 %a } +define ptx_device i32 @cvt_i32_i8(i8 %x) { +; CHECK: cvt.u32.u8 r{{[0-9]+}}, rq{{[0-9]+}}; +; CHECK-NEXT: ret; + %a = zext i8 %x to i32 + ret i32 %a +} + define ptx_device i32 @cvt_i32_i16(i16 %x) { ; CHECK: cvt.u32.u16 r{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; @@ -141,6 +203,13 @@ define ptx_device i64 @cvt_i64_preds(i1 %x) { ret i64 %a } +define ptx_device i64 @cvt_i64_i8(i8 %x) { +; CHECK: cvt.u64.u8 rd{{[0-9]+}}, rq{{[0-9]+}}; +; CHECK-NEXT: ret; + %a = zext i8 %x to i64 + ret i64 %a +} + define ptx_device i64 @cvt_i64_i16(i16 %x) { ; CHECK: cvt.u64.u16 rd{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; @@ -178,6 +247,13 @@ define ptx_device float @cvt_f32_preds(i1 %x) { ret float %a } +define ptx_device float @cvt_f32_i8(i8 %x) { +; CHECK: cvt.rn.f32.u8 r{{[0-9]+}}, rq{{[0-9]+}}; +; CHECK-NEXT: ret; + %a = uitofp i8 %x to float + ret float %a +} + define ptx_device float @cvt_f32_i16(i16 %x) { ; CHECK: cvt.rn.f32.u16 r{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; @@ -215,6 +291,13 @@ define ptx_device double @cvt_f64_preds(i1 %x) { ret double %a } +define ptx_device double @cvt_f64_i8(i8 %x) { +; CHECK: cvt.rn.f64.u8 rd{{[0-9]+}}, rq{{[0-9]+}}; +; CHECK-NEXT: ret; + %a = uitofp i8 %x to double + ret double %a +} + define ptx_device double @cvt_f64_i16(i16 %x) { ; CHECK: cvt.rn.f64.u16 rd{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK-NEXT: ret; diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll index d184d1243a..951b14b864 100644 --- a/test/CodeGen/PTX/ld.ll +++ b/test/CodeGen/PTX/ld.ll @@ -1,5 +1,17 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s +;CHECK: .extern .global .b8 array_i8[10]; +@array_i8 = external global [10 x i8] + +;CHECK: .extern .const .b8 array_constant_i8[10]; +@array_constant_i8 = external addrspace(1) constant [10 x i8] + +;CHECK: .extern .local .b8 array_local_i8[10]; +@array_local_i8 = external addrspace(2) global [10 x i8] + +;CHECK: .extern 
.shared .b8 array_shared_i8[10]; +@array_shared_i8 = external addrspace(4) global [10 x i8] + ;CHECK: .extern .global .b8 array_i16[20]; @array_i16 = external global [10 x i16] @@ -60,6 +72,13 @@ ;CHECK: .extern .shared .b8 array_shared_double[80]; @array_shared_double = external addrspace(4) global [10 x double] +define ptx_device i8 @t1_u8(i8* %p) { +entry: +;CHECK: ld.global.u8 rq{{[0-9]+}}, [r{{[0-9]+}}]; +;CHECK-NEXT: ret; + %x = load i8* %p + ret i8 %x +} define ptx_device i16 @t1_u16(i16* %p) { entry: @@ -101,6 +120,15 @@ entry: ret double %x } +define ptx_device i8 @t2_u8(i8* %p) { +entry: +;CHECK: ld.global.u8 rq{{[0-9]+}}, [r{{[0-9]+}}+1]; +;CHECK-NEXT: ret; + %i = getelementptr i8* %p, i32 1 + %x = load i8* %i + ret i8 %x +} + define ptx_device i16 @t2_u16(i16* %p) { entry: ;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}+2]; @@ -146,6 +174,15 @@ entry: ret double %x } +define ptx_device i8 @t3_u8(i8* %p, i32 %q) { +entry: +;CHECK: add.u32 r[[R0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +;CHECK-NEXT: ld.global.u8 rq{{[0-9]+}}, [r[[R0]]]; + %i = getelementptr i8* %p, i32 %q + %x = load i8* %i + ret i8 %x +} + define ptx_device i16 @t3_u16(i16* %p, i32 %q) { entry: ;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1; @@ -196,6 +233,16 @@ entry: ret double %x } +define ptx_device i8 @t4_global_u8() { +entry: +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8; +;CHECK-NEXT: ld.global.u8 rq{{[0-9]+}}, [r[[R0]]]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i8]* @array_i8, i32 0, i32 0 + %x = load i8* %i + ret i8 %x +} + define ptx_device i16 @t4_global_u16() { entry: ;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; @@ -296,6 +343,16 @@ entry: ret double %x } +define ptx_device i8 @t4_local_u8() { +entry: +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i8; +;CHECK-NEXT: ld.local.u8 rq{{[0-9]+}}, [r[[R0]]]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i8] addrspace(2)* @array_local_i8, i32 0, i32 0 + %x = load i8 addrspace(2)* %i + ret i8 %x +} + define ptx_device i16 @t4_local_u16() { 
entry: ;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16; @@ -346,6 +403,16 @@ entry: ret double %x } +define ptx_device i8 @t4_shared_u8() { +entry: +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i8; +;CHECK-NEXT: ld.shared.u8 rq{{[0-9]+}}, [r[[R0]]]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i8] addrspace(4)* @array_shared_i8, i32 0, i32 0 + %x = load i8 addrspace(4)* %i + ret i8 %x +} + define ptx_device i16 @t4_shared_u16() { entry: ;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16; @@ -396,6 +463,16 @@ entry: ret double %x } +define ptx_device i8 @t5_u8() { +entry: +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8; +;CHECK-NEXT: ld.global.u8 rq{{[0-9]+}}, [r[[R0]]+1]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i8]* @array_i8, i32 0, i32 1 + %x = load i8* %i + ret i8 %x +} + define ptx_device i16 @t5_u16() { entry: ;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll index cce6a5b897..b930b4caef 100644 --- a/test/CodeGen/PTX/mov.ll +++ b/test/CodeGen/PTX/mov.ll @@ -1,5 +1,11 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s +define ptx_device i8 @t1_u8() { +; CHECK: mov.u8 rq{{[0-9]+}}, 0; +; CHECK: ret; + ret i8 0 +} + define ptx_device i16 @t1_u16() { ; CHECK: mov.u16 rh{{[0-9]+}}, 0; ; CHECK: ret; @@ -30,6 +36,12 @@ define ptx_device double @t1_f64() { ret double 0.0 } +define ptx_device i8 @t2_u8(i8 %x) { +; CHECK: mov.u8 rq{{[0-9]+}}, rq{{[0-9]+}}; +; CHECK: ret; + ret i8 %x +} + define ptx_device i16 @t2_u16(i16 %x) { ; CHECK: mov.u16 rh{{[0-9]+}}, rh{{[0-9]+}}; ; CHECK: ret; diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll index b08528e1c3..596d189e4b 100644 --- a/test/CodeGen/PTX/st.ll +++ b/test/CodeGen/PTX/st.ll @@ -1,5 +1,17 @@ ; RUN: llc < %s -march=ptx32 | FileCheck %s +;CHECK: .extern .global .b8 array_i8[10]; +@array_i8 = external global [10 x i8] + +;CHECK: .extern .const .b8 array_constant_i8[10]; +@array_constant_i8 = external addrspace(1) constant [10 x i8] + +;CHECK: .extern .local .b8 
array_local_i8[10]; +@array_local_i8 = external addrspace(2) global [10 x i8] + +;CHECK: .extern .shared .b8 array_shared_i8[10]; +@array_shared_i8 = external addrspace(4) global [10 x i8] + ;CHECK: .extern .global .b8 array_i16[20]; @array_i16 = external global [10 x i16] @@ -60,6 +72,13 @@ ;CHECK: .extern .shared .b8 array_shared_double[80]; @array_shared_double = external addrspace(4) global [10 x double] +define ptx_device void @t1_u8(i8* %p, i8 %x) { +entry: +;CHECK: st.global.u8 [r{{[0-9]+}}], rq{{[0-9]+}}; +;CHECK-NEXT: ret; + store i8 %x, i8* %p + ret void +} define ptx_device void @t1_u16(i16* %p, i16 %x) { entry: @@ -101,6 +120,15 @@ entry: ret void } +define ptx_device void @t2_u8(i8* %p, i8 %x) { +entry: +;CHECK: st.global.u8 [r{{[0-9]+}}+1], rq{{[0-9]+}}; +;CHECK-NEXT: ret; + %i = getelementptr i8* %p, i32 1 + store i8 %x, i8* %i + ret void +} + define ptx_device void @t2_u16(i16* %p, i16 %x) { entry: ;CHECK: st.global.u16 [r{{[0-9]+}}+2], rh{{[0-9]+}}; @@ -146,6 +174,16 @@ entry: ret void } +define ptx_device void @t3_u8(i8* %p, i32 %q, i8 %x) { +entry: +;CHECK: add.u32 r[[R0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}; +;CHECK-NEXT: st.global.u8 [r[[R0]]], rq{{[0-9]+}}; +;CHECK-NEXT: ret; + %i = getelementptr i8* %p, i32 %q + store i8 %x, i8* %i + ret void +} + define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) { entry: ;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1; @@ -201,6 +239,16 @@ entry: ret void } +define ptx_device void @t4_global_u8(i8 %x) { +entry: +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8; +;CHECK-NEXT: st.global.u8 [r[[R0]]], rq{{[0-9]+}}; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i8]* @array_i8, i8 0, i8 0 + store i8 %x, i8* %i + ret void +} + define ptx_device void @t4_global_u16(i16 %x) { entry: ;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; @@ -251,6 +299,16 @@ entry: ret void } +define ptx_device void @t4_local_u8(i8 %x) { +entry: +;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i8; +;CHECK-NEXT: st.local.u8 [r[[R0]]], rq{{[0-9]+}}; 
+;CHECK-NEXT: ret; + %i = getelementptr [10 x i8] addrspace(2)* @array_local_i8, i32 0, i32 0 + store i8 %x, i8 addrspace(2)* %i + ret void +} + define ptx_device void @t4_local_u16(i16 %x) { entry: ;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16; @@ -301,6 +359,16 @@ entry: ret void } +define ptx_device void @t4_shared_u8(i8 %x) { +entry: +;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i8; +;CHECK-NEXT: st.shared.u8 [r[[R0]]], rq{{[0-9]+}}; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i8] addrspace(4)* @array_shared_i8, i32 0, i32 0 + store i8 %x, i8 addrspace(4)* %i + ret void +} + define ptx_device void @t4_shared_u16(i16 %x) { entry: ;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16; @@ -351,6 +419,16 @@ entry: ret void } +define ptx_device void @t5_u8(i8 %x) { +entry: +;CHECK: mov.u32 r[[R0:[0-9]+]], array_i8; +;CHECK-NEXT: st.global.u8 [r[[R0]]+1], rq{{[0-9]+}}; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i8]* @array_i8, i32 0, i32 1 + store i8 %x, i8* %i + ret void +} + define ptx_device void @t5_u16(i16 %x) { entry: ;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16; |
