diff options
author | Alexander Kornienko <alexfh@google.com> | 2013-03-14 10:51:38 +0000 |
---|---|---|
committer | Alexander Kornienko <alexfh@google.com> | 2013-03-14 10:51:38 +0000 |
commit | 647735c781c5b37061ee03d6e9e6c7dda92218e2 (patch) | |
tree | 5a5e56606d41060263048b5a5586b3d2380898ba /test/CodeGen/R600 | |
parent | 6aed25d93d1cfcde5809a73ffa7dc1b0d6396f66 (diff) | |
parent | f635ef401786c84df32090251a8cf45981ecca33 (diff) |
Updating branches/google/stable to r176857
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/stable@177040 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/R600')
58 files changed, 1875 insertions, 0 deletions
diff --git a/test/CodeGen/R600/128bit-kernel-args.ll b/test/CodeGen/R600/128bit-kernel-args.ll new file mode 100644 index 0000000000..114f9e7447 --- /dev/null +++ b/test/CodeGen/R600/128bit-kernel-args.ll @@ -0,0 +1,18 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @v4i32_kernel_arg +; CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40 + +define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) { +entry: + store <4 x i32> %in, <4 x i32> addrspace(1)* %out + ret void +} + +; CHECK: @v4f32_kernel_arg +; CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 40 +define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float> %in) { +entry: + store <4 x float> %in, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/add.v4i32.ll b/test/CodeGen/R600/add.v4i32.ll new file mode 100644 index 0000000000..ac4a87417b --- /dev/null +++ b/test/CodeGen/R600/add.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = add <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/and.v4i32.ll b/test/CodeGen/R600/and.v4i32.ll new file mode 100644 index 0000000000..662085e2d6 --- /dev/null +++ b/test/CodeGen/R600/and.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = and <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll new file mode 100644 index 0000000000..fd958b3659 --- /dev/null +++ b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll @@ -0,0 +1,36 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; This test is for a bug in +; DAGCombiner::reduceBuildVecConvertToConvertBuildVec() where +; the wrong type was being passed to +; TargetLowering::getOperationAction() when checking the legality of +; ISD::UINT_TO_FP and ISD::SINT_TO_FP opcodes. + + +; CHECK: @sint +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %sint = load i32 addrspace(1) * %in + %conv = sitofp i32 %sint to float + %0 = insertelement <4 x float> undef, float %conv, i32 0 + %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer + store <4 x float> %splat, <4 x float> addrspace(1)* %out + ret void +} + +;CHECK: @uint +;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %uint = load i32 addrspace(1) * %in + %conv = uitofp i32 %uint to float + %0 = insertelement <4 x float> undef, float %conv, i32 0 + %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer + store <4 x float> %splat, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll new file mode 100644 index 0000000000..a58674269a --- /dev/null +++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; PRED_SET* instructions must be tied to any instruction that uses their +; result. This tests that there are no instructions between the PRED_SET* +; and the PREDICATE_BREAK in this loop. + +; CHECK: @loop_ge +; CHECK: WHILE +; CHECK: PRED_SET +; CHECK-NEXT: PREDICATED_BREAK +define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind { +entry: + %cmp5 = icmp sgt i32 %iterations, 0 + br i1 %cmp5, label %for.body, label %for.end + +for.body: ; preds = %for.body, %entry + %i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ] + %ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %i.07 = add nsw i32 %i.07.in, -1 + %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06 + store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4 + %add = add nsw i32 %ai.06, 1 + %exitcond = icmp eq i32 %add, %iterations + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll new file mode 100644 index 0000000000..0407533eaa --- /dev/null +++ b/test/CodeGen/R600/fabs.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MOV T{{[0-9]+\.[XYZW], \|T[0-9]+\.[XYZW]\|}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @fabs( float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @fabs(float ) readnone diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll new file mode 100644 index 0000000000..d7d1b6572c --- /dev/null +++ b/test/CodeGen/R600/fadd.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fadd float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fadd.v4f32.ll b/test/CodeGen/R600/fadd.v4f32.ll new file mode 100644 index 0000000000..85dbfd52cb --- /dev/null +++ b/test/CodeGen/R600/fadd.v4f32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fadd <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll new file mode 100644 index 0000000000..a94cfb5cf2 --- /dev/null +++ b/test/CodeGen/R600/fcmp-cnd.ll @@ -0,0 +1,14 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;Not checking arguments 2 and 3 to CNDE, because they may change between +;registers and literal.x depending on what the optimizer does. +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { +entry: + %0 = load float addrspace(1)* %in + %cmp = fcmp oeq float %0, 0.000000e+00 + %value = select i1 %cmp, i32 2, i32 3 + store i32 %value, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll b/test/CodeGen/R600/fcmp-cnde-int-args.ll new file mode 100644 index 0000000000..55aba0d72d --- /dev/null +++ b/test/CodeGen/R600/fcmp-cnde-int-args.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; This test checks a bug in R600TargetLowering::LowerSELECT_CC where the +; chance to optimize the fcmp + select instructions to SET* was missed +; due to the fact that the operands to fcmp and select had different types + +; CHECK: SET{{[A-Z]+}}_DX10 + +define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { +entry: + %0 = load float addrspace(1)* %in + %cmp = fcmp oeq float %0, 0.000000e+00 + %value = select i1 %cmp, i32 -1, i32 0 + store i32 %value, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll new file mode 100644 index 0000000000..37f621d239 --- /dev/null +++ b/test/CodeGen/R600/fcmp.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @fcmp_sext +; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) { +entry: + %0 = load float addrspace(1)* %in + %arrayidx1 = getelementptr inbounds float addrspace(1)* %in, i32 1 + %1 = load float addrspace(1)* %arrayidx1 + %cmp = fcmp oeq float %0, %1 + %sext = sext i1 %cmp to i32 + store i32 %sext, i32 addrspace(1)* %out + ret void +} + +; This test checks that a setcc node with f32 operands is lowered to a +; SET*_DX10 instruction. Previously we were lowering this to: +; SET* + FP_TO_SINT + +; CHECK: @fcmp_br +; CHECK: SET{{[N]*}}E_DX10 T{{[0-9]+\.[XYZW], [a-zA-Z0-9, .]+}}(5.0 + +define void @fcmp_br(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp oeq float %in, 5.0 + br i1 %0, label %IF, label %ENDIF + +IF: + %1 = getelementptr i32 addrspace(1)* %out, i32 1 + store i32 0, i32 addrspace(1)* %1 + br label %ENDIF + +ENDIF: + store i32 0, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.v4f32.ll new file mode 100644 index 0000000000..79e677f541 --- /dev/null +++ b/test/CodeGen/R600/fdiv.v4f32.ll @@ -0,0 +1,19 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fdiv <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll new file mode 100644 index 0000000000..845330f284 --- /dev/null +++ b/test/CodeGen/R600/floor.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: FLOOR T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @floor(float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @floor(float) readonly diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll new file mode 100644 index 0000000000..a3d4d0ff0d --- /dev/null +++ b/test/CodeGen/R600/fmad.ll @@ -0,0 +1,19 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MULADD_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = call float @llvm.R600.load.input(i32 2) + %r3 = fmul float %r0, %r1 + %r4 = fadd float %r3, %r2 + call void @llvm.AMDGPU.store.output(float %r4, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @fabs(float ) readnone diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll new file mode 100644 index 0000000000..3708f0b9ee --- /dev/null +++ b/test/CodeGen/R600/fmax.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MAX T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fcmp uge float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + call void @llvm.AMDGPU.store.output(float %r3, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll new file mode 100644 index 0000000000..19d59ab306 --- /dev/null +++ b/test/CodeGen/R600/fmin.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fcmp uge float %r0, %r1 + %r3 = select i1 %r2, float %r1, float %r0 + call void @llvm.AMDGPU.store.output(float %r3, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll new file mode 100644 index 0000000000..eb1d523c0b --- /dev/null +++ b/test/CodeGen/R600/fmul.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fmul float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fmul.v4f32.ll b/test/CodeGen/R600/fmul.v4f32.ll new file mode 100644 index 0000000000..6d44a0c5c7 --- /dev/null +++ b/test/CodeGen/R600/fmul.v4f32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fmul <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll new file mode 100644 index 0000000000..591aa52676 --- /dev/null +++ b/test/CodeGen/R600/fsub.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fsub float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fsub.v4f32.ll b/test/CodeGen/R600/fsub.v4f32.ll new file mode 100644 index 0000000000..612a57e4b6 --- /dev/null +++ b/test/CodeGen/R600/fsub.v4f32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float> addrspace(1) * %in + %b = load <4 x float> addrspace(1) * %b_ptr + %result = fsub <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/i8_to_double_to_float.ll b/test/CodeGen/R600/i8_to_double_to_float.ll new file mode 100644 index 0000000000..39f33227fa --- /dev/null +++ b/test/CodeGen/R600/i8_to_double_to_float.ll @@ -0,0 +1,11 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) { + %1 = load i8 addrspace(1)* %in + %2 = uitofp i8 %1 to double + %3 = fptrunc double %2 to float + store float %3, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll new file mode 100644 index 0000000000..71705a64f5 --- /dev/null +++ b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll @@ -0,0 +1,18 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;Test that a select with reversed True/False values is correctly lowered +;to a SETNE_INT. There should only be one SETNE_INT instruction. + +;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NOT: SETNE_INT + +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %0 = load i32 addrspace(1)* %in + %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %in, i32 1 + %1 = load i32 addrspace(1)* %arrayidx1 + %cmp = icmp eq i32 %0, %1 + %value = select i1 %cmp, i32 0, i32 -1 + store i32 %value, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll new file mode 100644 index 0000000000..82fb925c0d --- /dev/null +++ b/test/CodeGen/R600/kcache-fold.ll @@ -0,0 +1,52 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: MOV T{{[0-9]+\.[XYZW], CBuf0\[[0-9]+\]\.[XYZW]}} + +define void @main() { +main_body: + %0 = load <4 x float> addrspace(8)* null + %1 = extractelement <4 x float> %0, i32 0 + %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %3 = extractelement <4 x float> %2, i32 0 + %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %5 = extractelement <4 x float> %4, i32 0 + %6 = fcmp ult float %1, 0.000000e+00 + %7 = select i1 %6, float %3, float %5 + %8 = load <4 x float> addrspace(8)* null + %9 = extractelement <4 x float> %8, i32 1 + %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %11 = extractelement <4 x float> %10, i32 1 + %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %13 = extractelement <4 x float> %12, i32 1 + %14 = fcmp ult float %9, 0.000000e+00 + %15 = select i1 %14, float %11, float %13 + %16 = load <4 x float> addrspace(8)* null + %17 = extractelement <4 x float> %16, i32 2 + %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %19 = extractelement <4 x float> %18, i32 2 + %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %21 = extractelement <4 x float> %20, i32 2 + %22 = fcmp ult float %17, 0.000000e+00 + %23 = select i1 %22, float %19, float %21 + %24 = load <4 x float> addrspace(8)* null + %25 = extractelement <4 x float> %24, i32 3 + %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %27 = extractelement <4 x float> %26, i32 3 + %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %29 = extractelement <4 x float> %28, i32 3 + %30 = fcmp ult float %25, 0.000000e+00 + %31 = select i1 %30, float %27, float %29 + %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00) + %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00) + %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00) + %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00) + %36 = insertelement <4 x float> undef, float %32, i32 0 + %37 = insertelement <4 x float> %36, float %33, i32 1 + %38 = insertelement <4 x float> %37, float %34, i32 2 + %39 = insertelement <4 x float> %38, float %35, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0) + ret void +} + +declare float @llvm.AMDIL.clamp.(float, float, float) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) diff --git a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll new file mode 100644 index 0000000000..1aae7f9f91 --- /dev/null +++ b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; This tests a bug where LegalizeDAG was not checking the target's +; BooleanContents value and always using one for true, when expanding +; setcc to select_cc. +; +; This bug caused the icmp IR instruction to be expanded to two machine +; instructions, when only one is needed. +; + +; CHECK: @setcc_expand +; CHECK: SET +; CHECK-NOT: CND +define void @setcc_expand(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp eq i32 %in, 5 + br i1 %0, label %IF, label %ENDIF +IF: + %1 = getelementptr i32 addrspace(1)* %out, i32 1 + store i32 0, i32 addrspace(1)* %1 + br label %ENDIF + +ENDIF: + store i32 0, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/lit.local.cfg b/test/CodeGen/R600/lit.local.cfg new file mode 100644 index 0000000000..36ee493e59 --- /dev/null +++ b/test/CodeGen/R600/lit.local.cfg @@ -0,0 +1,13 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +def getRoot(config): + if not config.parent: + return config + return getRoot(config.parent) + +root = getRoot(config) + +targets = set(root.targets_to_build.split()) +if not 'R600' in targets: + config.unsupported = True + diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll new file mode 100644 index 0000000000..e69f64e0e1 --- /dev/null +++ b/test/CodeGen/R600/literals.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; Test using an integer literal constant. +; Generated ASM should be: +; ADD_INT REG literal.x, 5 +; or +; ADD_INT literal.x REG, 5 + +; CHECK: @i32_literal +; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} 5 +define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = add i32 5, %in + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; Test using a float literal constant. +; Generated ASM should be: +; ADD REG literal.x, 5.0 +; or +; ADD literal.x REG, 5.0 + +; CHECK: @float_literal +; CHECK: ADD {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} {{[0-9]+}}(5.0 +define void @float_literal(float addrspace(1)* %out, float %in) { +entry: + %0 = fadd float 5.0, %in + store float %0, float addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll new file mode 100644 index 0000000000..693eb27457 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll @@ -0,0 +1,17 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1) + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.AMDGPU.mul(float ,float ) readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/test/CodeGen/R600/llvm.AMDGPU.tex.ll new file mode 100644 index 0000000000..74331fa269 --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.tex.ll @@ -0,0 +1,42 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 1 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 2 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 3 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 4 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 5 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 6 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 7 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 8 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 9 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 10 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 11 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 12 +;CHECK: TEX_SAMPLE_CT{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 13 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 14 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 15 +;CHECK: TEX_SAMPLET{{[0-9]+\.XYZW, T[0-9]+\.XYZW}}, 0, 0, 16 + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %addr = load <4 x float> addrspace(1)* %in + %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %addr, i32 0, i32 0, i32 1) + %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res1, i32 0, i32 0, i32 2) + %res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res2, i32 0, i32 0, i32 3) + %res4 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res3, i32 0, i32 0, i32 4) + %res5 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res4, i32 0, i32 0, i32 5) + %res6 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res5, i32 0, i32 0, i32 6) + %res7 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res6, i32 0, i32 0, i32 7) + %res8 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res7, i32 0, i32 0, i32 8) + %res9 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res8, i32 0, i32 0, i32 9) + %res10 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res9, i32 0, i32 0, i32 10) + %res11 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res10, i32 0, i32 0, i32 11) + %res12 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res11, i32 0, i32 0, i32 12) + %res13 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res12, i32 0, i32 0, i32 13) + %res14 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res13, i32 0, i32 0, i32 14) + %res15 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res14, i32 0, i32 0, i32 15) + %res16 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res15, i32 0, i32 0, i32 16) + store <4 x float> %res16, <4 x float> addrspace(1)* %out + ret void +} + +declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll new file mode 100644 index 0000000000..fac957f7ee --- /dev/null +++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: TRUNC T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.AMDGPU.trunc( float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.AMDGPU.trunc(float ) readnone diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll new file mode 100644 index 0000000000..a8f604ac6d --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll @@ -0,0 +1,21 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: S_MOV_B32 +;CHECK-NEXT: V_INTERP_MOV_F32 + +define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" { +main_body: + %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3) + %5 = call i32 @llvm.SI.packf16(float %4, float %4) + %6 = bitcast i32 %5 to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %6, float %6, float %6, float %6) + ret void +} + +declare void @llvm.AMDGPU.shader.type(i32) + +declare float @llvm.SI.fs.constant(i32, i32, i32) readonly + +declare i32 @llvm.SI.packf16(float, float) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll new file mode 100644 index 0000000000..d397f3b678 --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.sample.ll @@ -0,0 +1,71 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: IMAGE_SAMPLE +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE_C +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE_C +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE_C +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE_C +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE_C +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE_C +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE +;CHECK-NEXT: S_WAITCNT 1904 +;CHECK-NEXT: IMAGE_SAMPLE + +define void @test() { + %res1 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 1) + %res2 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 2) + %res3 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 3) + %res4 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 4) + %res5 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 5) + %res6 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 6) + %res7 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 7) + %res8 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 8) + %res9 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 9) + %res10 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 10) + %res11 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 11) + %res12 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 12) + %res13 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 13) + %res14 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 14) + %res15 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 15) + %res16 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + <8 x i32> undef, <4 x i32> undef, i32 16) + ret void +} + +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll new file mode 100644 index 0000000000..dc120bfb00 --- /dev/null +++ b/test/CodeGen/R600/llvm.cos.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: COS T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.cos.f32(float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.cos.f32(float) readnone + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll new file mode 100644 index 0000000000..0ae9172579 --- /dev/null +++ b/test/CodeGen/R600/llvm.pow.ll @@ -0,0 +1,19 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: LOG_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NEXT: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NEXT: EXP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = call float @llvm.pow.f32( float %r0, float %r1) + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.pow.f32(float ,float ) readonly diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll new file mode 100644 index 0000000000..5cd6998c93 --- /dev/null +++ b/test/CodeGen/R600/llvm.sin.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: SIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.sin.f32( float %r0) + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.sin.f32(float) readnone + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) diff --git a/test/CodeGen/R600/load.constant_addrspace.f32.ll b/test/CodeGen/R600/load.constant_addrspace.f32.ll new file mode 100644 index 0000000000..93627283bb --- /dev/null +++ b/test/CodeGen/R600/load.constant_addrspace.f32.ll @@ -0,0 +1,9 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @test(float addrspace(1)* %out, float addrspace(2)* %in) { + %1 = load float addrspace(2)* %in + store float %1, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/load.i8.ll b/test/CodeGen/R600/load.i8.ll new file mode 100644 index 0000000000..b070dcd520 --- /dev/null +++ b/test/CodeGen/R600/load.i8.ll @@ -0,0 +1,10 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @test(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { + %1 = load i8 addrspace(1)* %in + %2 = zext i8 %1 to i32 + store i32 %2, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll new file mode 100644 index 0000000000..18895a423e --- /dev/null +++ b/test/CodeGen/R600/predicates.ll @@ -0,0 +1,100 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; These tests make sure the compiler is optimizing branches using predicates +; when it is legal to do so. + +; CHECK: @simple_if +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, +; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +define void @simple_if(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + br i1 %0, label %IF, label %ENDIF + +IF: + %1 = shl i32 %in, 1 + br label %ENDIF + +ENDIF: + %2 = phi i32 [ %in, %entry ], [ %1, %IF ] + store i32 %2, i32 addrspace(1)* %out + ret void +} + +; CHECK: @simple_if_else +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, +; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +define void @simple_if_else(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + br i1 %0, label %IF, label %ELSE + +IF: + %1 = shl i32 %in, 1 + br label %ENDIF + +ELSE: + %2 = lshr i32 %in, 1 + br label %ENDIF + +ENDIF: + %3 = phi i32 [ %1, %IF ], [ %2, %ELSE ] + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @nested_if +; CHECK: IF_PREDICATE_SET +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, +; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +; CHECK: ENDIF +define void @nested_if(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + br i1 %0, label %IF0, label %ENDIF + +IF0: + %1 = add i32 %in, 10 + %2 = icmp sgt i32 %1, 0 + br i1 %2, label %IF1, label %ENDIF + +IF1: + %3 = shl i32 %1, 1 + br label %ENDIF + +ENDIF: + %4 = phi i32 [%in, %entry], [%1, %IF0], [%3, %IF1] + store i32 %4, i32 addrspace(1)* %out + ret void +} + +; CHECK: @nested_if_else +; CHECK: IF_PREDICATE_SET +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, +; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel +; CHECK: ENDIF +define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + br i1 %0, label %IF0, label %ENDIF + +IF0: + %1 = add i32 %in, 10 + %2 = icmp sgt i32 %1, 0 + br i1 %2, label %IF1, label %ELSE1 + +IF1: + %3 = shl i32 %1, 1 + br label %ENDIF + +ELSE1: + %4 = lshr i32 %in, 1 + br label %ENDIF + +ENDIF: + %5 = phi i32 [%in, %entry], [%3, %IF1], [%4, %ELSE1] + store i32 %5, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/R600/reciprocal.ll new file mode 100644 index 0000000000..6838c1ae36 --- /dev/null +++ b/test/CodeGen/R600/reciprocal.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = fdiv float 1.0, %r0 + call void @llvm.AMDGPU.store.output(float %r1, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + +declare float @llvm.AMDGPU.rcp(float ) readnone diff --git a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll new file mode 100644 index 0000000000..ba9620c40a --- /dev/null +++ b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll @@ -0,0 +1,83 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = call float @llvm.R600.interp.input(i32 0, i32 0) + %1 = call float @llvm.R600.interp.input(i32 1, i32 0) + %2 = call float @llvm.R600.interp.input(i32 2, i32 0) + %3 = call float @llvm.R600.interp.input(i32 3, i32 0) + %4 = fcmp ult float %1, 0.000000e+00 + %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 + %6 = fsub float -0.000000e+00, %5 + %7 = fptosi float %6 to i32 + %8 = bitcast i32 %7 to float + %9 = fcmp ult float %0, 5.700000e+01 + %10 = select i1 %9, float 1.000000e+00, float 0.000000e+00 + %11 = fsub float -0.000000e+00, %10 + %12 = fptosi float %11 to i32 + %13 = bitcast i32 %12 to float + %14 = bitcast float %8 to i32 + %15 = bitcast float %13 to i32 + %16 = and i32 %14, %15 + %17 = bitcast i32 %16 to float + %18 = bitcast float %17 to i32 + %19 = icmp ne i32 %18, 0 + %20 = fcmp ult float %0, 0.000000e+00 + %21 = select i1 %20, float 1.000000e+00, float 0.000000e+00 + %22 = fsub float -0.000000e+00, %21 + %23 = fptosi float %22 to i32 + %24 = bitcast i32 %23 to float + %25 = bitcast float %24 to i32 + %26 = icmp ne i32 %25, 0 + br i1 %19, label %IF, label %ELSE + +IF: ; preds = %main_body + %. = select i1 %26, float 0.000000e+00, float 1.000000e+00 + %.18 = select i1 %26, float 1.000000e+00, float 0.000000e+00 + br label %ENDIF + +ELSE: ; preds = %main_body + br i1 %26, label %ENDIF, label %ELSE17 + +ENDIF: ; preds = %ELSE17, %ELSE, %IF + %temp1.0 = phi float [ %., %IF ], [ %48, %ELSE17 ], [ 0.000000e+00, %ELSE ] + %temp2.0 = phi float [ 0.000000e+00, %IF ], [ %49, %ELSE17 ], [ 1.000000e+00, %ELSE ] + %temp.0 = phi float [ %.18, %IF ], [ %47, %ELSE17 ], [ 0.000000e+00, %ELSE ] + %27 = call float @llvm.AMDIL.clamp.(float %temp.0, float 0.000000e+00, float 1.000000e+00) + %28 = call float @llvm.AMDIL.clamp.(float %temp1.0, float 0.000000e+00, float 1.000000e+00) + %29 = call float @llvm.AMDIL.clamp.(float %temp2.0, float 0.000000e+00, float 1.000000e+00) + %30 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) + %31 = insertelement <4 x float> undef, float %27, i32 0 + %32 = insertelement <4 x float> %31, float %28, i32 1 + %33 = insertelement <4 x float> %32, float %29, i32 2 + %34 = insertelement <4 x float> %33, float %30, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %34, i32 0, i32 0) + ret void + +ELSE17: ; preds = %ELSE + %35 = fadd float 0.000000e+00, 0x3FC99999A0000000 + %36 = fadd float 0.000000e+00, 0x3FC99999A0000000 + %37 = fadd float 0.000000e+00, 0x3FC99999A0000000 + %38 = fadd float %35, 0x3FC99999A0000000 + %39 = fadd float %36, 0x3FC99999A0000000 + %40 = fadd float %37, 0x3FC99999A0000000 + %41 = fadd float %38, 0x3FC99999A0000000 + %42 = fadd float %39, 0x3FC99999A0000000 + %43 = fadd float %40, 0x3FC99999A0000000 + %44 = fadd float %41, 0x3FC99999A0000000 + %45 = fadd float %42, 0x3FC99999A0000000 + %46 = fadd float %43, 0x3FC99999A0000000 + %47 = fadd float %44, 0x3FC99999A0000000 + %48 = fadd float %45, 0x3FC99999A0000000 + %49 = fadd float %46, 0x3FC99999A0000000 + br label %ENDIF +} + +declare float @llvm.R600.interp.input(i32, i32) #0 + +declare float @llvm.AMDIL.clamp.(float, float, float) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readnone } diff --git a/test/CodeGen/R600/schedule-fs-loop-nested.ll b/test/CodeGen/R600/schedule-fs-loop-nested.ll new file mode 100644 index 0000000000..5e875c49ab --- /dev/null +++ b/test/CodeGen/R600/schedule-fs-loop-nested.ll @@ -0,0 +1,88 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = load <4 x float> addrspace(9)* null + %1 = extractelement <4 x float> %0, i32 3 + %2 = fptosi float %1 to i32 + %3 = bitcast i32 %2 to float + %4 = bitcast float %3 to i32 + %5 = sdiv i32 %4, 4 + %6 = bitcast i32 %5 to float + %7 = bitcast float %6 to i32 + %8 = mul i32 %7, 4 + %9 = bitcast i32 %8 to float + %10 = bitcast float %9 to i32 + %11 = sub i32 0, %10 + %12 = bitcast i32 %11 to float + %13 = bitcast float %3 to i32 + %14 = bitcast float %12 to i32 + %15 = add i32 %13, %14 + %16 = bitcast i32 %15 to float + %17 = load <4 x float> addrspace(9)* null + %18 = extractelement <4 x float> %17, i32 0 + %19 = load <4 x float> addrspace(9)* null + %20 = extractelement <4 x float> %19, i32 1 + %21 = load <4 x float> addrspace(9)* null + %22 = extractelement <4 x float> %21, i32 2 + br label %LOOP + +LOOP: ; preds = %IF31, %main_body + %temp12.0 = phi float [ 0.000000e+00, %main_body ], [ %47, %IF31 ] + %temp6.0 = phi float [ %22, %main_body ], [ %temp6.1, %IF31 ] + %temp5.0 = phi float [ %20, %main_body ], [ %temp5.1, %IF31 ] + %temp4.0 = phi float [ %18, %main_body ], [ %temp4.1, %IF31 ] + %23 = bitcast float %temp12.0 to i32 + %24 = bitcast float %6 to i32 + %25 = icmp sge i32 %23, %24 + %26 = sext i1 %25 to i32 + %27 = bitcast i32 %26 to float + %28 = bitcast float %27 to i32 + %29 = icmp ne i32 %28, 0 + br i1 %29, label %IF, label %LOOP29 + +IF: ; preds = %LOOP + %30 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) + %31 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00) + %32 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00) + %33 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) + %34 = insertelement <4 x float> undef, float %30, i32 0 + %35 = insertelement <4 x float> %34, float %31, i32 1 + %36 = insertelement <4 x float> %35, float %32, i32 2 + %37 = insertelement <4 x float> %36, float %33, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %37, i32 0, i32 0) + ret void + +LOOP29: ; preds = %LOOP, %ENDIF30 + %temp6.1 = phi float [ %temp4.1, %ENDIF30 ], [ %temp6.0, %LOOP ] + %temp5.1 = phi float [ %temp6.1, %ENDIF30 ], [ %temp5.0, %LOOP ] + %temp4.1 = phi float [ %temp5.1, %ENDIF30 ], [ %temp4.0, %LOOP ] + %temp20.0 = phi float [ %50, %ENDIF30 ], [ 0.000000e+00, %LOOP ] + %38 = bitcast float %temp20.0 to i32 + %39 = bitcast float %16 to i32 + %40 = icmp sge i32 %38, %39 + %41 = sext i1 %40 to i32 + %42 = bitcast i32 %41 to float + %43 = bitcast float %42 to i32 + %44 = icmp ne i32 %43, 0 + br i1 %44, label %IF31, label %ENDIF30 + +IF31: ; preds = %LOOP29 + %45 = bitcast float %temp12.0 to i32 + %46 = add i32 %45, 1 + %47 = bitcast i32 %46 to float + br label %LOOP + +ENDIF30: ; preds = %LOOP29 + %48 = bitcast float %temp20.0 to i32 + %49 = add i32 %48, 1 + %50 = bitcast i32 %49 to float + br label %LOOP29 +} + +declare float @llvm.AMDIL.clamp.(float, float, float) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readnone } diff --git a/test/CodeGen/R600/schedule-fs-loop.ll b/test/CodeGen/R600/schedule-fs-loop.ll new file mode 100644 index 0000000000..d142cacd43 --- /dev/null +++ b/test/CodeGen/R600/schedule-fs-loop.ll @@ -0,0 +1,55 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = load <4 x float> addrspace(9)* null + %1 = extractelement <4 x float> %0, i32 3 + %2 = fptosi float %1 to i32 + %3 = bitcast i32 %2 to float + %4 = load <4 x float> addrspace(9)* null + %5 = extractelement <4 x float> %4, i32 0 + %6 = load <4 x float> addrspace(9)* null + %7 = extractelement <4 x float> %6, i32 1 + %8 = load <4 x float> addrspace(9)* null + %9 = extractelement <4 x float> %8, i32 2 + br label %LOOP + +LOOP: ; preds = %ENDIF, %main_body + %temp4.0 = phi float [ %5, %main_body ], [ %temp5.0, %ENDIF ] + %temp5.0 = phi float [ %7, %main_body ], [ %temp6.0, %ENDIF ] + %temp6.0 = phi float [ %9, %main_body ], [ %temp4.0, %ENDIF ] + %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %27, %ENDIF ] + %10 = bitcast float %temp8.0 to i32 + %11 = bitcast float %3 to i32 + %12 = icmp sge i32 %10, %11 + %13 = sext i1 %12 to i32 + %14 = bitcast i32 %13 to float + %15 = bitcast float %14 to i32 + %16 = icmp ne i32 %15, 0 + br i1 %16, label %IF, label %ENDIF + +IF: ; preds = %LOOP + %17 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00) + %18 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00) + %19 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00) + %20 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00) + %21 = insertelement <4 x float> undef, float %17, i32 0 + %22 = insertelement <4 x float> %21, float %18, i32 1 + %23 = insertelement <4 x float> %22, float %19, i32 2 + %24 = insertelement <4 x float> %23, float %20, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %24, i32 0, i32 0) + ret void + +ENDIF: ; preds = %LOOP + %25 = bitcast float %temp8.0 to i32 + %26 = add i32 %25, 1 + %27 = bitcast i32 %26 to float + br label %LOOP +} + +declare float @llvm.AMDIL.clamp.(float, float, float) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readnone } diff --git a/test/CodeGen/R600/schedule-if-2.ll b/test/CodeGen/R600/schedule-if-2.ll new file mode 100644 index 0000000000..6afd677292 --- /dev/null +++ b/test/CodeGen/R600/schedule-if-2.ll @@ -0,0 +1,94 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %1 = extractelement <4 x float> %0, i32 0 + %2 = fadd float 1.000000e+03, %1 + %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %4 = extractelement <4 x float> %3, i32 0 + %5 = bitcast float %4 to i32 + %6 = icmp eq i32 %5, 0 + %7 = sext i1 %6 to i32 + %8 = bitcast i32 %7 to float + %9 = bitcast float %8 to i32 + %10 = icmp ne i32 %9, 0 + br i1 %10, label %IF, label %ELSE + +IF: ; preds = %main_body + %11 = call float @fabs(float %2) + %12 = fcmp ueq float %11, 0x7FF0000000000000 + %13 = select i1 %12, float 1.000000e+00, float 0.000000e+00 + %14 = fsub float -0.000000e+00, %13 + %15 = fptosi float %14 to i32 + %16 = bitcast i32 %15 to float + %17 = bitcast float %16 to i32 + %18 = icmp ne i32 %17, 0 + %. = select i1 %18, float 0x36A0000000000000, float 0.000000e+00 + %19 = fcmp une float %2, %2 + %20 = select i1 %19, float 1.000000e+00, float 0.000000e+00 + %21 = fsub float -0.000000e+00, %20 + %22 = fptosi float %21 to i32 + %23 = bitcast i32 %22 to float + %24 = bitcast float %23 to i32 + %25 = icmp ne i32 %24, 0 + %temp8.0 = select i1 %25, float 0x36A0000000000000, float 0.000000e+00 + %26 = bitcast float %. to i32 + %27 = sitofp i32 %26 to float + %28 = bitcast float %temp8.0 to i32 + %29 = sitofp i32 %28 to float + %30 = fcmp ugt float %2, 0.000000e+00 + %31 = select i1 %30, float 1.000000e+00, float %2 + %32 = fcmp uge float %31, 0.000000e+00 + %33 = select i1 %32, float %31, float -1.000000e+00 + %34 = fadd float %33, 1.000000e+00 + %35 = fmul float %34, 5.000000e-01 + br label %ENDIF + +ELSE: ; preds = %main_body + %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %37 = extractelement <4 x float> %36, i32 0 + %38 = bitcast float %37 to i32 + %39 = icmp eq i32 %38, 1 + %40 = sext i1 %39 to i32 + %41 = bitcast i32 %40 to float + %42 = bitcast float %41 to i32 + %43 = icmp ne i32 %42, 0 + br i1 %43, label %IF23, label %ENDIF + +ENDIF: ; preds = %IF23, %ELSE, %IF + %temp4.0 = phi float [ %2, %IF ], [ %56, %IF23 ], [ 0.000000e+00, %ELSE ] + %temp5.0 = phi float [ %27, %IF ], [ %60, %IF23 ], [ 0.000000e+00, %ELSE ] + %temp6.0 = phi float [ %29, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ] + %temp7.0 = phi float [ %35, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ] + %44 = insertelement <4 x float> undef, float %temp4.0, i32 0 + %45 = insertelement <4 x float> %44, float %temp5.0, i32 1 + %46 = insertelement <4 x float> %45, float %temp6.0, i32 2 + %47 = insertelement <4 x float> %46, float %temp7.0, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %47, i32 0, i32 0) + ret void + +IF23: ; preds = %ELSE + %48 = fcmp ult float 0.000000e+00, %2 + %49 = select i1 %48, float 1.000000e+00, float 0.000000e+00 + %50 = fsub float -0.000000e+00, %49 + %51 = fptosi float %50 to i32 + %52 = bitcast i32 %51 to float + %53 = bitcast float %52 to i32 + %54 = icmp ne i32 %53, 0 + %.28 = select i1 %54, float 0x36A0000000000000, float 0.000000e+00 + %55 = bitcast float %.28 to i32 + %56 = sitofp i32 %55 to float + %57 = load <4 x float> addrspace(8)* null + %58 = extractelement <4 x float> %57, i32 0 + %59 = fsub float -0.000000e+00, %58 + %60 = fadd float %2, %59 + br label %ENDIF +} + +declare float @fabs(float) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readonly } diff --git a/test/CodeGen/R600/schedule-if.ll b/test/CodeGen/R600/schedule-if.ll new file mode 100644 index 0000000000..347d92fd6a --- /dev/null +++ b/test/CodeGen/R600/schedule-if.ll @@ -0,0 +1,46 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %1 = extractelement <4 x float> %0, i32 0 + %2 = bitcast float %1 to i32 + %3 = icmp eq i32 %2, 0 + %4 = sext i1 %3 to i32 + %5 = bitcast i32 %4 to float + %6 = bitcast float %5 to i32 + %7 = icmp ne i32 %6, 0 + br i1 %7, label %ENDIF, label %ELSE + +ELSE: ; preds = %main_body + %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %9 = extractelement <4 x float> %8, i32 0 + %10 = bitcast float %9 to i32 + %11 = icmp eq i32 %10, 1 + %12 = sext i1 %11 to i32 + %13 = bitcast i32 %12 to float + %14 = bitcast float %13 to i32 + %15 = icmp ne i32 %14, 0 + br i1 %15, label %IF13, label %ENDIF + +ENDIF: ; preds = %IF13, %ELSE, %main_body + %temp.0 = phi float [ 1.000000e+03, %main_body ], [ 1.000000e+00, %IF13 ], [ 0.000000e+00, %ELSE ] + %temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %23, %IF13 ], [ 0.000000e+00, %ELSE ] + %temp3.0 = phi float [ 1.000000e+00, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ] + %16 = insertelement <4 x float> undef, float %temp.0, i32 0 + %17 = insertelement <4 x float> %16, float %temp1.0, i32 1 + %18 = insertelement <4 x float> %17, float 0.000000e+00, i32 2 + %19 = insertelement <4 x float> %18, float %temp3.0, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0) + ret void + +IF13: ; preds = %ELSE + %20 = load <4 x float> addrspace(8)* null + %21 = extractelement <4 x float> %20, i32 0 + %22 = fsub float -0.000000e+00, %21 + %23 = fadd float 1.000000e+03, %22 + br label %ENDIF +} + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll new file mode 100644 index 0000000000..44b7c2f680 --- /dev/null +++ b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll @@ -0,0 +1,134 @@ +;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched +;REQUIRES: asserts + +define void @main() { +main_body: + %0 = call float @llvm.R600.load.input(i32 4) + %1 = call float @llvm.R600.load.input(i32 5) + %2 = call float @llvm.R600.load.input(i32 6) + %3 = call float @llvm.R600.load.input(i32 7) + %4 = fcmp ult float %0, 0.000000e+00 + %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 + %6 = fsub float -0.000000e+00, %5 + %7 = fptosi float %6 to i32 + %8 = bitcast i32 %7 to float + %9 = bitcast float %8 to i32 + %10 = icmp ne i32 %9, 0 + br i1 %10, label %LOOP, label %ENDIF + +ENDIF: ; preds = %ENDIF16, %LOOP, %main_body + %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %temp.1, %LOOP ], [ %temp.1, %ENDIF16 ] + %temp1.0 = phi float [ 1.000000e+00, %main_body ], [ %temp1.1, %LOOP ], [ %temp1.1, %ENDIF16 ] + %temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %temp2.1, %LOOP ], [ %temp2.1, %ENDIF16 ] + %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %temp3.1, %LOOP ], [ %temp3.1, %ENDIF16 ] + %11 = load <4 x float> addrspace(9)* null + %12 = extractelement <4 x float> %11, i32 0 + %13 = fmul float %12, %0 + %14 = load <4 x float> addrspace(9)* null + %15 = extractelement <4 x float> %14, i32 1 + %16 = fmul float %15, %0 + %17 = load <4 x float> addrspace(9)* null + %18 = extractelement <4 x float> %17, i32 2 + %19 = fmul float %18, %0 + %20 = load <4 x float> addrspace(9)* null + %21 = extractelement <4 x float> %20, i32 3 + %22 = fmul float %21, %0 + %23 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) + %24 = extractelement <4 x float> %23, i32 0 + %25 = fmul float %24, %1 + %26 = fadd float %25, %13 + %27 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) + %28 = extractelement <4 x float> %27, i32 1 + %29 = fmul float %28, %1 + %30 = fadd float %29, %16 + %31 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) + %32 = extractelement <4 x float> %31, i32 2 + %33 = fmul float %32, %1 + %34 = fadd float %33, %19 + %35 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) + %36 = extractelement <4 x float> %35, i32 3 + %37 = fmul float %36, %1 + %38 = fadd float %37, %22 + %39 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) + %40 = extractelement <4 x float> %39, i32 0 + %41 = fmul float %40, %2 + %42 = fadd float %41, %26 + %43 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) + %44 = extractelement <4 x float> %43, i32 1 + %45 = fmul float %44, %2 + %46 = fadd float %45, %30 + %47 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) + %48 = extractelement <4 x float> %47, i32 2 + %49 = fmul float %48, %2 + %50 = fadd float %49, %34 + %51 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) + %52 = extractelement <4 x float> %51, i32 3 + %53 = fmul float %52, %2 + %54 = fadd float %53, %38 + %55 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) + %56 = extractelement <4 x float> %55, i32 0 + %57 = fmul float %56, %3 + %58 = fadd float %57, %42 + %59 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) + %60 = extractelement <4 x float> %59, i32 1 + %61 = fmul float %60, %3 + %62 = fadd float %61, %46 + %63 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) + %64 = extractelement <4 x float> %63, i32 2 + %65 = fmul float %64, %3 + %66 = fadd float %65, %50 + %67 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) + %68 = extractelement <4 x float> %67, i32 3 + %69 = fmul float %68, %3 + %70 = fadd float %69, %54 + %71 = insertelement <4 x float> undef, float %58, i32 0 + %72 = insertelement <4 x float> %71, float %62, i32 1 + %73 = insertelement <4 x float> %72, float %66, i32 2 + %74 = insertelement <4 x float> %73, float %70, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %74, i32 60, i32 1) + %75 = insertelement <4 x float> undef, float %temp.0, i32 0 + %76 = insertelement <4 x float> %75, float %temp1.0, i32 1 + %77 = insertelement <4 x float> %76, float %temp2.0, i32 2 + %78 = insertelement <4 x float> %77, float %temp3.0, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %78, i32 0, i32 2) + ret void + +LOOP: ; preds = %main_body, %ENDIF19 + %temp.1 = phi float [ %93, %ENDIF19 ], [ 0.000000e+00, %main_body ] + %temp1.1 = phi float [ %94, %ENDIF19 ], [ 1.000000e+00, %main_body ] + %temp2.1 = phi float [ %95, %ENDIF19 ], [ 0.000000e+00, %main_body ] + %temp3.1 = phi float [ %96, %ENDIF19 ], [ 0.000000e+00, %main_body ] + %temp4.0 = phi float [ %97, %ENDIF19 ], [ -2.000000e+00, %main_body ] + %79 = fcmp uge float %temp4.0, %0 + %80 = select i1 %79, float 1.000000e+00, float 0.000000e+00 + %81 = fsub float -0.000000e+00, %80 + %82 = fptosi float %81 to i32 + %83 = bitcast i32 %82 to float + %84 = bitcast float %83 to i32 + %85 = icmp ne i32 %84, 0 + br i1 %85, label %ENDIF, label %ENDIF16 + +ENDIF16: ; preds = %LOOP + %86 = fcmp une float %2, %temp4.0 + %87 = select i1 %86, float 1.000000e+00, float 0.000000e+00 + %88 = fsub float -0.000000e+00, %87 + %89 = fptosi float %88 to i32 + %90 = bitcast i32 %89 to float + %91 = bitcast float %90 to i32 + %92 = icmp ne i32 %91, 0 + br i1 %92, label %ENDIF, label %ENDIF19 + +ENDIF19: ; preds = %ENDIF16 + %93 = fadd float %temp.1, 1.000000e+00 + %94 = fadd float %temp1.1, 0.000000e+00 + %95 = fadd float %temp2.1, 0.000000e+00 + %96 = fadd float %temp3.1, 0.000000e+00 + %97 = fadd float %temp4.0, 1.000000e+00 + br label %LOOP +} + +declare float @llvm.R600.load.input(i32) #0 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { readnone } diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll new file mode 100644 index 0000000000..3556facfba --- /dev/null +++ b/test/CodeGen/R600/sdiv.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; The code generated by sdiv is long and complex and may frequently change. +; The goal of this test is to make sure the ISel doesn't fail. +; +; This program was previously failing to compile when one of the selectcc +; opcodes generated by the sdiv lowering was being legalized and optimized to: +; selectcc Remainder -1, 0, -1, SETGT +; This was fixed by adding an additional pattern in R600Instructions.td to +; match this pattern with a CNDGE_INT. + +; CHECK: RETURN + +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1 + %num = load i32 addrspace(1) * %in + %den = load i32 addrspace(1) * %den_ptr + %result = sdiv i32 %num, %den + store i32 %result, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll new file mode 100644 index 0000000000..359ca1e6f8 --- /dev/null +++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; Note additional optimizations may cause this SGT to be replaced with a +; CND* instruction. +; CHECK: SETGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}} +; Test a selectcc with i32 LHS/RHS and float True/False + +define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %0 = load i32 addrspace(1)* %in + %1 = icmp sge i32 %0, 0 + %2 = select i1 %1, float 1.0, float 0.0 + store float %2, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll new file mode 100644 index 0000000000..02d9353904 --- /dev/null +++ b/test/CodeGen/R600/selectcc-opt.ll @@ -0,0 +1,64 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @test_a +; CHECK-NOT: CND +; CHECK: SET{{[NEQGTL]+}}_DX10 + +define void @test_a(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 0.000000e+00 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + %4 = bitcast i32 %3 to float + %5 = bitcast float %4 to i32 + %6 = icmp ne i32 %5, 0 + br i1 %6, label %IF, label %ENDIF + +IF: + %7 = getelementptr i32 addrspace(1)* %out, i32 1 + store i32 0, i32 addrspace(1)* %7 + br label %ENDIF + +ENDIF: + store i32 0, i32 addrspace(1)* %out + ret void +} + +; Same as test_a, but the branch labels are swapped to produce the inverse cc +; for the icmp instruction + +; CHECK: @test_b +; CHECK: SET{{[GTEQN]+}}_DX10 +; CHECK-NEXT: PRED_ +define void @test_b(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 0.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + %4 = bitcast i32 %3 to float + %5 = bitcast float %4 to i32 + %6 = icmp ne i32 %5, 0 + br i1 %6, label %ENDIF, label %IF + +IF: + %7 = getelementptr i32 addrspace(1)* %out, i32 1 + store i32 0, i32 addrspace(1)* %7 + br label %ENDIF + +ENDIF: + store i32 0, i32 addrspace(1)* %out + ret void +} + +; Test a CND*_INT instruction with float true/false values +; CHECK: @test_c +; CHECK: CND{{[GTE]+}}_INT +define void @test_c(float addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sgt i32 %in, 0 + %1 = select i1 %0, float 2.0, float 3.0 + store float %1, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/selectcc_cnde.ll b/test/CodeGen/R600/selectcc_cnde.ll new file mode 100644 index 0000000000..f0a0f512ba --- /dev/null +++ b/test/CodeGen/R600/selectcc_cnde.ll @@ -0,0 +1,11 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK-NOT: SETE +;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, [-0-9]+\(2.0}} +define void @test(float addrspace(1)* %out, float addrspace(1)* %in) { + %1 = load float addrspace(1)* %in + %2 = fcmp oeq float %1, 0.0 + %3 = select i1 %2, float 1.0, float 2.0 + store float %3, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/selectcc_cnde_int.ll b/test/CodeGen/R600/selectcc_cnde_int.ll new file mode 100644 index 0000000000..b38078e26d --- /dev/null +++ b/test/CodeGen/R600/selectcc_cnde_int.ll @@ -0,0 +1,11 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK-NOT: SETE_INT +;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}} +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %1 = load i32 addrspace(1)* %in + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 1, i32 2 + store i32 %3, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll new file mode 100644 index 0000000000..54febcf0e6 --- /dev/null +++ b/test/CodeGen/R600/set-dx10.ll @@ -0,0 +1,137 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; These tests check that floating point comparisons which are used by select +; to store integer true (-1) and false (0) values are lowered to one of the +; SET*DX10 instructions. + +; CHECK: @fcmp_une_select_fptosi +; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp une float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_une_select_i32 +; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp une float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ueq_select_fptosi +; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ueq float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ueq_select_i32 +; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ueq float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ugt_select_fptosi +; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ugt float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ugt_select_i32 +; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ugt float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_uge_select_fptosi +; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp uge float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_uge_select_i32 +; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00) +define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp uge float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ule_select_fptosi +; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ule float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ule_select_i32 +; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ule float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ult_select_fptosi +; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 5.0 + %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 + %2 = fsub float -0.000000e+00, %1 + %3 = fptosi float %2 to i32 + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; CHECK: @fcmp_ult_select_i32 +; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 5.0 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/setcc.v4i32.ll b/test/CodeGen/R600/setcc.v4i32.ll new file mode 100644 index 0000000000..0752f2e63d --- /dev/null +++ b/test/CodeGen/R600/setcc.v4i32.ll @@ -0,0 +1,12 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +;CHECK: SETE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = icmp eq <4 x i32> %a, %b + %sext = sext <4 x i1> %result to <4 x i32> + store <4 x i32> %sext, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/short-args.ll b/test/CodeGen/R600/short-args.ll new file mode 100644 index 0000000000..b69e327bf6 --- /dev/null +++ b/test/CodeGen/R600/short-args.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @i8_arg +; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { +entry: + %0 = zext i8 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK: @i8_zext_arg +; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { +entry: + %0 = zext i8 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK: @i16_arg +; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { +entry: + %0 = zext i16 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} + +; CHECK: @i16_zext_arg +; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}} + +define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { +entry: + %0 = zext i16 %in to i32 + store i32 %0, i32 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/R600/store.v4f32.ll b/test/CodeGen/R600/store.v4f32.ll new file mode 100644 index 0000000000..8b0d244459 --- /dev/null +++ b/test/CodeGen/R600/store.v4f32.ll @@ -0,0 +1,9 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 + +define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %1 = load <4 x float> addrspace(1) * %in + store <4 x float> %1, <4 x float> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/store.v4i32.ll b/test/CodeGen/R600/store.v4i32.ll new file mode 100644 index 0000000000..a659815dde --- /dev/null +++ b/test/CodeGen/R600/store.v4i32.ll @@ -0,0 +1,9 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1 + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %1 = load <4 x i32> addrspace(1) * %in + store <4 x i32> %1, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/udiv.v4i32.ll b/test/CodeGen/R600/udiv.v4i32.ll new file mode 100644 index 0000000000..47657a6be7 --- /dev/null +++ b/test/CodeGen/R600/udiv.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;The code generated by udiv is long and complex and may frequently change. +;The goal of this test is to make sure the ISel doesn't fail when it gets +;a v4i32 udiv +;CHECK: RETURN + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = udiv <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll new file mode 100644 index 0000000000..b48c591518 --- /dev/null +++ b/test/CodeGen/R600/unsupported-cc.ll @@ -0,0 +1,83 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; These tests are for condition codes that are not supported by the hardware + +; CHECK: @slt +; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45) +define void @slt(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp slt i32 %in, 5 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @ult_i32 +; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45) +define void @ult_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp ult i32 %in, 5 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @ult_float +; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @ult_float(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ult float %in, 5.0 + %1 = select i1 %0, float 1.0, float 0.0 + store float %1, float addrspace(1)* %out + ret void +} + +; CHECK: @olt +; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @olt(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp olt float %in, 5.0 + %1 = select i1 %0, float 1.0, float 0.0 + store float %1, float addrspace(1)* %out + ret void +} + +; CHECK: @sle +; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45) +define void @sle(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp sle i32 %in, 5 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @ule_i32 +; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45) +define void @ule_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = icmp ule i32 %in, 5 + %1 = select i1 %0, i32 -1, i32 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; CHECK: @ule_float +; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @ule_float(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ule float %in, 5.0 + %1 = select i1 %0, float 1.0, float 0.0 + store float %1, float addrspace(1)* %out + ret void +} + +; CHECK: @ole +; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00) +define void @ole(float addrspace(1)* %out, float %in) { +entry: + %0 = fcmp ole float %in, 5.0 + %1 = select i1 %0, float 1.0, float 0.0 + store float %1, float addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/urem.v4i32.ll b/test/CodeGen/R600/urem.v4i32.ll new file mode 100644 index 0000000000..2e7388caa6 --- /dev/null +++ b/test/CodeGen/R600/urem.v4i32.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;The code generated by urem is long and complex and may frequently change. +;The goal of this test is to make sure the ISel doesn't fail when it gets +;a v4i32 urem +;CHECK: RETURN + +define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32> addrspace(1) * %in + %b = load <4 x i32> addrspace(1) * %b_ptr + %result = urem <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/R600/vec4-expand.ll b/test/CodeGen/R600/vec4-expand.ll new file mode 100644 index 0000000000..8f62bc6929 --- /dev/null +++ b/test/CodeGen/R600/vec4-expand.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @fp_to_sint +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @fp_to_sint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %value = load <4 x float> addrspace(1) * %in + %result = fptosi <4 x float> %value to <4 x i32> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; CHECK: @fp_to_uint +; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @fp_to_uint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %value = load <4 x float> addrspace(1) * %in + %result = fptoui <4 x float> %value to <4 x i32> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; CHECK: @sint_to_fp +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @sint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %value = load <4 x i32> addrspace(1) * %in + %result = sitofp <4 x i32> %value to <4 x float> + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} + +; CHECK: @uint_to_fp +; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} + +define void @uint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %value = load <4 x i32> addrspace(1) * %in + %result = uitofp <4 x i32> %value to <4 x float> + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} |