author     Evan Cheng <evan.cheng@apple.com>        2009-05-11 22:33:01 +0000
committer  Evan Cheng <evan.cheng@apple.com>        2009-05-11 22:33:01 +0000
commit     5792f51e12d9c8685399e9857799365854ab5bf6 (patch)
tree       70eb5ebee02553ba2b892ce1ff7f7bbaad77a682 /test/CodeGen/X86/lsr-loop-exit-cond.ll
parent     6dc4ade59505fd8a01370ff8da5b18110f7a2f41 (diff)
Teach LSR to optimize more loop exit compares, i.e. change them to use the postinc iv value. Previously LSR would only optimize compares that are in the loop latch block. However, if LSR can prove it is safe (and profitable), it is now possible to change compares not in the latch block to use postinc values as well.
Also, if the compare is the only use of the iv, LSR will place the iv increment instruction before the compare instead of in the latch block.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@71485 91177308-0d34-0410-b5e6-96231b3b80d8
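To illustrate the rewrite described in the commit message, here is a minimal before/after sketch in the same IR dialect as the test below. It is not taken from this commit; the names %i, %i.next, %n, and %n.plus.1 are hypothetical.

; Before: the exit compare tests the pre-increment iv, and the increment
; lives in the latch block %bb1.
bb:
  %i = phi i64 [ 0, %entry ], [ %i.next, %bb1 ]
  ; ... loop body ...
  %exitcond = icmp eq i64 %i, %n          ; compare on the pre-increment value
  br i1 %exitcond, label %bb2, label %bb1
bb1:                                      ; latch
  %i.next = add i64 %i, 1
  br label %bb

; After: LSR places the increment directly before the compare and rewrites
; the compare to use the post-increment value, adjusting the bound
; (%n.plus.1 = %n + 1) so the trip count is unchanged.
bb:
  %i = phi i64 [ 0, %entry ], [ %i.next, %bb1 ]
  ; ... loop body ...
  %i.next = add i64 %i, 1
  %exitcond = icmp eq i64 %i.next, %n.plus.1   ; compare on the post-increment value
  br i1 %exitcond, label %bb2, label %bb1
bb1:                                      ; latch
  br label %bb

The new test's RUN line (%prcontext decq 1 | grep jne) checks the intended effect on x86-64: with the compare using the post-increment value and the count update sitting directly in front of it, the exit branch can presumably be taken straight from the flags of the decq that updates the counter, so decq should be immediately followed by jne with no separate cmp.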
Diffstat (limited to 'test/CodeGen/X86/lsr-loop-exit-cond.ll')
-rw-r--r-- | test/CodeGen/X86/lsr-loop-exit-cond.ll | 134
1 file changed, 134 insertions, 0 deletions
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
new file mode 100644
index 0000000000..c998268600
--- /dev/null
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -0,0 +1,134 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | %prcontext decq 1 | grep jne
+
+@Te0 = external global [256 x i32]  ; <[256 x i32]*> [#uses=5]
+@Te1 = external global [256 x i32]  ; <[256 x i32]*> [#uses=4]
+@Te3 = external global [256 x i32]  ; <[256 x i32]*> [#uses=2]
+
+define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp {
+entry:
+  %0 = load i32* %rk, align 4  ; <i32> [#uses=1]
+  %1 = getelementptr i32* %rk, i64 1  ; <i32*> [#uses=1]
+  %2 = load i32* %1, align 4  ; <i32> [#uses=1]
+  %tmp15 = add i32 %r, -1  ; <i32> [#uses=1]
+  %tmp.16 = zext i32 %tmp15 to i64  ; <i64> [#uses=2]
+  br label %bb
+
+bb:  ; preds = %bb1, %entry
+  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ]  ; <i64> [#uses=3]
+  %s1.0 = phi i32 [ %2, %entry ], [ %56, %bb1 ]  ; <i32> [#uses=2]
+  %s0.0 = phi i32 [ %0, %entry ], [ %43, %bb1 ]  ; <i32> [#uses=2]
+  %tmp18 = shl i64 %indvar, 4  ; <i64> [#uses=4]
+  %rk26 = bitcast i32* %rk to i8*  ; <i8*> [#uses=6]
+  %3 = lshr i32 %s0.0, 24  ; <i32> [#uses=1]
+  %4 = zext i32 %3 to i64  ; <i64> [#uses=1]
+  %5 = getelementptr [256 x i32]* @Te0, i64 0, i64 %4  ; <i32*> [#uses=1]
+  %6 = load i32* %5, align 4  ; <i32> [#uses=1]
+  %7 = lshr i32 %s1.0, 16  ; <i32> [#uses=1]
+  %8 = and i32 %7, 255  ; <i32> [#uses=1]
+  %9 = zext i32 %8 to i64  ; <i64> [#uses=1]
+  %10 = getelementptr [256 x i32]* @Te1, i64 0, i64 %9  ; <i32*> [#uses=1]
+  %11 = load i32* %10, align 4  ; <i32> [#uses=1]
+  %ctg2.sum2728 = or i64 %tmp18, 8  ; <i64> [#uses=1]
+  %12 = getelementptr i8* %rk26, i64 %ctg2.sum2728  ; <i8*> [#uses=1]
+  %13 = bitcast i8* %12 to i32*  ; <i32*> [#uses=1]
+  %14 = load i32* %13, align 4  ; <i32> [#uses=1]
+  %15 = xor i32 %11, %6  ; <i32> [#uses=1]
+  %16 = xor i32 %15, %14  ; <i32> [#uses=3]
+  %17 = lshr i32 %s1.0, 24  ; <i32> [#uses=1]
+  %18 = zext i32 %17 to i64  ; <i64> [#uses=1]
+  %19 = getelementptr [256 x i32]* @Te0, i64 0, i64 %18  ; <i32*> [#uses=1]
+  %20 = load i32* %19, align 4  ; <i32> [#uses=1]
+  %21 = and i32 %s0.0, 255  ; <i32> [#uses=1]
+  %22 = zext i32 %21 to i64  ; <i64> [#uses=1]
+  %23 = getelementptr [256 x i32]* @Te3, i64 0, i64 %22  ; <i32*> [#uses=1]
+  %24 = load i32* %23, align 4  ; <i32> [#uses=1]
+  %ctg2.sum2930 = or i64 %tmp18, 12  ; <i64> [#uses=1]
+  %25 = getelementptr i8* %rk26, i64 %ctg2.sum2930  ; <i8*> [#uses=1]
+  %26 = bitcast i8* %25 to i32*  ; <i32*> [#uses=1]
+  %27 = load i32* %26, align 4  ; <i32> [#uses=1]
+  %28 = xor i32 %24, %20  ; <i32> [#uses=1]
+  %29 = xor i32 %28, %27  ; <i32> [#uses=4]
+  %30 = lshr i32 %16, 24  ; <i32> [#uses=1]
+  %31 = zext i32 %30 to i64  ; <i64> [#uses=1]
+  %32 = getelementptr [256 x i32]* @Te0, i64 0, i64 %31  ; <i32*> [#uses=1]
+  %33 = load i32* %32, align 4  ; <i32> [#uses=2]
+  %exitcond = icmp eq i64 %indvar, %tmp.16  ; <i1> [#uses=1]
+  br i1 %exitcond, label %bb2, label %bb1
+
+bb1:  ; preds = %bb
+  %ctg2.sum31 = add i64 %tmp18, 16  ; <i64> [#uses=1]
+  %34 = getelementptr i8* %rk26, i64 %ctg2.sum31  ; <i8*> [#uses=1]
+  %35 = bitcast i8* %34 to i32*  ; <i32*> [#uses=1]
+  %36 = lshr i32 %29, 16  ; <i32> [#uses=1]
+  %37 = and i32 %36, 255  ; <i32> [#uses=1]
+  %38 = zext i32 %37 to i64  ; <i64> [#uses=1]
+  %39 = getelementptr [256 x i32]* @Te1, i64 0, i64 %38  ; <i32*> [#uses=1]
+  %40 = load i32* %39, align 4  ; <i32> [#uses=1]
+  %41 = load i32* %35, align 4  ; <i32> [#uses=1]
+  %42 = xor i32 %40, %33  ; <i32> [#uses=1]
+  %43 = xor i32 %42, %41  ; <i32> [#uses=1]
+  %44 = lshr i32 %29, 24  ; <i32> [#uses=1]
+  %45 = zext i32 %44 to i64  ; <i64> [#uses=1]
+  %46 = getelementptr [256 x i32]* @Te0, i64 0, i64 %45  ; <i32*> [#uses=1]
+  %47 = load i32* %46, align 4  ; <i32> [#uses=1]
+  %48 = and i32 %16, 255  ; <i32> [#uses=1]
+  %49 = zext i32 %48 to i64  ; <i64> [#uses=1]
+  %50 = getelementptr [256 x i32]* @Te3, i64 0, i64 %49  ; <i32*> [#uses=1]
+  %51 = load i32* %50, align 4  ; <i32> [#uses=1]
+  %ctg2.sum32 = add i64 %tmp18, 20  ; <i64> [#uses=1]
+  %52 = getelementptr i8* %rk26, i64 %ctg2.sum32  ; <i8*> [#uses=1]
+  %53 = bitcast i8* %52 to i32*  ; <i32*> [#uses=1]
+  %54 = load i32* %53, align 4  ; <i32> [#uses=1]
+  %55 = xor i32 %51, %47  ; <i32> [#uses=1]
+  %56 = xor i32 %55, %54  ; <i32> [#uses=1]
+  %indvar.next = add i64 %indvar, 1  ; <i64> [#uses=1]
+  br label %bb
+
+bb2:  ; preds = %bb
+  %tmp10 = shl i64 %tmp.16, 4  ; <i64> [#uses=2]
+  %ctg2.sum = add i64 %tmp10, 16  ; <i64> [#uses=1]
+  %tmp1213 = getelementptr i8* %rk26, i64 %ctg2.sum  ; <i8*> [#uses=1]
+  %57 = bitcast i8* %tmp1213 to i32*  ; <i32*> [#uses=1]
+  %58 = and i32 %33, -16777216  ; <i32> [#uses=1]
+  %59 = lshr i32 %29, 16  ; <i32> [#uses=1]
+  %60 = and i32 %59, 255  ; <i32> [#uses=1]
+  %61 = zext i32 %60 to i64  ; <i64> [#uses=1]
+  %62 = getelementptr [256 x i32]* @Te1, i64 0, i64 %61  ; <i32*> [#uses=1]
+  %63 = load i32* %62, align 4  ; <i32> [#uses=1]
+  %64 = and i32 %63, 16711680  ; <i32> [#uses=1]
+  %65 = or i32 %64, %58  ; <i32> [#uses=1]
+  %66 = load i32* %57, align 4  ; <i32> [#uses=1]
+  %67 = xor i32 %65, %66  ; <i32> [#uses=2]
+  %68 = lshr i32 %29, 8  ; <i32> [#uses=1]
+  %69 = zext i32 %68 to i64  ; <i64> [#uses=1]
+  %70 = getelementptr [256 x i32]* @Te0, i64 0, i64 %69  ; <i32*> [#uses=1]
+  %71 = load i32* %70, align 4  ; <i32> [#uses=1]
+  %72 = and i32 %71, -16777216  ; <i32> [#uses=1]
+  %73 = and i32 %16, 255  ; <i32> [#uses=1]
+  %74 = zext i32 %73 to i64  ; <i64> [#uses=1]
+  %75 = getelementptr [256 x i32]* @Te1, i64 0, i64 %74  ; <i32*> [#uses=1]
+  %76 = load i32* %75, align 4  ; <i32> [#uses=1]
+  %77 = and i32 %76, 16711680  ; <i32> [#uses=1]
+  %78 = or i32 %77, %72  ; <i32> [#uses=1]
+  %ctg2.sum25 = add i64 %tmp10, 20  ; <i64> [#uses=1]
+  %79 = getelementptr i8* %rk26, i64 %ctg2.sum25  ; <i8*> [#uses=1]
+  %80 = bitcast i8* %79 to i32*  ; <i32*> [#uses=1]
+  %81 = load i32* %80, align 4  ; <i32> [#uses=1]
+  %82 = xor i32 %78, %81  ; <i32> [#uses=2]
+  %83 = lshr i32 %67, 24  ; <i32> [#uses=1]
+  %84 = trunc i32 %83 to i8  ; <i8> [#uses=1]
+  store i8 %84, i8* %out, align 1
+  %85 = lshr i32 %67, 16  ; <i32> [#uses=1]
+  %86 = trunc i32 %85 to i8  ; <i8> [#uses=1]
+  %87 = getelementptr i8* %out, i64 1  ; <i8*> [#uses=1]
+  store i8 %86, i8* %87, align 1
+  %88 = getelementptr i8* %out, i64 4  ; <i8*> [#uses=1]
+  %89 = lshr i32 %82, 24  ; <i32> [#uses=1]
+  %90 = trunc i32 %89 to i8  ; <i8> [#uses=1]
+  store i8 %90, i8* %88, align 1
+  %91 = lshr i32 %82, 16  ; <i32> [#uses=1]
+  %92 = trunc i32 %91 to i8  ; <i8> [#uses=1]
+  %93 = getelementptr i8* %out, i64 5  ; <i8*> [#uses=1]
+  store i8 %92, i8* %93, align 1
+  ret void
+}