; RUN: opt < %s -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
; Two 100-element i32 arrays separated by a single i32 field.
; In the C snippets below, field 0 is accessed as Foo.A and field 2 as Foo.B.
%struct.anon = type { [100 x i32], i32, [100 x i32] }
; Same three-field shape as %struct.anon, but each array is 100 x 100 i32.
%struct.anon.0 = type { [100 x [100 x i32]], i32, [100 x [100 x i32]] }
; Zero-initialized, 4-byte-aligned instances of the two struct types.
@Foo = common global %struct.anon zeroinitializer, align 4
@Bar = common global %struct.anon.0 zeroinitializer, align 4
; Pointers to i32 that are only declared here (external linkage, no initializer).
@PB = external global i32*
@PA = external global i32*
;; === First, the tests that should always vectorize, either statically or by adding run-time checks ===
; /// Different objects, positive induction, constant distance
; int noAlias01 (int a) {
; int i;
; for (i=0; i<SIZE; i++)
; Foo.A[i] = Foo.B[i] + a;
; return Foo.A[a];
; }
; CHECK: define i32 @noAlias01
; CHECK: add nsw <4 x i32>
; CHECK: ret
; Unoptimized (alloca-based) IR for the C function noAlias01 in the comment above.
; For i in [0, 100) the loop loads element i of field 2 of @Foo, adds %a, and
; stores the sum to element i of field 0 of @Foo. After the loop the function
; returns element %a of field 0.
define i32 @noAlias01(i32 %a) nounwind {
entry:
; Stack slots for the parameter 'a' and the induction variable 'i'.
%a.addr = alloca i32, align 4
%i = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
; i = 0
store i32 0, i32* %i, align 4
br label %for.cond
for.cond: ; preds = %for.inc, %entry
; Loop test: continue while i < 100 (signed compare).
%0 = load i32* %i, align 4
%cmp = icmp slt i32 %0, 100
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
; Read element i of field 2 of @Foo (the source array).
%1 = load i32* %i, align 4
%arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
%2 = load i32* %arrayidx, align 4
; Add the reloaded value of 'a'.
%3 = load i32* %a.addr, align 4
%add = add nsw i32 %2, %3
; Write the sum to element i of field 0 of @Foo (the destination array).
%4 = load i32* %i, align 4
%arrayidx1 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
store i32 %add, i32* %arrayidx1, align 4
br label %for.inc
for.inc: ; preds = %for.body
; i = i + 1
%5 = load i32* %i, align 4
%inc = add nsw i32 %5, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
for.end: ; preds = %for.cond
; Return element 'a' of field 0 of @Foo; the index is the parameter, not the loop counter.
%6 = load i32* %a.addr, align 4
%arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
%7 = load i32* %arrayidx2, align 4
ret i32 %7
}
; /// Different objects, positive induction with widening slide
; int noAlias02 (int a) {
; int i;
; for (i=0; i<SIZE-10; i++)
; Foo.A[i] = Foo.B[i+10] + a;
; return Foo.A[a];
; }
; CHECK: define i32 @noAlias02
; CHECK: add nsw <4 x i32>
; CHECK: ret
define i32 @noAlias02(i32 %a) {
entry:
%a.addr = alloca i32, align 4
%i = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
store i32 0, i32* %i, align 4
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%0 = load i32* %i, align 4
%cmp = icmp slt i32 %0, 90
br i1 %cmp, label %for.body,