-rw-r--r--   lib/Transforms/Scalar/ScalarReplAggregates.cpp               |  7
-rw-r--r--   test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll  | 60
2 files changed, 64 insertions, 3 deletions
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 6b3c22b4f5..9f286b5642 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -742,8 +742,9 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
   // If the result alloca is a vector type, this is either an element
   // access or a bitcast to another vector type of the same size.
   if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) {
+    unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
     unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
-    if (ToTypeSize == AllocaSize) {
+    if (FromTypeSize == ToTypeSize) {
       // If the two types have the same primitive size, use a bit cast.
       // Otherwise, it is two vectors with the same element type that has
       // the same allocation size but different number of elements so use
@@ -755,13 +756,13 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
       return CreateShuffleVectorCast(FromVal, ToType, Builder);
     }
 
-    if (isPowerOf2_64(AllocaSize / ToTypeSize)) {
+    if (isPowerOf2_64(FromTypeSize / ToTypeSize)) {
       assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value "
             "of a smaller vector type at a nonzero offset.");
 
       const Type *CastElementTy = getScaledElementType(FromType, ToType,
                                                        ToTypeSize * 8);
-      unsigned NumCastVectorElements = AllocaSize / ToTypeSize;
+      unsigned NumCastVectorElements = FromTypeSize / ToTypeSize;
 
       LLVMContext &Context = FromVal->getContext();
       const Type *CastTy = VectorType::get(CastElementTy,
diff --git a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
new file mode 100644
index 0000000000..0b008f48db
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -S -scalarrepl | FileCheck %s
+; RUN: opt < %s -S -scalarrepl-ssa | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+%0 = type { <2 x float>, float }
+%struct.PointC3 = type { %struct.array }
+%struct.Point_3 = type { %struct.PointC3 }
+%struct.array = type { [3 x float], [4 x i8] }
+
+; CHECK: main
+; CHECK-NOT: alloca
+; CHECK: extractelement <2 x float> zeroinitializer
+
+define void @main() uwtable ssp {
+entry:
+  %ref.tmp2 = alloca %0, align 16
+  %tmpcast = bitcast %0* %ref.tmp2 to %struct.Point_3*
+  %0 = getelementptr %0* %ref.tmp2, i64 0, i32 0
+  store <2 x float> zeroinitializer, <2 x float>* %0, align 16
+  %1 = getelementptr inbounds %struct.Point_3* %tmpcast, i64 0, i32 0
+  %base.i.i.i = getelementptr inbounds %struct.PointC3* %1, i64 0, i32 0
+  %arrayidx.i.i.i.i = getelementptr inbounds %struct.array* %base.i.i.i, i64 0, i32 0, i64 0
+  %tmp5.i.i = load float* %arrayidx.i.i.i.i, align 4
+  ret void
+}
+
+; CHECK: test1
+; CHECK-NOT: alloca
+; CHECK: extractelement <2 x float> zeroinitializer
+
+define void @test1() uwtable ssp {
+entry:
+  %ref.tmp2 = alloca {<2 x float>, float}, align 16
+  %tmpcast = bitcast {<2 x float>, float}* %ref.tmp2 to float*
+  %0 = getelementptr {<2 x float>, float}* %ref.tmp2, i64 0, i32 0
+  store <2 x float> zeroinitializer, <2 x float>* %0, align 16
+  %tmp5.i.i = load float* %tmpcast, align 4
+  ret void
+}
+
+; CHECK: test2
+; CHECK-NOT: alloca
+; CHECK: insertelement <2 x float> zeroinitializer
+; CHECK: extractelement <2 x float> %tmp2
+; CHECK: extractelement <2 x float> %tmp2
+
+define float @test2() uwtable ssp {
+entry:
+  %ref.tmp2 = alloca {<2 x float>, float}, align 16
+  %tmpcast = bitcast {<2 x float>, float}* %ref.tmp2 to float*
+  %tmpcast2 = getelementptr {<2 x float>, float}* %ref.tmp2, i64 0, i32 1
+  %0 = getelementptr {<2 x float>, float}* %ref.tmp2, i64 0, i32 0
+  store <2 x float> zeroinitializer, <2 x float>* %0, align 16
+  store float 1.0, float* %tmpcast2, align 4
+  %r1 = load float* %tmpcast, align 4
+  %r2 = load float* %tmpcast2, align 4
+  %r = fadd float %r1, %r2
+  ret float %r
+}
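For context, here is a minimal standalone sketch of the size comparison this patch changes. It is plain C++, not the LLVM API; classifyExtract, ExtractKind, and the byte sizes are hypothetical illustrations. The pre-patch code divided by the whole alloca's allocation size, so a 4-byte float load out of the 8-byte <2 x float> field of a 16-byte {<2 x float>, float} alloca was rewritten as one element of a four-element vector instead of the two-element vector that was actually stored.

```cpp
// Minimal sketch (not LLVM code): restates the size check changed by the patch.
#include <cassert>
#include <cstdint>

// Mirrors llvm::isPowerOf2_64: true iff x is a nonzero power of two.
static bool isPowerOf2(uint64_t x) { return x != 0 && (x & (x - 1)) == 0; }

enum class ExtractKind { BitCastWholeVector, ExtractSubElements, Unhandled };

// Decide how to pull a value of toSize bytes out of a vector value of fromSize
// bytes that lives inside an alloca of allocaSize bytes.
static ExtractKind classifyExtract(uint64_t allocaSize, uint64_t fromSize,
                                   uint64_t toSize, uint64_t &numElements) {
  (void)allocaSize;                    // post-patch: no longer consulted here
  numElements = 0;
  if (fromSize == toSize)              // same size: bitcast / shuffle the vector
    return ExtractKind::BitCastWholeVector;
  if (isPowerOf2(fromSize / toSize)) { // extract one of N equal-sized pieces
    numElements = fromSize / toSize;   // sized by the vector itself, not the alloca
    return ExtractKind::ExtractSubElements;
  }
  return ExtractKind::Unhandled;
}

int main() {
  // {<2 x float>, float} occupies 16 bytes; the leading <2 x float> field is
  // 8 bytes; the load being rewritten reads a single 4-byte float from it.
  uint64_t n = 0;
  assert(classifyExtract(16, 8, 4, n) == ExtractKind::ExtractSubElements);
  assert(n == 2); // the old AllocaSize / ToTypeSize logic would have chosen 4,
                  // but the <2 x float> value only has two elements
  return 0;
}
```

This is the shape of @test1 and @test2 in the new regression test: the load through %tmpcast must be served by extracting element 0 of the stored <2 x float>, which is what the CHECK lines for extractelement verify.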