diff options
-rw-r--r-- | lib/Transforms/Scalar/ScalarReplAggregates.cpp | 36 | ||||
-rw-r--r-- | test/Transforms/ScalarRepl/vector_memcpy.ll | 18 |
2 files changed, 35 insertions, 19 deletions
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 98d5a02701..78730b6eb7 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1350,8 +1350,6 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
       // Store of constant value and constant size.
       if (isa<ConstantInt>(MSI->getValue()) &&
           isa<ConstantInt>(MSI->getLength())) {
-        // FIXME (!): Why reset VecTy?
-        VecTy = Type::VoidTy;
         IsNotTrivial = true;
         continue;
       }
@@ -1628,21 +1626,25 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
   const Type *AllocaType = Old->getType();
 
   if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
-    // If the result alloca is a vector type, this is either an element
-    // access or a bitcast to another vector type.
-    if (isa<VectorType>(SV->getType())) {
-      SV = Builder.CreateBitCast(SV, AllocaType, "tmp");
-    } else {
-      // Must be an element insertion.
-      unsigned Elt = Offset/TD->getTypePaddedSizeInBits(VTy->getElementType());
-
-      if (SV->getType() != VTy->getElementType())
-        SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
-
-      SV = Builder.CreateInsertElement(Old, SV,
-                                       ConstantInt::get(Type::Int32Ty, Elt),
-                                       "tmp");
-    }
+    uint64_t VecSize = TD->getTypePaddedSizeInBits(VTy);
+    uint64_t ValSize = TD->getTypePaddedSizeInBits(SV->getType());
+
+    // Changing the whole vector with memset or with an access of a different
+    // vector type?
+    if (ValSize == VecSize)
+      return Builder.CreateBitCast(SV, AllocaType, "tmp");
+
+    uint64_t EltSize = TD->getTypePaddedSizeInBits(VTy->getElementType());
+
+    // Must be an element insertion.
+    unsigned Elt = Offset/EltSize;
+
+    if (SV->getType() != VTy->getElementType())
+      SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
+
+    SV = Builder.CreateInsertElement(Old, SV,
+                                     ConstantInt::get(Type::Int32Ty, Elt),
+                                     "tmp");
     return SV;
   }
 
diff --git a/test/Transforms/ScalarRepl/vector_memcpy.ll b/test/Transforms/ScalarRepl/vector_memcpy.ll
index dc947b0d71..56785788ff 100644
--- a/test/Transforms/ScalarRepl/vector_memcpy.ll
+++ b/test/Transforms/ScalarRepl/vector_memcpy.ll
@@ -1,4 +1,7 @@
-; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {ret <16 x float> %A}
+; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis > %t
+; RUN: grep {ret <16 x float> %A} %t
+; RUN: grep {ret <16 x float> zeroinitializer} %t
+
 define <16 x float> @foo(<16 x float> %A) nounwind {
 	%tmp = alloca <16 x float>, align 16
 	%tmp2 = alloca <16 x float>, align 16
@@ -11,5 +14,16 @@ define <16 x float> @foo(<16 x float> %A) nounwind {
 	ret <16 x float> %R
 }
 
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+define <16 x float> @foo2(<16 x float> %A) nounwind {
+	%tmp2 = alloca <16 x float>, align 16
+	%s2 = bitcast <16 x float>* %tmp2 to i8*
+	call void @llvm.memset.i64(i8* %s2, i8 0, i64 64, i32 16)
+
+	%R = load <16 x float>* %tmp2
+	ret <16 x float> %R
+}
+
+
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
 