diff options
Diffstat (limited to 'lib/Transforms/Scalar/SROA.cpp')
-rw-r--r-- | lib/Transforms/Scalar/SROA.cpp | 373 |
1 files changed, 230 insertions, 143 deletions
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 1c220ca0f6..a0ee849a03 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -2150,7 +2150,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, !canConvertValue(TD, ValueTy, AllocaTy)) return false; } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { - if (MI->isVolatile()) + if (MI->isVolatile() || !isa<Constant>(MI->getLength())) return false; if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO @@ -2223,6 +2223,84 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, return V; } +static Value *extractVector(IRBuilder<> &IRB, Value *V, + unsigned BeginIndex, unsigned EndIndex, + const Twine &Name) { + VectorType *VecTy = cast<VectorType>(V->getType()); + unsigned NumElements = EndIndex - BeginIndex; + assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); + + if (NumElements == VecTy->getNumElements()) + return V; + + if (NumElements == 1) { + V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), + Name + ".extract"); + DEBUG(dbgs() << " extract: " << *V << "\n"); + return V; + } + + SmallVector<Constant*, 8> Mask; + Mask.reserve(NumElements); + for (unsigned i = BeginIndex; i != EndIndex; ++i) + Mask.push_back(IRB.getInt32(i)); + V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(Mask), + Name + ".extract"); + DEBUG(dbgs() << " shuffle: " << *V << "\n"); + return V; +} + +static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V, + unsigned BeginIndex, const Twine &Name) { + VectorType *VecTy = cast<VectorType>(Old->getType()); + assert(VecTy && "Can only insert a vector into a vector"); + + VectorType *Ty = dyn_cast<VectorType>(V->getType()); + if (!Ty) { + // Single element to insert. + V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex), + Name + ".insert"); + DEBUG(dbgs() << " insert: " << *V << "\n"); + return V; + } + + assert(Ty->getNumElements() <= VecTy->getNumElements() && + "Too many elements!"); + if (Ty->getNumElements() == VecTy->getNumElements()) { + assert(V->getType() == VecTy && "Vector type mismatch"); + return V; + } + unsigned EndIndex = BeginIndex + Ty->getNumElements(); + + // When inserting a smaller vector into the larger to store, we first + // use a shuffle vector to widen it with undef elements, and then + // a second shuffle vector to select between the loaded vector and the + // incoming vector. + SmallVector<Constant*, 8> Mask; + Mask.reserve(VecTy->getNumElements()); + for (unsigned i = 0; i != VecTy->getNumElements(); ++i) + if (i >= BeginIndex && i < EndIndex) + Mask.push_back(IRB.getInt32(i - BeginIndex)); + else + Mask.push_back(UndefValue::get(IRB.getInt32Ty())); + V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(Mask), + Name + ".expand"); + DEBUG(dbgs() << " shuffle1: " << *V << "\n"); + + Mask.clear(); + for (unsigned i = 0; i != VecTy->getNumElements(); ++i) + if (i >= BeginIndex && i < EndIndex) + Mask.push_back(IRB.getInt32(i)); + else + Mask.push_back(IRB.getInt32(i + VecTy->getNumElements())); + V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask), + Name + "insert"); + DEBUG(dbgs() << " shuffle2: " << *V << "\n"); + return V; +} + namespace { /// \brief Visitor to rewrite instructions using a partition of an alloca to /// use a new alloca. @@ -2388,29 +2466,14 @@ private: Pass.DeadInsts.insert(I); } - Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) { - Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); + Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) { unsigned BeginIndex = getIndex(BeginOffset); unsigned EndIndex = getIndex(EndOffset); assert(EndIndex > BeginIndex && "Empty vector!"); - unsigned NumElements = EndIndex - BeginIndex; - assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); - if (NumElements == 1) { - V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), - getName(".extract")); - DEBUG(dbgs() << " extract: " << *V << "\n"); - } else if (NumElements < VecTy->getNumElements()) { - SmallVector<Constant*, 8> Mask; - Mask.reserve(NumElements); - for (unsigned i = BeginIndex; i != EndIndex; ++i) - Mask.push_back(IRB.getInt32(i)); - V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), - ConstantVector::get(Mask), - getName(".extract")); - DEBUG(dbgs() << " shuffle: " << *V << "\n"); - } - return V; + + Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec")); } Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { @@ -2457,7 +2520,7 @@ private: bool IsPtrAdjusted = false; Value *V; if (VecTy) { - V = rewriteVectorizedLoadInst(IRB, LI, OldOp); + V = rewriteVectorizedLoadInst(IRB); } else if (IntTy && LI.getType()->isIntegerTy()) { V = rewriteIntegerLoad(IRB, LI); } else if (BeginOffset == NewAllocaBeginOffset && @@ -2518,44 +2581,12 @@ private: : VectorType::get(ElementTy, NumElements); if (V->getType() != PartitionTy) V = convertValue(TD, IRB, V, PartitionTy); - if (NumElements < VecTy->getNumElements()) { - // We need to mix in the existing elements. - LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - if (NumElements == 1) { - V = IRB.CreateInsertElement(LI, V, IRB.getInt32(BeginIndex), - getName(".insert")); - DEBUG(dbgs() << " insert: " << *V << "\n"); - } else { - // When inserting a smaller vector into the larger to store, we first - // use a shuffle vector to widen it with undef elements, and then - // a second shuffle vector to select between the loaded vector and the - // incoming vector. - SmallVector<Constant*, 8> Mask; - Mask.reserve(VecTy->getNumElements()); - for (unsigned i = 0; i != VecTy->getNumElements(); ++i) - if (i >= BeginIndex && i < EndIndex) - Mask.push_back(IRB.getInt32(i - BeginIndex)); - else - Mask.push_back(UndefValue::get(IRB.getInt32Ty())); - V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), - ConstantVector::get(Mask), - getName(".expand")); - DEBUG(dbgs() << " shuffle1: " << *V << "\n"); - - Mask.clear(); - for (unsigned i = 0; i != VecTy->getNumElements(); ++i) - if (i >= BeginIndex && i < EndIndex) - Mask.push_back(IRB.getInt32(i)); - else - Mask.push_back(IRB.getInt32(i + VecTy->getNumElements())); - V = IRB.CreateShuffleVector(V, LI, ConstantVector::get(Mask), - getName("insert")); - DEBUG(dbgs() << " shuffle2: " << *V << "\n"); - } - } else { - V = convertValue(TD, IRB, V, VecTy); - } + + // Mix in the existing elements. + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + V = insertVector(IRB, Old, V, BeginIndex, getName(".vec")); + StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.insert(&SI); @@ -2607,7 +2638,7 @@ private: TD.getTypeStoreSizeInBits(V->getType()) && "Non-byte-multiple bit width"); assert(V->getType()->getIntegerBitWidth() == - TD.getTypeSizeInBits(OldAI.getAllocatedType()) && + TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && "Only alloca-wide stores can be split and recomposed"); IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, @@ -2639,6 +2670,51 @@ private: return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile(); } + /// \brief Compute an integer value from splatting an i8 across the given + /// number of bytes. + /// + /// Note that this routine assumes an i8 is a byte. If that isn't true, don't + /// call this routine. + /// FIXME: Heed the abvice above. + /// + /// \param V The i8 value to splat. + /// \param Size The number of bytes in the output (assuming i8 is one byte) + Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) { + assert(Size > 0 && "Expected a positive number of bytes."); + IntegerType *VTy = cast<IntegerType>(V->getType()); + assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte"); + if (Size == 1) + return V; + + Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); + V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")), + ConstantExpr::getUDiv( + Constant::getAllOnesValue(SplatIntTy), + ConstantExpr::getZExt( + Constant::getAllOnesValue(V->getType()), + SplatIntTy)), + getName(".isplat")); + return V; + } + + /// \brief Compute a vector splat for a given element value. + Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) { + assert(NumElements > 0 && "Cannot splat to an empty vector."); + + // First insert it into a one-element vector so we can shuffle it. It is + // really silly that LLVM's IR requires this in order to form a splat. + Value *Undef = UndefValue::get(VectorType::get(V->getType(), 1)); + V = IRB.CreateInsertElement(Undef, V, IRB.getInt32(0), + getName(".splatinsert")); + + // Shuffle the value across the desired number of elements. + SmallVector<Constant*, 8> Mask(NumElements, IRB.getInt32(0)); + V = IRB.CreateShuffleVector(V, Undef, ConstantVector::get(Mask), + getName(".splat")); + DEBUG(dbgs() << " splat: " << *V << "\n"); + return V; + } + bool visitMemSetInst(MemSetInst &II) { DEBUG(dbgs() << " original: " << II << "\n"); IRBuilder<> IRB(&II); @@ -2667,7 +2743,8 @@ private: (BeginOffset != NewAllocaBeginOffset || EndOffset != NewAllocaEndOffset || !AllocaTy->isSingleValueType() || - !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) { + !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)) || + TD.getTypeSizeInBits(ScalarTy)%8 != 0)) { Type *SizeTy = II.getLength()->getType(); Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); CallInst *New @@ -2683,53 +2760,62 @@ private: // If we can represent this as a simple value, we have to build the actual // value to store, which requires expanding the byte present in memset to // a sensible representation for the alloca type. This is essentially - // splatting the byte to a sufficiently wide integer, bitcasting to the - // desired scalar type, and splatting it across any desired vector type. + // splatting the byte to a sufficiently wide integer, splatting it across + // any desired vector width, and bitcasting to the final type. uint64_t Size = EndOffset - BeginOffset; - Value *V = II.getValue(); - IntegerType *VTy = cast<IntegerType>(V->getType()); - Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); - if (Size*8 > VTy->getBitWidth()) - V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")), - ConstantExpr::getUDiv( - Constant::getAllOnesValue(SplatIntTy), - ConstantExpr::getZExt( - Constant::getAllOnesValue(V->getType()), - SplatIntTy)), - getName(".isplat")); - - // If this is an element-wide memset of a vectorizable alloca, insert it. - if (VecTy && (BeginOffset > NewAllocaBeginOffset || - EndOffset < NewAllocaEndOffset)) { - if (V->getType() != ScalarTy) - V = convertValue(TD, IRB, V, ScalarTy); - StoreInst *Store = IRB.CreateAlignedStore( - IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI, - NewAI.getAlignment(), - getName(".load")), - V, IRB.getInt32(getIndex(BeginOffset)), - getName(".insert")), - &NewAI, NewAI.getAlignment()); - (void)Store; - DEBUG(dbgs() << " to: " << *Store << "\n"); - return true; - } + Value *V = getIntegerSplat(IRB, II.getValue(), Size); + + if (VecTy) { + // If this is a memset of a vectorized alloca, insert it. + assert(ElementTy == ScalarTy); + + unsigned BeginIndex = getIndex(BeginOffset); + unsigned EndIndex = getIndex(EndOffset); + assert(EndIndex > BeginIndex && "Empty vector!"); + unsigned NumElements = EndIndex - BeginIndex; + assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); + + Value *Splat = getIntegerSplat(IRB, II.getValue(), + TD.getTypeSizeInBits(ElementTy)/8); + Splat = convertValue(TD, IRB, Splat, ElementTy); + if (NumElements > 1) + Splat = getVectorSplat(IRB, Splat, NumElements); - // If this is a memset on an alloca where we can widen stores, insert the - // set integer. - if (IntTy && (BeginOffset > NewAllocaBeginOffset || - EndOffset < NewAllocaEndOffset)) { - assert(!II.isVolatile()); Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".oldload")); - Old = convertValue(TD, IRB, Old, IntTy); - assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); - uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert")); - } + V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec")); + } else if (IntTy) { + // If this is a memset on an alloca where we can widen stores, insert the + // set integer. + assert(!II.isVolatile()); - if (V->getType() != AllocaTy) + V = getIntegerSplat(IRB, II.getValue(), Size); + + if (IntTy && (BeginOffset != NewAllocaBeginOffset || + EndOffset != NewAllocaBeginOffset)) { + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".oldload")); + Old = convertValue(TD, IRB, Old, IntTy); + assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); + uint64_t Offset = BeginOffset - NewAllocaBeginOffset; + V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert")); + } else { + assert(V->getType() == IntTy && + "Wrong type for an alloca wide integer!"); + } V = convertValue(TD, IRB, V, AllocaTy); + } else { + // Established these invariants above. + assert(BeginOffset == NewAllocaBeginOffset); + assert(EndOffset == NewAllocaEndOffset); + + V = getIntegerSplat(IRB, II.getValue(), + TD.getTypeSizeInBits(ScalarTy)/8); + if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy)) + V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements()); + + V = convertValue(TD, IRB, V, AllocaTy); + } Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), II.isVolatile()); @@ -2814,37 +2900,22 @@ private: // Record this instruction for deletion. Pass.DeadInsts.insert(&II); - bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && - EndOffset == NewAllocaEndOffset; - bool IsVectorElement = VecTy && !IsWholeAlloca; - uint64_t Size = EndOffset - BeginOffset; - IntegerType *SubIntTy - = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; - - Type *OtherPtrTy = IsDest ? II.getRawSource()->getType() - : II.getRawDest()->getType(); - if (!EmitMemCpy) { - if (IsVectorElement) - OtherPtrTy = VecTy->getElementType()->getPointerTo(); - else if (IntTy && !IsWholeAlloca) - OtherPtrTy = SubIntTy->getPointerTo(); - else - OtherPtrTy = NewAI.getType(); - } - - // Compute the other pointer, folding as much as possible to produce - // a single, simple GEP in most cases. - Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); - OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, - getName("." + OtherPtr->getName())); - // Strip all inbounds GEPs and pointer casts to try to dig out any root // alloca that should be re-examined after rewriting this instruction. + Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); if (AllocaInst *AI = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) Pass.Worklist.insert(AI); if (EmitMemCpy) { + Type *OtherPtrTy = IsDest ? II.getRawSource()->getType() + : II.getRawDest()->getType(); + + // Compute the other pointer, folding as much as possible to produce + // a single, simple GEP in most cases. + OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, + getName("." + OtherPtr->getName())); + Value *OurPtr = getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType() : II.getRawSource()->getType()); @@ -2865,18 +2936,38 @@ private: if (!Align) Align = 1; - Value *SrcPtr = OtherPtr; + bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && + EndOffset == NewAllocaEndOffset; + uint64_t Size = EndOffset - BeginOffset; + unsigned BeginIndex = VecTy ? getIndex(BeginOffset) : 0; + unsigned EndIndex = VecTy ? getIndex(EndOffset) : 0; + unsigned NumElements = EndIndex - BeginIndex; + IntegerType *SubIntTy + = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; + + Type *OtherPtrTy = NewAI.getType(); + if (VecTy && !IsWholeAlloca) { + if (NumElements == 1) + OtherPtrTy = VecTy->getElementType(); + else + OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements); + + OtherPtrTy = OtherPtrTy->getPointerTo(); + } else if (IntTy && !IsWholeAlloca) { + OtherPtrTy = SubIntTy->getPointerTo(); + } + + Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, + getName("." + OtherPtr->getName())); Value *DstPtr = &NewAI; if (!IsDest) std::swap(SrcPtr, DstPtr); Value *Src; - if (IsVectorElement && !IsDest) { - // We have to extract rather than load. - Src = IRB.CreateExtractElement( - IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")), - IRB.getInt32(getIndex(BeginOffset)), - getName(".copyextract")); + if (VecTy && !IsWholeAlloca && !IsDest) { + Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec")); } else if (IntTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")); @@ -2889,7 +2980,11 @@ private: getName(".copyload")); } - if (IntTy && !IsWholeAlloca && IsDest) { + if (VecTy && !IsWholeAlloca && IsDest) { + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".oldload")); + Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec")); + } else if (IntTy && !IsWholeAlloca && IsDest) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".oldload")); Old = convertValue(TD, IRB, Old, IntTy); @@ -2899,14 +2994,6 @@ private: Src = convertValue(TD, IRB, Src, NewAllocaTy); } - if (IsVectorElement && IsDest) { - // We have to insert into a loaded copy before storing. - Src = IRB.CreateInsertElement( - IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), - Src, IRB.getInt32(getIndex(BeginOffset)), - getName(".insert")); - } - StoreInst *Store = cast<StoreInst>( IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile())); (void)Store; |