aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/SROA.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar/SROA.cpp')
-rw-r--r--lib/Transforms/Scalar/SROA.cpp373
1 files changed, 230 insertions, 143 deletions
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 1c220ca0f6..a0ee849a03 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2150,7 +2150,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
!canConvertValue(TD, ValueTy, AllocaTy))
return false;
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
- if (MI->isVolatile())
+ if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
return false;
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
@@ -2223,6 +2223,84 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
return V;
}
+static Value *extractVector(IRBuilder<> &IRB, Value *V,
+ unsigned BeginIndex, unsigned EndIndex,
+ const Twine &Name) {
+ VectorType *VecTy = cast<VectorType>(V->getType());
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+
+ if (NumElements == VecTy->getNumElements())
+ return V;
+
+ if (NumElements == 1) {
+ V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
+ Name + ".extract");
+ DEBUG(dbgs() << " extract: " << *V << "\n");
+ return V;
+ }
+
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(NumElements);
+ for (unsigned i = BeginIndex; i != EndIndex; ++i)
+ Mask.push_back(IRB.getInt32(i));
+ V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(Mask),
+ Name + ".extract");
+ DEBUG(dbgs() << " shuffle: " << *V << "\n");
+ return V;
+}
+
+static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V,
+ unsigned BeginIndex, const Twine &Name) {
+ VectorType *VecTy = cast<VectorType>(Old->getType());
+ assert(VecTy && "Can only insert a vector into a vector");
+
+ VectorType *Ty = dyn_cast<VectorType>(V->getType());
+ if (!Ty) {
+ // Single element to insert.
+ V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
+ Name + ".insert");
+ DEBUG(dbgs() << " insert: " << *V << "\n");
+ return V;
+ }
+
+ assert(Ty->getNumElements() <= VecTy->getNumElements() &&
+ "Too many elements!");
+ if (Ty->getNumElements() == VecTy->getNumElements()) {
+ assert(V->getType() == VecTy && "Vector type mismatch");
+ return V;
+ }
+ unsigned EndIndex = BeginIndex + Ty->getNumElements();
+
+ // When inserting a smaller vector into the larger to store, we first
+ // use a shuffle vector to widen it with undef elements, and then
+ // a second shuffle vector to select between the loaded vector and the
+ // incoming vector.
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(VecTy->getNumElements());
+ for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
+ if (i >= BeginIndex && i < EndIndex)
+ Mask.push_back(IRB.getInt32(i - BeginIndex));
+ else
+ Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
+ V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(Mask),
+ Name + ".expand");
+ DEBUG(dbgs() << " shuffle1: " << *V << "\n");
+
+ Mask.clear();
+ for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
+ if (i >= BeginIndex && i < EndIndex)
+ Mask.push_back(IRB.getInt32(i));
+ else
+ Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
+ V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask),
+ Name + "insert");
+ DEBUG(dbgs() << " shuffle2: " << *V << "\n");
+ return V;
+}
+
namespace {
/// \brief Visitor to rewrite instructions using a partition of an alloca to
/// use a new alloca.
@@ -2388,29 +2466,14 @@ private:
Pass.DeadInsts.insert(I);
}
- Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
- Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
+ Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) {
unsigned BeginIndex = getIndex(BeginOffset);
unsigned EndIndex = getIndex(EndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
- unsigned NumElements = EndIndex - BeginIndex;
- assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
- if (NumElements == 1) {
- V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
- getName(".extract"));
- DEBUG(dbgs() << " extract: " << *V << "\n");
- } else if (NumElements < VecTy->getNumElements()) {
- SmallVector<Constant*, 8> Mask;
- Mask.reserve(NumElements);
- for (unsigned i = BeginIndex; i != EndIndex; ++i)
- Mask.push_back(IRB.getInt32(i));
- V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
- ConstantVector::get(Mask),
- getName(".extract"));
- DEBUG(dbgs() << " shuffle: " << *V << "\n");
- }
- return V;
+
+ Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec"));
}
Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
@@ -2457,7 +2520,7 @@ private:
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
- V = rewriteVectorizedLoadInst(IRB, LI, OldOp);
+ V = rewriteVectorizedLoadInst(IRB);
} else if (IntTy && LI.getType()->isIntegerTy()) {
V = rewriteIntegerLoad(IRB, LI);
} else if (BeginOffset == NewAllocaBeginOffset &&
@@ -2518,44 +2581,12 @@ private:
: VectorType::get(ElementTy, NumElements);
if (V->getType() != PartitionTy)
V = convertValue(TD, IRB, V, PartitionTy);
- if (NumElements < VecTy->getNumElements()) {
- // We need to mix in the existing elements.
- LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- if (NumElements == 1) {
- V = IRB.CreateInsertElement(LI, V, IRB.getInt32(BeginIndex),
- getName(".insert"));
- DEBUG(dbgs() << " insert: " << *V << "\n");
- } else {
- // When inserting a smaller vector into the larger to store, we first
- // use a shuffle vector to widen it with undef elements, and then
- // a second shuffle vector to select between the loaded vector and the
- // incoming vector.
- SmallVector<Constant*, 8> Mask;
- Mask.reserve(VecTy->getNumElements());
- for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
- if (i >= BeginIndex && i < EndIndex)
- Mask.push_back(IRB.getInt32(i - BeginIndex));
- else
- Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
- V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
- ConstantVector::get(Mask),
- getName(".expand"));
- DEBUG(dbgs() << " shuffle1: " << *V << "\n");
-
- Mask.clear();
- for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
- if (i >= BeginIndex && i < EndIndex)
- Mask.push_back(IRB.getInt32(i));
- else
- Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
- V = IRB.CreateShuffleVector(V, LI, ConstantVector::get(Mask),
- getName("insert"));
- DEBUG(dbgs() << " shuffle2: " << *V << "\n");
- }
- } else {
- V = convertValue(TD, IRB, V, VecTy);
- }
+
+ // Mix in the existing elements.
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ V = insertVector(IRB, Old, V, BeginIndex, getName(".vec"));
+
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.insert(&SI);
@@ -2607,7 +2638,7 @@ private:
TD.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
assert(V->getType()->getIntegerBitWidth() ==
- TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
+ TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
"Only alloca-wide stores can be split and recomposed");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
@@ -2639,6 +2670,51 @@ private:
return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
}
+ /// \brief Compute an integer value from splatting an i8 across the given
+ /// number of bytes.
+ ///
+ /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
+ /// call this routine.
+ /// FIXME: Heed the abvice above.
+ ///
+ /// \param V The i8 value to splat.
+ /// \param Size The number of bytes in the output (assuming i8 is one byte)
+ Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) {
+ assert(Size > 0 && "Expected a positive number of bytes.");
+ IntegerType *VTy = cast<IntegerType>(V->getType());
+ assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
+ if (Size == 1)
+ return V;
+
+ Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
+ V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
+ ConstantExpr::getUDiv(
+ Constant::getAllOnesValue(SplatIntTy),
+ ConstantExpr::getZExt(
+ Constant::getAllOnesValue(V->getType()),
+ SplatIntTy)),
+ getName(".isplat"));
+ return V;
+ }
+
+ /// \brief Compute a vector splat for a given element value.
+ Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) {
+ assert(NumElements > 0 && "Cannot splat to an empty vector.");
+
+ // First insert it into a one-element vector so we can shuffle it. It is
+ // really silly that LLVM's IR requires this in order to form a splat.
+ Value *Undef = UndefValue::get(VectorType::get(V->getType(), 1));
+ V = IRB.CreateInsertElement(Undef, V, IRB.getInt32(0),
+ getName(".splatinsert"));
+
+ // Shuffle the value across the desired number of elements.
+ SmallVector<Constant*, 8> Mask(NumElements, IRB.getInt32(0));
+ V = IRB.CreateShuffleVector(V, Undef, ConstantVector::get(Mask),
+ getName(".splat"));
+ DEBUG(dbgs() << " splat: " << *V << "\n");
+ return V;
+ }
+
bool visitMemSetInst(MemSetInst &II) {
DEBUG(dbgs() << " original: " << II << "\n");
IRBuilder<> IRB(&II);
@@ -2667,7 +2743,8 @@ private:
(BeginOffset != NewAllocaBeginOffset ||
EndOffset != NewAllocaEndOffset ||
!AllocaTy->isSingleValueType() ||
- !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
+ !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)) ||
+ TD.getTypeSizeInBits(ScalarTy)%8 != 0)) {
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
CallInst *New
@@ -2683,53 +2760,62 @@ private:
// If we can represent this as a simple value, we have to build the actual
// value to store, which requires expanding the byte present in memset to
// a sensible representation for the alloca type. This is essentially
- // splatting the byte to a sufficiently wide integer, bitcasting to the
- // desired scalar type, and splatting it across any desired vector type.
+ // splatting the byte to a sufficiently wide integer, splatting it across
+ // any desired vector width, and bitcasting to the final type.
uint64_t Size = EndOffset - BeginOffset;
- Value *V = II.getValue();
- IntegerType *VTy = cast<IntegerType>(V->getType());
- Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
- if (Size*8 > VTy->getBitWidth())
- V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
- ConstantExpr::getUDiv(
- Constant::getAllOnesValue(SplatIntTy),
- ConstantExpr::getZExt(
- Constant::getAllOnesValue(V->getType()),
- SplatIntTy)),
- getName(".isplat"));
-
- // If this is an element-wide memset of a vectorizable alloca, insert it.
- if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
- EndOffset < NewAllocaEndOffset)) {
- if (V->getType() != ScalarTy)
- V = convertValue(TD, IRB, V, ScalarTy);
- StoreInst *Store = IRB.CreateAlignedStore(
- IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
- NewAI.getAlignment(),
- getName(".load")),
- V, IRB.getInt32(getIndex(BeginOffset)),
- getName(".insert")),
- &NewAI, NewAI.getAlignment());
- (void)Store;
- DEBUG(dbgs() << " to: " << *Store << "\n");
- return true;
- }
+ Value *V = getIntegerSplat(IRB, II.getValue(), Size);
+
+ if (VecTy) {
+ // If this is a memset of a vectorized alloca, insert it.
+ assert(ElementTy == ScalarTy);
+
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+
+ Value *Splat = getIntegerSplat(IRB, II.getValue(),
+ TD.getTypeSizeInBits(ElementTy)/8);
+ Splat = convertValue(TD, IRB, Splat, ElementTy);
+ if (NumElements > 1)
+ Splat = getVectorSplat(IRB, Splat, NumElements);
- // If this is a memset on an alloca where we can widen stores, insert the
- // set integer.
- if (IntTy && (BeginOffset > NewAllocaBeginOffset ||
- EndOffset < NewAllocaEndOffset)) {
- assert(!II.isVolatile());
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".oldload"));
- Old = convertValue(TD, IRB, Old, IntTy);
- assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
- uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert"));
- }
+ V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec"));
+ } else if (IntTy) {
+ // If this is a memset on an alloca where we can widen stores, insert the
+ // set integer.
+ assert(!II.isVolatile());
- if (V->getType() != AllocaTy)
+ V = getIntegerSplat(IRB, II.getValue(), Size);
+
+ if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
+ EndOffset != NewAllocaBeginOffset)) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert"));
+ } else {
+ assert(V->getType() == IntTy &&
+ "Wrong type for an alloca wide integer!");
+ }
V = convertValue(TD, IRB, V, AllocaTy);
+ } else {
+ // Established these invariants above.
+ assert(BeginOffset == NewAllocaBeginOffset);
+ assert(EndOffset == NewAllocaEndOffset);
+
+ V = getIntegerSplat(IRB, II.getValue(),
+ TD.getTypeSizeInBits(ScalarTy)/8);
+ if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
+ V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements());
+
+ V = convertValue(TD, IRB, V, AllocaTy);
+ }
Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
II.isVolatile());
@@ -2814,37 +2900,22 @@ private:
// Record this instruction for deletion.
Pass.DeadInsts.insert(&II);
- bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
- EndOffset == NewAllocaEndOffset;
- bool IsVectorElement = VecTy && !IsWholeAlloca;
- uint64_t Size = EndOffset - BeginOffset;
- IntegerType *SubIntTy
- = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
-
- Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
- : II.getRawDest()->getType();
- if (!EmitMemCpy) {
- if (IsVectorElement)
- OtherPtrTy = VecTy->getElementType()->getPointerTo();
- else if (IntTy && !IsWholeAlloca)
- OtherPtrTy = SubIntTy->getPointerTo();
- else
- OtherPtrTy = NewAI.getType();
- }
-
- // Compute the other pointer, folding as much as possible to produce
- // a single, simple GEP in most cases.
- Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
- OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
- getName("." + OtherPtr->getName()));
-
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after rewriting this instruction.
+ Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
if (AllocaInst *AI
= dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets()))
Pass.Worklist.insert(AI);
if (EmitMemCpy) {
+ Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
+ : II.getRawDest()->getType();
+
+ // Compute the other pointer, folding as much as possible to produce
+ // a single, simple GEP in most cases.
+ OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
+ getName("." + OtherPtr->getName()));
+
Value *OurPtr
= getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType()
: II.getRawSource()->getType());
@@ -2865,18 +2936,38 @@ private:
if (!Align)
Align = 1;
- Value *SrcPtr = OtherPtr;
+ bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
+ EndOffset == NewAllocaEndOffset;
+ uint64_t Size = EndOffset - BeginOffset;
+ unsigned BeginIndex = VecTy ? getIndex(BeginOffset) : 0;
+ unsigned EndIndex = VecTy ? getIndex(EndOffset) : 0;
+ unsigned NumElements = EndIndex - BeginIndex;
+ IntegerType *SubIntTy
+ = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
+
+ Type *OtherPtrTy = NewAI.getType();
+ if (VecTy && !IsWholeAlloca) {
+ if (NumElements == 1)
+ OtherPtrTy = VecTy->getElementType();
+ else
+ OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
+
+ OtherPtrTy = OtherPtrTy->getPointerTo();
+ } else if (IntTy && !IsWholeAlloca) {
+ OtherPtrTy = SubIntTy->getPointerTo();
+ }
+
+ Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
+ getName("." + OtherPtr->getName()));
Value *DstPtr = &NewAI;
if (!IsDest)
std::swap(SrcPtr, DstPtr);
Value *Src;
- if (IsVectorElement && !IsDest) {
- // We have to extract rather than load.
- Src = IRB.CreateExtractElement(
- IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
- IRB.getInt32(getIndex(BeginOffset)),
- getName(".copyextract"));
+ if (VecTy && !IsWholeAlloca && !IsDest) {
+ Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec"));
} else if (IntTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".load"));
@@ -2889,7 +2980,11 @@ private:
getName(".copyload"));
}
- if (IntTy && !IsWholeAlloca && IsDest) {
+ if (VecTy && !IsWholeAlloca && IsDest) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec"));
+ } else if (IntTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
getName(".oldload"));
Old = convertValue(TD, IRB, Old, IntTy);
@@ -2899,14 +2994,6 @@ private:
Src = convertValue(TD, IRB, Src, NewAllocaTy);
}
- if (IsVectorElement && IsDest) {
- // We have to insert into a loaded copy before storing.
- Src = IRB.CreateInsertElement(
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
- Src, IRB.getInt32(getIndex(BeginOffset)),
- getName(".insert"));
- }
-
StoreInst *Store = cast<StoreInst>(
IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
(void)Store;