Diffstat (limited to 'lib/Transforms/Scalar/SROA.cpp')
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 371
1 file changed, 262 insertions, 109 deletions
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index ca76251492..3e84a91c1d 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -447,6 +447,7 @@ protected:
   bool computeConstantGEPOffset(GetElementPtrInst &GEPI, int64_t &GEPOffset) {
     GEPOffset = Offset;
+    unsigned AS = GEPI.getPointerAddressSpace();
     for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI);
          GTI != GTE; ++GTI) {
       ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
@@ -476,7 +477,7 @@ protected:
         continue;
       }

-      APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits());
+      APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits(AS));
       Index *= APInt(Index.getBitWidth(),
                      TD.getTypeAllocSize(GTI.getIndexedType()));
       Index += APInt(Index.getBitWidth(), (uint64_t)GEPOffset,
@@ -1784,7 +1785,9 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
       break;
     if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {
       ElementTy = SeqTy->getElementType();
-      Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(), 0)));
+      Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(
+          ElementTy->isPointerTy() ?
+              cast<PointerType>(ElementTy)->getAddressSpace() : 0), 0)));
     } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
       if (STy->element_begin() == STy->element_end())
         break; // Nothing left to descend into.
@@ -2004,6 +2007,51 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
   return Ptr;
 }

+/// \brief Test whether we can convert a value from the old to the new type.
+///
+/// This predicate should be used to guard calls to convertValue in order to
+/// ensure that we only try to convert viable values. The strategy is that we
+/// will peel off single element struct and array wrappings to get to an
+/// underlying value, and convert that value.
+static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
+  if (OldTy == NewTy)
+    return true;
+  if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
+    return false;
+  if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
+    return false;
+
+  if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
+    if (NewTy->isPointerTy() && OldTy->isPointerTy())
+      return true;
+    if (NewTy->isIntegerTy() || OldTy->isIntegerTy())
+      return true;
+    return false;
+  }
+
+  return true;
+}
+
+/// \brief Generic routine to convert an SSA value to a value of a different
+/// type.
+///
+/// This will try various different casting techniques, such as bitcasts,
+/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
+/// two types for viability with this routine.
+static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+                           Type *Ty) {
+  assert(canConvertValue(DL, V->getType(), Ty) &&
+         "Value not convertible to type");
+  if (V->getType() == Ty)
+    return V;
+  if (V->getType()->isIntegerTy() && Ty->isPointerTy())
+    return IRB.CreateIntToPtr(V, Ty);
+  if (V->getType()->isPointerTy() && Ty->isIntegerTy())
+    return IRB.CreatePtrToInt(V, Ty);
+
+  return IRB.CreateBitCast(V, Ty);
+}
+
 /// \brief Test whether the given alloca partition can be promoted to a vector.
 ///
 /// This is a quick test to check whether we can rewrite a particular alloca
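The two new helpers above centralize the casting logic the rewriter previously spread across ad-hoc call sites (the old getValueCast, deleted further down, is subsumed by them). As a rough standalone sketch of the dispatch order — hypothetical mirror code, not the file-static function itself, with the builder setup assumed:

    // Sketch: how convertValue picks a cast once canConvertValue has accepted
    // the pair (equal store sizes, both single-value types).
    static llvm::Value *convertLike(llvm::IRBuilder<> &IRB, llvm::Value *V,
                                    llvm::Type *Ty) {
      if (V->getType() == Ty)
        return V;                          // nothing to do
      if (V->getType()->isIntegerTy() && Ty->isPointerTy())
        return IRB.CreateIntToPtr(V, Ty);  // e.g. i64 -> i8*
      if (V->getType()->isPointerTy() && Ty->isIntegerTy())
        return IRB.CreatePtrToInt(V, Ty);  // e.g. i8* -> i64
      return IRB.CreateBitCast(V, Ty);     // same-width reinterpret, e.g. i64 -> double
    }

Pointer-to-pointer pairs fall through to the bitcast case, which is why canConvertValue accepts them without extra checks.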
@@ -2075,47 +2123,74 @@ static bool isVectorPromotionViable(const DataLayout &TD,
   return true;
 }

-/// \brief Test whether the given alloca partition can be promoted to an int.
+/// \brief Test whether the given alloca partition's integer operations can be
+/// widened to promotable ones.
 ///
-/// This is a quick test to check whether we can rewrite a particular alloca
-/// partition (and its newly formed alloca) into an integer alloca suitable for
-/// promotion to an SSA value. We only can ensure this for a limited set of
-/// operations, and we don't want to do the rewrites unless we are confident
-/// that the result will be promotable, so we have an early test here.
-static bool isIntegerPromotionViable(const DataLayout &TD,
-                                     Type *AllocaTy,
-                                     uint64_t AllocBeginOffset,
-                                     AllocaPartitioning &P,
-                                     AllocaPartitioning::const_use_iterator I,
-                                     AllocaPartitioning::const_use_iterator E) {
-  IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
-  if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth())
+/// This is a quick test to check whether we can rewrite the integer loads and
+/// stores to a particular alloca into wider loads and stores and be able to
+/// promote the resulting alloca.
+static bool isIntegerWideningViable(const DataLayout &TD,
+                                    Type *AllocaTy,
+                                    uint64_t AllocBeginOffset,
+                                    AllocaPartitioning &P,
+                                    AllocaPartitioning::const_use_iterator I,
+                                    AllocaPartitioning::const_use_iterator E) {
+  uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy);
+
+  // Don't try to handle allocas with bit-padding.
+  if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy))
     return false;

+  uint64_t Size = TD.getTypeStoreSize(AllocaTy);
+
   // Check the uses to ensure the uses are (likely) promotable integer uses.
   // Also ensure that the alloca has a covering load or store. We don't want
-  // promote because of some other unsplittable entry (which we may make
-  // splittable later) and lose the ability to promote each element access.
+  // to widen the integer operations only to fail to promote due to some other
+  // unsplittable entry (which we may make splittable later).
   bool WholeAllocaOp = false;
   for (; I != E; ++I) {
     if (!I->U)
       continue; // Skip dead use.

+    uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
+    uint64_t RelEnd = I->EndOffset - AllocBeginOffset;
+
     // We can't reasonably handle cases where the load or store extends past
-    // the end of the aloca's type and into its padding.
-    if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty))
+    // the end of the alloca's type and into its padding.
+    if (RelEnd > Size)
       return false;

     if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
-      if (LI->isVolatile() || !LI->getType()->isIntegerTy())
+      if (LI->isVolatile())
         return false;
-      if (LI->getType() == Ty)
+      if (RelBegin == 0 && RelEnd == Size)
         WholeAllocaOp = true;
+      if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
+        if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy))
+          return false;
+        continue;
+      }
+      // Non-integer loads need to be convertible from the alloca type so that
+      // they are promotable.
+      if (RelBegin != 0 || RelEnd != Size ||
+          !canConvertValue(TD, AllocaTy, LI->getType()))
+        return false;
     } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
-      if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())
+      Type *ValueTy = SI->getValueOperand()->getType();
+      if (SI->isVolatile())
         return false;
-      if (SI->getValueOperand()->getType() == Ty)
+      if (RelBegin == 0 && RelEnd == Size)
         WholeAllocaOp = true;
+      if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
+        if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy))
+          return false;
+        continue;
+      }
+      // Non-integer stores need to be convertible to the alloca type so that
+      // they are promotable.
+      if (RelBegin != 0 || RelEnd != Size ||
+          !canConvertValue(TD, ValueTy, AllocaTy))
+        return false;
     } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
       if (MI->isVolatile())
         return false;
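A hypothetical source-level shape that this widening targets (illustrative only; the exact IR and whether widening fires depend on the frontend and on how the alloca partitions):

    #include <cstdint>

    uint64_t pack(uint32_t Lo, uint32_t Hi) {
      union { uint32_t Half[2]; uint64_t Whole; } U; // one i64-sized alloca
      U.Half[0] = Lo;  // narrow i32 store at offset 0
      U.Half[1] = Hi;  // narrow i32 store at offset 4
      return U.Whole;  // covering i64 load: the required "WholeAllocaOp"
    }

The narrow stores become insertInteger shift-and-mask sequences on a single i64, which the later promotion can then turn into pure SSA values.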
@@ -2125,6 +2200,10 @@ static bool isIntegerPromotionViable(const DataLayout &TD,
         if (!MTO.IsSplittable)
           return false;
       }
+    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+          II->getIntrinsicID() != Intrinsic::lifetime_end)
+        return false;
     } else {
       return false;
     }
@@ -2149,6 +2228,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
   SROA &Pass;
   AllocaInst &OldAI, &NewAI;
   const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
+  Type *NewAllocaTy;

   // If we are rewriting an alloca partition which can be written as pure
   // vector operations, we stash extra information here. When VecTy is
@@ -2164,10 +2244,10 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
   uint64_t ElementSize;

   // This is a convenience and flag variable that will be null unless the new
-  // alloca has a promotion-targeted integer type due to passing
-  // isIntegerPromotionViable above. If it is non-null does, the desired
+  // alloca's integer operations should be widened to this integer type due to
+  // passing isIntegerWideningViable above. If it is non-null, the desired
   // integer type will be stored here for easy access during rewriting.
-  IntegerType *IntPromotionTy;
+  IntegerType *IntTy;

   // The offset of the partition user currently being rewritten.
   uint64_t BeginOffset, EndOffset;
@@ -2186,7 +2266,8 @@ public:
       OldAI(OldAI), NewAI(NewAI),
       NewAllocaBeginOffset(NewBeginOffset),
       NewAllocaEndOffset(NewEndOffset),
-      VecTy(), ElementTy(), ElementSize(), IntPromotionTy(),
+      NewAllocaTy(NewAI.getAllocatedType()),
+      VecTy(), ElementTy(), ElementSize(), IntTy(),
       BeginOffset(), EndOffset() {
   }
@@ -2202,9 +2283,10 @@ public:
       assert((VecTy->getScalarSizeInBits() % 8) == 0 &&
              "Only multiple-of-8 sized vector elements are viable");
       ElementSize = VecTy->getScalarSizeInBits() / 8;
-    } else if (isIntegerPromotionViable(TD, NewAI.getAllocatedType(),
-                                        NewAllocaBeginOffset, P, I, E)) {
-      IntPromotionTy = cast<IntegerType>(NewAI.getAllocatedType());
+    } else if (isIntegerWideningViable(TD, NewAI.getAllocatedType(),
+                                       NewAllocaBeginOffset, P, I, E)) {
+      IntTy = Type::getIntNTy(NewAI.getContext(),
+                              TD.getTypeSizeInBits(NewAI.getAllocatedType()));
     }
     bool CanSROA = true;
     for (; I != E; ++I) {
@@ -2223,6 +2305,10 @@ public:
       ElementTy = 0;
       ElementSize = 0;
     }
+    if (IntTy) {
+      assert(CanSROA);
+      IntTy = 0;
+    }

     return CanSROA;
   }
@@ -2239,7 +2325,8 @@ private:

   Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
     assert(BeginOffset >= NewAllocaBeginOffset);
-    APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
+    unsigned AS = cast<PointerType>(PointerTy)->getAddressSpace();
+    APInt Offset(TD.getPointerSizeInBits(AS), BeginOffset - NewAllocaBeginOffset);
     return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
   }
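The getPointerSizeInBits(AS) threading in this and the earlier hunks matters on targets where pointer width differs per address space. A sketch, assuming the data-layout string syntax from the concurrent multi-address-space DataLayout work and this era's header paths:

    #include "llvm/DataLayout.h"

    void pointerWidths() {
      // 64-bit pointers in address space 0, 32-bit pointers in address space 1.
      llvm::DataLayout TD("e-p:64:64:64-p1:32:32:32");
      unsigned W0 = TD.getPointerSizeInBits(0); // 64
      unsigned W1 = TD.getPointerSizeInBits(1); // 32
      (void)W0; (void)W1;
    }

Offset APInts such as the one in getAdjustedAllocaPtr must be built with the width of the pointer's own address space, which is exactly what the change above does.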
@@ -2286,55 +2373,56 @@ private:

   Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
                         uint64_t Offset) {
-    assert(IntPromotionTy && "Alloca is not an integer we can extract from");
+    assert(IntTy && "We cannot extract an integer from the alloca");
     Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                      getName(".load"));
+    V = convertValue(TD, IRB, V, IntTy);
     assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
     uint64_t RelOffset = Offset - NewAllocaBeginOffset;
     assert(TD.getTypeStoreSize(TargetTy) + RelOffset <=
-           TD.getTypeStoreSize(IntPromotionTy) &&
+           TD.getTypeStoreSize(IntTy) &&
            "Element load outside of alloca store");
     uint64_t ShAmt = 8*RelOffset;
     if (TD.isBigEndian())
-      ShAmt = 8*(TD.getTypeStoreSize(IntPromotionTy) -
+      ShAmt = 8*(TD.getTypeStoreSize(IntTy) -
                  TD.getTypeStoreSize(TargetTy) - RelOffset);
     if (ShAmt)
       V = IRB.CreateLShr(V, ShAmt, getName(".shift"));
-    if (TargetTy != IntPromotionTy) {
-      assert(TargetTy->getBitWidth() < IntPromotionTy->getBitWidth() &&
-             "Cannot extract to a larger integer!");
+    assert(TargetTy->getBitWidth() <= IntTy->getBitWidth() &&
+           "Cannot extract to a larger integer!");
+    if (TargetTy != IntTy)
       V = IRB.CreateTrunc(V, TargetTy, getName(".trunc"));
-    }
     return V;
   }

   StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
     IntegerType *Ty = cast<IntegerType>(V->getType());
-    if (Ty == IntPromotionTy)
-      return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
-
-    assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
+    assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
           "Cannot insert a larger integer!");
-    V = IRB.CreateZExt(V, IntPromotionTy, getName(".ext"));
+    if (Ty != IntTy)
+      V = IRB.CreateZExt(V, IntTy, getName(".ext"));
     assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
     uint64_t RelOffset = Offset - NewAllocaBeginOffset;
     assert(TD.getTypeStoreSize(Ty) + RelOffset <=
-           TD.getTypeStoreSize(IntPromotionTy) &&
+           TD.getTypeStoreSize(IntTy) &&
            "Element store outside of alloca store");
     uint64_t ShAmt = 8*RelOffset;
     if (TD.isBigEndian())
-      ShAmt = 8*(TD.getTypeStoreSize(IntPromotionTy) - TD.getTypeStoreSize(Ty)
+      ShAmt = 8*(TD.getTypeStoreSize(IntTy) - TD.getTypeStoreSize(Ty)
                  - RelOffset);
     if (ShAmt)
       V = IRB.CreateShl(V, ShAmt, getName(".shift"));

-    APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth()).shl(ShAmt);
-    Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI,
-                                                     NewAI.getAlignment(),
-                                                     getName(".oldload")),
-                               Mask, getName(".mask"));
-    return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")),
-                                  &NewAI, NewAI.getAlignment());
+    if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
+      APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
+      Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                         getName(".oldload"));
+      Old = convertValue(TD, IRB, Old, IntTy);
+      Old = IRB.CreateAnd(Old, Mask, getName(".mask"));
+      V = IRB.CreateOr(Old, V, getName(".insert"));
+    }
+    V = convertValue(TD, IRB, V, NewAllocaTy);
+    return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
   }

   void deleteIfTriviallyDead(Value *V) {
@@ -2343,15 +2431,6 @@ private:
       Pass.DeadInsts.push_back(I);
   }

-  Value *getValueCast(IRBuilder<> &IRB, Value *V, Type *Ty) {
-    if (V->getType()->isIntegerTy() && Ty->isPointerTy())
-      return IRB.CreateIntToPtr(V, Ty);
-    if (V->getType()->isPointerTy() && Ty->isIntegerTy())
-      return IRB.CreatePtrToInt(V, Ty);
-
-    return IRB.CreateBitCast(V, Ty);
-  }
-
   bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
     Value *Result;
     if (LI.getType() == VecTy->getElementType() ||
@@ -2364,7 +2443,7 @@ private:
                                      getName(".load"));
     }
     if (Result->getType() != LI.getType())
-      Result = getValueCast(IRB, Result, LI.getType());
+      Result = convertValue(TD, IRB, Result, LI.getType());
     LI.replaceAllUsesWith(Result);
     Pass.DeadInsts.push_back(&LI);
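The shift-and-mask arithmetic that extractInteger and insertInteger perform on the widened value can be modeled in plain C++ (little-endian case; big-endian flips the shift amount as the code above computes). The constants are made up for the demonstration:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Wide = 0x1122334455667788ULL; // contents of a widened i64 alloca
      // extractInteger of an i16 at byte offset 2: lshr by 8*offset, then trunc.
      uint16_t Part = static_cast<uint16_t>(Wide >> (8 * 2));
      assert(Part == 0x5566);
      // insertInteger at the same offset: zext, shl, mask out the old bits,
      // then or in the new ones.
      uint64_t New = static_cast<uint64_t>(static_cast<uint16_t>(0xBEEF)) << (8 * 2);
      uint64_t Mask = ~(static_cast<uint64_t>(0xFFFF) << (8 * 2));
      Wide = (Wide & Mask) | New;
      assert(Wide == 0x11223344BEEF7788ULL);
      return 0;
    }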
@@ -2390,9 +2469,23 @@ private:
     if (VecTy)
       return rewriteVectorizedLoadInst(IRB, LI, OldOp);
-    if (IntPromotionTy)
+    if (IntTy && LI.getType()->isIntegerTy())
       return rewriteIntegerLoad(IRB, LI);

+    if (BeginOffset == NewAllocaBeginOffset &&
+        canConvertValue(TD, NewAllocaTy, LI.getType())) {
+      Value *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                           LI.isVolatile(), getName(".load"));
+      Value *NewV = convertValue(TD, IRB, NewLI, LI.getType());
+      LI.replaceAllUsesWith(NewV);
+      Pass.DeadInsts.push_back(&LI);
+
+      DEBUG(dbgs() << "          to: " << *NewLI << "\n");
+      return !LI.isVolatile();
+    }
+
+    assert(!IntTy && "Invalid load found with int-op widening enabled");
+
     Value *NewPtr = getAdjustedAllocaPtr(IRB,
                                          LI.getPointerOperand()->getType());
     LI.setOperand(0, NewPtr);
@@ -2409,13 +2502,13 @@ private:
     if (V->getType() == ElementTy ||
         BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
       if (V->getType() != ElementTy)
-        V = getValueCast(IRB, V, ElementTy);
+        V = convertValue(TD, IRB, V, ElementTy);
       LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                            getName(".load"));
       V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
                                   getName(".insert"));
     } else if (V->getType() != VecTy) {
-      V = getValueCast(IRB, V, VecTy);
+      V = convertValue(TD, IRB, V, VecTy);
     }
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
     Pass.DeadInsts.push_back(&SI);
@@ -2442,16 +2535,31 @@ private:
     if (VecTy)
       return rewriteVectorizedStoreInst(IRB, SI, OldOp);
-    if (IntPromotionTy)
+    Type *ValueTy = SI.getValueOperand()->getType();
+    if (IntTy && ValueTy->isIntegerTy())
       return rewriteIntegerStore(IRB, SI);

     // Strip all inbounds GEPs and pointer casts to try to dig out any root
     // alloca that should be re-examined after promoting this alloca.
-    if (SI.getValueOperand()->getType()->isPointerTy())
+    if (ValueTy->isPointerTy())
       if (AllocaInst *AI = dyn_cast<AllocaInst>(SI.getValueOperand()
                                                   ->stripInBoundsOffsets()))
         Pass.PostPromotionWorklist.insert(AI);

+    if (BeginOffset == NewAllocaBeginOffset &&
+        canConvertValue(TD, ValueTy, NewAllocaTy)) {
+      Value *NewV = convertValue(TD, IRB, SI.getValueOperand(), NewAllocaTy);
+      StoreInst *NewSI = IRB.CreateAlignedStore(NewV, &NewAI, NewAI.getAlignment(),
+                                                SI.isVolatile());
+      (void)NewSI;
+      Pass.DeadInsts.push_back(&SI);
+
+      DEBUG(dbgs() << "          to: " << *NewSI << "\n");
+      return !SI.isVolatile();
+    }
+
+    assert(!IntTy && "Invalid store found with int-op widening enabled");
+
     Value *NewPtr = getAdjustedAllocaPtr(IRB,
                                          SI.getPointerOperand()->getType());
     SI.setOperand(1, NewPtr);
@@ -2487,10 +2595,11 @@ private:

     // If this doesn't map cleanly onto the alloca type, and that type isn't
     // a single value type, just emit a memset.
-    if (!VecTy && (BeginOffset != NewAllocaBeginOffset ||
-                   EndOffset != NewAllocaEndOffset ||
-                   !AllocaTy->isSingleValueType() ||
-                   !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
+    if (!VecTy && !IntTy &&
+        (BeginOffset != NewAllocaBeginOffset ||
+         EndOffset != NewAllocaEndOffset ||
+         !AllocaTy->isSingleValueType() ||
+         !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
       Type *SizeTy = II.getLength()->getType();
       Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
       CallInst *New
@@ -2508,32 +2617,24 @@ private:
     // a sensible representation for the alloca type. This is essentially
     // splatting the byte to a sufficiently wide integer, bitcasting to the
     // desired scalar type, and splatting it across any desired vector type.
+    uint64_t Size = EndOffset - BeginOffset;
     Value *V = II.getValue();
     IntegerType *VTy = cast<IntegerType>(V->getType());
-    Type *IntTy = Type::getIntNTy(VTy->getContext(),
-                                  TD.getTypeSizeInBits(ScalarTy));
-    if (TD.getTypeSizeInBits(ScalarTy) > VTy->getBitWidth())
-      V = IRB.CreateMul(IRB.CreateZExt(V, IntTy, getName(".zext")),
+    Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
+    if (Size*8 > VTy->getBitWidth())
+      V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
                         ConstantExpr::getUDiv(
-                          Constant::getAllOnesValue(IntTy),
+                          Constant::getAllOnesValue(SplatIntTy),
                           ConstantExpr::getZExt(
                             Constant::getAllOnesValue(V->getType()),
-                            IntTy)),
+                            SplatIntTy)),
                         getName(".isplat"));
-    if (V->getType() != ScalarTy) {
-      if (ScalarTy->isPointerTy())
-        V = IRB.CreateIntToPtr(V, ScalarTy);
-      else if (ScalarTy->isPrimitiveType() || ScalarTy->isVectorTy())
-        V = IRB.CreateBitCast(V, ScalarTy);
-      else if (ScalarTy->isIntegerTy())
-        llvm_unreachable("Computed different integer types with equal widths");
-      else
-        llvm_unreachable("Invalid scalar type");
-    }

     // If this is an element-wide memset of a vectorizable alloca, insert it.
     if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
                   EndOffset < NewAllocaEndOffset)) {
+      if (V->getType() != ScalarTy)
+        V = convertValue(TD, IRB, V, ScalarTy);
       StoreInst *Store = IRB.CreateAlignedStore(
         IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
                                                       NewAI.getAlignment(),
@@ -2546,18 +2647,20 @@ private:
       return true;
     }

-    // Splat to a vector if needed.
-    if (VectorType *VecTy = dyn_cast<VectorType>(AllocaTy)) {
-      VectorType *SplatSourceTy = VectorType::get(V->getType(), 1);
-      V = IRB.CreateShuffleVector(
-        IRB.CreateInsertElement(UndefValue::get(SplatSourceTy), V,
-                                IRB.getInt32(0), getName(".vsplat.insert")),
-        UndefValue::get(SplatSourceTy),
-        ConstantVector::getSplat(VecTy->getNumElements(), IRB.getInt32(0)),
-        getName(".vsplat.shuffle"));
-      assert(V->getType() == VecTy);
+    // If this is a memset on an alloca where we can widen stores, insert the
+    // set integer.
+    if (IntTy && (BeginOffset > NewAllocaBeginOffset ||
+                  EndOffset < NewAllocaEndOffset)) {
+      assert(!II.isVolatile());
+      StoreInst *Store = insertInteger(IRB, V, BeginOffset);
+      (void)Store;
+      DEBUG(dbgs() << "          to: " << *Store << "\n");
+      return true;
     }

+    if (V->getType() != AllocaTy)
+      V = convertValue(TD, IRB, V, AllocaTy);
+
     Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
                                         II.isVolatile());
     (void)New;
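The "integer splat" in the memset rewrite replicates the set byte across the partition width by multiplying its zero-extension with an all-0x01 pattern, which the patch materializes as all-ones udiv 0xFF via ConstantExpr. The same trick in plain C++, with an arbitrary example byte:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t B = 0xAB;
      // ~0 / 0xFF == 0x0101010101010101; multiplying copies B into every byte.
      uint64_t Splat = static_cast<uint64_t>(B) * (~0ULL / 0xFF);
      assert(Splat == 0xABABABABABABABABULL);
      return 0;
    }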
@@ -2578,8 +2681,10 @@ private:
     const AllocaPartitioning::MemTransferOffsets &MTO
       = P.getMemTransferOffsets(II);

+    assert(OldPtr->getType()->isPointerTy() && "Must be a pointer type!");
+    unsigned AS = cast<PointerType>(OldPtr->getType())->getAddressSpace();
     // Compute the relative offset within the transfer.
-    unsigned IntPtrWidth = TD.getPointerSizeInBits();
+    unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
     APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
                                                        : MTO.SourceBegin));

@@ -2618,9 +2723,9 @@ private:
     // If this doesn't map cleanly onto the alloca type, and that type isn't
     // a single value type, just emit a memcpy.
     bool EmitMemCpy
-      = !VecTy && (BeginOffset != NewAllocaBeginOffset ||
-                   EndOffset != NewAllocaEndOffset ||
-                   !NewAI.getAllocatedType()->isSingleValueType());
+      = !VecTy && !IntTy && (BeginOffset != NewAllocaBeginOffset ||
+                             EndOffset != NewAllocaEndOffset ||
+                             !NewAI.getAllocatedType()->isSingleValueType());

     // If we're just going to emit a memcpy, the alloca hasn't changed, and the
     // size hasn't been shrunk based on analysis of the viable range, this is
@@ -2642,14 +2747,23 @@ private:
     if (Pass.DeadSplitInsts.insert(&II))
       Pass.DeadInsts.push_back(&II);

-    bool IsVectorElement = VecTy && (BeginOffset > NewAllocaBeginOffset ||
-                                     EndOffset < NewAllocaEndOffset);
+    bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
+                         EndOffset == NewAllocaEndOffset;
+    bool IsVectorElement = VecTy && !IsWholeAlloca;
+    uint64_t Size = EndOffset - BeginOffset;
+    IntegerType *SubIntTy
+      = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;

     Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
                               : II.getRawDest()->getType();
-    if (!EmitMemCpy)
-      OtherPtrTy = IsVectorElement ? VecTy->getElementType()->getPointerTo()
-                                   : NewAI.getType();
+    if (!EmitMemCpy) {
+      if (IsVectorElement)
+        OtherPtrTy = VecTy->getElementType()->getPointerTo();
+      else if (IntTy && !IsWholeAlloca)
+        OtherPtrTy = SubIntTy->getPointerTo();
+      else
+        OtherPtrTy = NewAI.getType();
+    }

     // Compute the other pointer, folding as much as possible to produce
     // a single, simple GEP in most cases.
@@ -2696,11 +2810,20 @@ private:
         IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
         getIndex(IRB, BeginOffset),
         getName(".copyextract"));
+    } else if (IntTy && !IsWholeAlloca && !IsDest) {
+      Src = extractInteger(IRB, SubIntTy, BeginOffset);
     } else {
       Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
                                   getName(".copyload"));
     }

+    if (IntTy && !IsWholeAlloca && IsDest) {
+      StoreInst *Store = insertInteger(IRB, Src, BeginOffset);
+      (void)Store;
+      DEBUG(dbgs() << "          to: " << *Store << "\n");
+      return true;
+    }
+
     if (IsVectorElement && IsDest) {
       // We have to insert into a loaded copy before storing.
       Src = IRB.CreateInsertElement(
@@ -2993,6 +3116,36 @@ private:
 };
 }

+/// \brief Strip aggregate type wrapping.
+///
+/// This removes no-op aggregate types wrapping an underlying type. It will
+/// strip as many layers of types as it can without changing either the type
+/// size or the allocated size.
+static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
+  if (Ty->isSingleValueType())
+    return Ty;
+
+  uint64_t AllocSize = DL.getTypeAllocSize(Ty);
+  uint64_t TypeSize = DL.getTypeSizeInBits(Ty);
+
+  Type *InnerTy;
+  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
+    InnerTy = ArrTy->getElementType();
+  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+    const StructLayout *SL = DL.getStructLayout(STy);
+    unsigned Index = SL->getElementContainingOffset(0);
+    InnerTy = STy->getElementType(Index);
+  } else {
+    return Ty;
+  }
+
+  if (AllocSize > DL.getTypeAllocSize(InnerTy) ||
+      TypeSize > DL.getTypeSizeInBits(InnerTy))
+    return Ty;
+
+  return stripAggregateTypeWrapping(DL, InnerTy);
+}
+
 /// \brief Try to find a partition of the aggregate type passed in for a given
 /// offset and size.
 ///
@@ -3009,7 +3162,7 @@
 static Type *getTypePartition(const DataLayout &TD, Type *Ty,
                               uint64_t Offset, uint64_t Size) {
   if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)
-    return Ty;
+    return stripAggregateTypeWrapping(TD, Ty);

   if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
     // We can't partition pointers...
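stripAggregateTypeWrapping only peels layers that change neither the bit size nor the alloc size, so getTypePartition now returns the innermost equivalent type instead of a single-element wrapper. A hedged sketch of the effect (LLVM C++ API with this era's header paths; the function itself is file-static, so expected results are stated in comments):

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"

    void wrappingExample() {
      llvm::LLVMContext C;
      llvm::Type *I64 = llvm::Type::getInt64Ty(C);
      llvm::Type *Arr = llvm::ArrayType::get(I64, 1);    // [1 x i64]
      llvm::Type *Elts[] = { Arr };
      llvm::Type *Wrap = llvm::StructType::get(C, Elts); // { [1 x i64] }
      (void)Wrap;
      // stripAggregateTypeWrapping(DL, Wrap) -> i64: both wrappers cover the
      // same eight bytes, so they peel away down to the scalar.
      // A type like { i64, i32 } is returned unchanged: its first element
      // does not cover the struct's allocation size.
    }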
@@ -3038,7 +3191,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
     assert(Offset == 0);

     if (Size == ElementSize)
-      return ElementTy;
+      return stripAggregateTypeWrapping(TD, ElementTy);
     assert(Size > ElementSize);
     uint64_t NumElements = Size / ElementSize;
     if (NumElements * ElementSize != Size)
@@ -3074,7 +3227,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
     assert(Offset == 0);

     if (Size == ElementSize)
-      return ElementTy;
+      return stripAggregateTypeWrapping(TD, ElementTy);

     StructType::element_iterator EI = STy->element_begin() + Index,
                                  EE = STy->element_end();