diff options
-rw-r--r-- | lib/Transforms/Scalar/SROA.cpp | 34 |
-rw-r--r-- | test/Transforms/SROA/basictest.ll | 6 |
2 files changed, 31 insertions, 9 deletions
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 419756d4dc..7d2ce098aa 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -2717,9 +2717,9 @@ private: // If this doesn't map cleanly onto the alloca type, and that type isn't // a single value type, just emit a memcpy. bool EmitMemCpy - = !VecTy && (BeginOffset != NewAllocaBeginOffset || - EndOffset != NewAllocaEndOffset || - !NewAI.getAllocatedType()->isSingleValueType()); + = !VecTy && !IntTy && (BeginOffset != NewAllocaBeginOffset || + EndOffset != NewAllocaEndOffset || + !NewAI.getAllocatedType()->isSingleValueType()); // If we're just going to emit a memcpy, the alloca hasn't changed, and the // size hasn't been shrunk based on analysis of the viable range, this is @@ -2741,14 +2741,23 @@ private: if (Pass.DeadSplitInsts.insert(&II)) Pass.DeadInsts.push_back(&II); - bool IsVectorElement = VecTy && (BeginOffset > NewAllocaBeginOffset || - EndOffset < NewAllocaEndOffset); + bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && + EndOffset == NewAllocaEndOffset; + bool IsVectorElement = VecTy && !IsWholeAlloca; + uint64_t Size = EndOffset - BeginOffset; + IntegerType *SubIntTy + = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; Type *OtherPtrTy = IsDest ? II.getRawSource()->getType() : II.getRawDest()->getType(); - if (!EmitMemCpy) - OtherPtrTy = IsVectorElement ? VecTy->getElementType()->getPointerTo() - : NewAI.getType(); + if (!EmitMemCpy) { + if (IsVectorElement) + OtherPtrTy = VecTy->getElementType()->getPointerTo(); + else if (IntTy && !IsWholeAlloca) + OtherPtrTy = SubIntTy->getPointerTo(); + else + OtherPtrTy = NewAI.getType(); + } // Compute the other pointer, folding as much as possible to produce // a single, simple GEP in most cases. 
@@ -2795,11 +2804,20 @@ private: IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")), getIndex(IRB, BeginOffset), getName(".copyextract")); + } else if (IntTy && !IsWholeAlloca && !IsDest) { + Src = extractInteger(IRB, SubIntTy, BeginOffset); } else { Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(), getName(".copyload")); } + if (IntTy && !IsWholeAlloca && IsDest) { + StoreInst *Store = insertInteger(IRB, Src, BeginOffset); + (void)Store; + DEBUG(dbgs() << " to: " << *Store << "\n"); + return true; + } + if (IsVectorElement && IsDest) { // We have to insert into a loaded copy before storing. Src = IRB.CreateInsertElement( diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll index b33ffa65f6..644fda167d 100644 --- a/test/Transforms/SROA/basictest.ll +++ b/test/Transforms/SROA/basictest.ll @@ -1046,8 +1046,12 @@ entry: ; Or a memset of the whole thing. call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 1, i1 false) - ; Store to the high 32-bits... + ; Write to the high 32-bits with a memcpy. %X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8* %0, i32 4 + %d.raw = bitcast double* %d to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %X.sroa.0.4.raw_idx4.i, i8* %d.raw, i32 4, i32 1, i1 false) + + ; Store to the high 32-bits... %X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32* store i32 1072693248, i32* %X.sroa.0.4.cast5.i, align 4 |