Implement PR8644: forwarding a memcpy value to a byval,

allowing the memcpy to be eliminated. Unfortunately, the requirements on byval's without explicit alignment are really weak and impossible to predict in the mid-level optimizer, so this doesn't kick in much with current frontends. The fix is to change clang to set alignment on all byval arguments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@119916 91177308-0d34-0410-b5e6-96231b3b80d8
author: Chris Lattner <sabre@nondot.org> 2010-11-21 00:28:59 +0000
committer: Chris Lattner <sabre@nondot.org> 2010-11-21 00:28:59 +0000
commit: 2f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3 (patch)
tree: 0368003df16ef9b625afc9cdca10357bf6f22268 /test
parent: a6fd81dd7f6039fbc1a55f6f4d45659fffdd81fb (diff)
2 files changed, 17 insertions, 2 deletions
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 303c2fccee..7309319c46 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -62,3 +62,18 @@ define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind  {
 ; CHECK-NEXT: call void @llvm.memcpy
 ; CHECK-NEXT: ret void
 }
+
+
+; PR8644
+define void @test4(i8 *%P) {
+  %A = alloca {i32, i32}
+  %a = bitcast {i32, i32}* %A to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
+  call void @test4a(i8* byval align 1 %a) 
+  ret void
+; CHECK: @test4
+; CHECK-NEXT: call void @test4a(
+}
+
+declare void @test4a(i8* byval align 1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll
index d35ab910d7..ddfd0fd1fc 100644
--- a/test/Transforms/MemCpyOpt/sret.ll
+++ b/test/Transforms/MemCpyOpt/sret.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9"
 
-define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval  %z) nounwind  {
+define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval  align 8 %z) nounwind  {
 entry:
 	%iz = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=3]
 	%memtmp = alloca { x86_fp80, x86_fp80 }, align 16		; <{ x86_fp80, x86_fp80 }*> [#uses=2]
@@ -16,7 +16,7 @@ entry:
 	%tmp8 = load x86_fp80* %tmp7, align 16		; <x86_fp80> [#uses=1]
 	store x86_fp80 %tmp3, x86_fp80* %real, align 16
 	store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
-	call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret  %memtmp, { x86_fp80, x86_fp80 }* byval  %iz ) nounwind 
+	call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret  %memtmp, { x86_fp80, x86_fp80 }* byval align 8 %iz ) nounwind 
 	%memtmp14 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*		; <i8*> [#uses=1]
 	%agg.result15 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*		; <i8*> [#uses=1]
 	call void @llvm.memcpy.i32( i8* %agg.result15, i8* %memtmp14, i32 32, i32 16 )
author	Chris Lattner <sabre@nondot.org>	2010-11-21 00:28:59 +0000
committer	Chris Lattner <sabre@nondot.org>	2010-11-21 00:28:59 +0000
commit	2f5f90ad3e9b00cf21ae8e3f55b93f0be1d504c3 (patch)
tree	0368003df16ef9b625afc9cdca10357bf6f22268 /test
parent	a6fd81dd7f6039fbc1a55f6f4d45659fffdd81fb (diff)