Prevent insertion of "vzeroupper" before a call that preserves YMM registers, since the caller may keep using those preserved registers across the call.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175043 91177308-0d34-0410-b5e6-96231b3b80d8
author: Elena Demikhovsky <elena.demikhovsky@intel.com> 2013-02-13 08:02:04 +0000
committer: Elena Demikhovsky <elena.demikhovsky@intel.com> 2013-02-13 08:02:04 +0000
commit: d29804f80d1cc26ea552b58693ce883f5b13de7a (patch)
tree: aeb300a914122fd17fdf0a6d4f6184bf650ba49f /test/CodeGen
parent: cef6cfe4a67af030754b4151cd63076c4aab7467 (diff)
1 files changed, 40 insertions, 0 deletions
diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll
index 0fec9658d6..055072098a 100644
--- a/test/CodeGen/X86/avx-intel-ocl.ll
+++ b/test/CodeGen/X86/avx-intel-ocl.ll
@@ -127,3 +127,43 @@ define i32 @test_int(i32 %a, i32 %b) nounwind {
     %c = add i32 %c2, %b
 	ret i32 %c
 }
+
+; WIN64: test_float4
+; WIN64-NOT: vzeroupper
+; WIN64: call
+; WIN64-NOT: vzeroupper
+; WIN64: call
+; WIN64: ret
+
+; X64: test_float4
+; X64-NOT: vzeroupper
+; X64: call
+; X64-NOT: vzeroupper
+; X64: call
+; X64: ret
+
+; X32: test_float4
+; X32: vzeroupper
+; X32: call
+; X32: vzeroupper
+; X32: call
+; X32: ret
+
+declare <4 x float> @func_float4(<4 x float>, <4 x float>, <4 x float>)
+
+define <8 x float> @test_float4(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone { ; Applies @func_float4 to the low and to the high 4-float halves of %a/%b/%c and returns both results joined into one <8 x float>.
+entry: ; %a, %b and %c are read again after the first call (for their elements 4-7), so their values are needed across that call.
+  %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; elements 0-3 of %a
+  %1 = shufflevector <8 x float> %b, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; elements 0-3 of %b
+  %2 = shufflevector <8 x float> %c, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; elements 0-3 of %c
+  %call.i = tail call intel_ocl_bicc <4 x float> @func_float4(<4 x float> %0, <4 x float> %1, <4 x float> %2) nounwind ; first call, on the low halves, using the intel_ocl_bicc calling convention
+  %3 = shufflevector <4 x float> %call.i, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> ; widen the first result to 8 elements; elements 4-7 are undef
+  %4 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; elements 4-7 of %a
+  %5 = shufflevector <8 x float> %b, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; elements 4-7 of %b
+  %6 = shufflevector <8 x float> %c, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; elements 4-7 of %c
+  %call.i2 = tail call intel_ocl_bicc <4 x float> @func_float4(<4 x float> %4, <4 x float> %5, <4 x float> %6) nounwind ; second call, on the high halves, same calling convention
+  %7 = shufflevector <4 x float> %call.i2, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> ; widen the second result to 8 elements; elements 4-7 are undef
+  %8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> ; elements 0-3 come from %3; mask indices 8-11 select elements 0-3 of the second operand %7
+  ret <8 x float> %8 ; first result in elements 0-3, second result in elements 4-7
+}
+
author	Elena Demikhovsky <elena.demikhovsky@intel.com>	2013-02-13 08:02:04 +0000
committer	Elena Demikhovsky <elena.demikhovsky@intel.com>	2013-02-13 08:02:04 +0000
commit	d29804f80d1cc26ea552b58693ce883f5b13de7a (patch)
tree	aeb300a914122fd17fdf0a6d4f6184bf650ba49f /test/CodeGen
parent	cef6cfe4a67af030754b4151cd63076c4aab7467 (diff)