 lib/Target/X86/X86InstrSSE.td       |  6 +++---
 test/CodeGen/X86/avx-vinsertf128.ll | 13 +++++++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 882d647327..38744c3eeb 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7759,15 +7759,15 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                                    (i32 imm)),
           (VINSERTF128rr VR256:$src1, VR128:$src2,
                          (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (alignedloadv4f32 addr:$src2),
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
                                    (i32 imm)),
           (VINSERTF128rm VR256:$src1, addr:$src2,
                          (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (alignedloadv2f64 addr:$src2),
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
                                    (i32 imm)),
           (VINSERTF128rm VR256:$src1, addr:$src2,
                          (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (alignedloadv2i64 addr:$src2),
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
                                    (i32 imm)),
           (VINSERTF128rm VR256:$src1, addr:$src2,
                          (INSERT_get_vinsertf128_imm VR256:$ins))>;
diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll
index 9934a33041..3e86a2ae5e 100644
--- a/test/CodeGen/X86/avx-vinsertf128.ll
+++ b/test/CodeGen/X86/avx-vinsertf128.ll
@@ -116,3 +116,16 @@ entry:
   %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
   ret <8 x float> %2
 }
+
+; rdar://11076953
+; CHECK: vinsertf128_ucombine
+define <8 x float> @vinsertf128_ucombine(float* nocapture %f) nounwind uwtable readonly ssp {
+; CHECK-NOT: vmovups
+; CHECK: vinsertf128
+entry:
+  %add.ptr = getelementptr inbounds float* %f, i64 4
+  %0 = bitcast float* %add.ptr to <4 x float>*
+  %1 = load <4 x float>* %0, align 8
+  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
+  ret <8 x float> %2
+}