diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 9 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 24 |
2 files changed, 15 insertions, 18 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 57a13ac20f..81f502b278 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4383,11 +4383,12 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may // combine either bitwise AND or insert of float 0.0 to set these bits. N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4); + // Create this as a scalar to vector.. + N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2); - } else if (EVT == MVT::i32) { - // InsertPS works with constant index. - if (isa<ConstantSDNode>(N2)) - return Op; + } else if (EVT == MVT::i32 && isa<ConstantSDNode>(N2)) { + // PINSR* works with constant index. + return Op; } return SDValue(); } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2c9a064bd4..eb26ac0516 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -51,7 +51,7 @@ def X86pinsrw : SDNode<"X86ISD::PINSRW", SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; def X86insrtps : SDNode<"X86ISD::INSERTPS", SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>, - SDTCisVT<2, f32>, SDTCisPtrTy<3>]>>; + SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>; def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, @@ -3596,32 +3596,28 @@ defm PINSRD : SS41I_insert32<0x22, "pinsrd">; // in the target vector. let Constraints = "$src1 = $dst" in { multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> { - def match_rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, FR32:$src2, i32i8imm:$src3), + def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR128:$dst, - (X86insrtps VR128:$src1, FR32:$src2, imm:$src3))]>, OpSize; - def match_rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), + (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, OpSize; + def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR128:$dst, - (X86insrtps VR128:$src1, (loadf32 addr:$src2), + (X86insrtps VR128:$src1, + (v4f32 (scalar_to_vector (loadf32 addr:$src2))), imm:$src3))]>, OpSize; } } -let Constraints = "$src1 = $dst" in { - def INSERTPSrr : SS4AIi8<0x21, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - "insertps\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, (int_x86_sse41_insertps VR128:$src1, - VR128:$src2, imm:$src3))]>; -} - defm INSERTPS : SS41I_insertf32<0x21, "insertps">; +def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), + (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>; + let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize; |