diff options
author | Dale Johannesen <dalej@apple.com> | 2010-09-30 23:57:10 +0000 |
---|---|---|
committer | Dale Johannesen <dalej@apple.com> | 2010-09-30 23:57:10 +0000 |
commit | 0488fb649a56b7fc89a5814df5308813f9e5a85d (patch) | |
tree | 21913bb81960866b73c6b49f29782c142563304c /lib | |
parent | a7e3b564773cc45a1b08383c09fe86b17b3ffe92 (diff) |
Massive rewrite of MMX:
The x86_mmx type is used for MMX intrinsics, parameters and
return values where these use MMX registers, and is also
supported in load, store, and bitcast.
Only the above operations generate MMX instructions, and optimizations
do not operate on or produce MMX intrinsics.
MMX-sized vectors <2 x i32> etc. are lowered to XMM or split into
smaller pieces. Optimizations may occur on these forms and the
result casted back to x86_mmx, provided the result feeds into a
previous existing x86_mmx operation.
The point of all this is prevent optimizations from introducing
MMX operations, which is unsafe due to the EMMS problem.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@115243 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 60 | ||||
-rw-r--r-- | lib/Target/X86/X86CallingConv.td | 19 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 271 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 10 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 51 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrMMX.td | 562 | ||||
-rw-r--r-- | lib/Target/X86/X86MCInstLower.cpp | 3 | ||||
-rw-r--r-- | lib/Target/X86/X86RegisterInfo.td | 2 | ||||
-rw-r--r-- | lib/Transforms/IPO/DeadArgumentElimination.cpp | 6 | ||||
-rw-r--r-- | lib/Transforms/Scalar/ScalarReplAggregates.cpp | 6 | ||||
-rw-r--r-- | lib/VMCore/AutoUpgrade.cpp | 535 | ||||
-rw-r--r-- | lib/VMCore/Instructions.cpp | 14 | ||||
-rw-r--r-- | lib/VMCore/Type.cpp | 14 |
13 files changed, 749 insertions, 804 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index adc225b94b..2c77050b3c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4322,6 +4322,66 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } + case Intrinsic::x86_mmx_pslli_w: + case Intrinsic::x86_mmx_pslli_d: + case Intrinsic::x86_mmx_pslli_q: + case Intrinsic::x86_mmx_psrli_w: + case Intrinsic::x86_mmx_psrli_d: + case Intrinsic::x86_mmx_psrli_q: + case Intrinsic::x86_mmx_psrai_w: + case Intrinsic::x86_mmx_psrai_d: { + SDValue ShAmt = getValue(I.getArgOperand(1)); + if (isa<ConstantSDNode>(ShAmt)) { + visitTargetIntrinsic(I, Intrinsic); + return 0; + } + unsigned NewIntrinsic = 0; + EVT ShAmtVT = MVT::v2i32; + switch (Intrinsic) { + case Intrinsic::x86_mmx_pslli_w: + NewIntrinsic = Intrinsic::x86_mmx_psll_w; + break; + case Intrinsic::x86_mmx_pslli_d: + NewIntrinsic = Intrinsic::x86_mmx_psll_d; + break; + case Intrinsic::x86_mmx_pslli_q: + NewIntrinsic = Intrinsic::x86_mmx_psll_q; + break; + case Intrinsic::x86_mmx_psrli_w: + NewIntrinsic = Intrinsic::x86_mmx_psrl_w; + break; + case Intrinsic::x86_mmx_psrli_d: + NewIntrinsic = Intrinsic::x86_mmx_psrl_d; + break; + case Intrinsic::x86_mmx_psrli_q: + NewIntrinsic = Intrinsic::x86_mmx_psrl_q; + break; + case Intrinsic::x86_mmx_psrai_w: + NewIntrinsic = Intrinsic::x86_mmx_psra_w; + break; + case Intrinsic::x86_mmx_psrai_d: + NewIntrinsic = Intrinsic::x86_mmx_psra_d; + break; + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + } + + // The vector shift intrinsics with scalars uses 32b shift amounts but + // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits + // to be zero. + // We must do this early because v2i32 is not a legal type. + DebugLoc dl = getCurDebugLoc(); + SDValue ShOps[2]; + ShOps[0] = ShAmt; + ShOps[1] = DAG.getConstant(0, MVT::i32); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); + EVT DestVT = TLI.getValueType(I.getType()); + ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, DestVT, ShAmt); + Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, + DAG.getConstant(NewIntrinsic, MVT::i32), + getValue(I.getArgOperand(0)), ShAmt); + setValue(&I, Res); + return 0; + } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index e3409effc3..487e39f8c2 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -48,7 +48,7 @@ def RetCC_X86Common : CallingConv<[ // MMX vector types are always returned in MM0. If the target doesn't have // MM0, it doesn't support these vector types. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>, + CCIfType<[x86mmx, v1i64], CCAssignToReg<[MM0]>>, // Long double types are always returned in ST0 (even with SSE). CCIfType<[f80], CCAssignToReg<[ST0, ST1]>> @@ -95,14 +95,14 @@ def RetCC_X86_64_C : CallingConv<[ // returned in RAX. This disagrees with ABI documentation but is bug // compatible with gcc. CCIfType<[v1i64], CCAssignToReg<[RAX]>>, - CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[XMM0, XMM1]>>, + CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>, CCDelegateTo<RetCC_X86Common> ]>; // X86-Win64 C return-value convention. def RetCC_X86_Win64_C : CallingConv<[ // The X86-Win64 calling convention always returns __m64 values in RAX. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>, + CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>, // And FP in XMM0 only. CCIfType<[f32], CCAssignToReg<[XMM0]>>, @@ -161,7 +161,7 @@ def CC_X86_64_C : CallingConv<[ // The first 8 MMX (except for v1i64) vector arguments are passed in XMM // registers on Darwin. - CCIfType<[v8i8, v4i16, v2i32], + CCIfType<[x86mmx], CCIfSubtarget<"isTargetDarwin()", CCIfSubtarget<"hasSSE2()", CCPromoteToType<v2i64>>>>, @@ -192,7 +192,7 @@ def CC_X86_64_C : CallingConv<[ CCAssignToStack<32, 32>>, // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> + CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>> ]>; // Calling convention used on Win64 @@ -210,8 +210,7 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>, // The first 4 MMX vector arguments are passed in GPRs. - CCIfType<[v8i8, v4i16, v2i32, v1i64], - CCBitConvertToType<i64>>, + CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>, // The first 4 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ], @@ -233,7 +232,7 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[f80], CCAssignToStack<0, 0>>, // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> + CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>> ]>; def CC_X86_64_GHC : CallingConv<[ @@ -269,7 +268,7 @@ def CC_X86_32_Common : CallingConv<[ // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx // registers if the call is not a vararg call. - CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32], + CCIfNotVarArg<CCIfType<[x86mmx], CCAssignToReg<[MM0, MM1, MM2]>>>, // Integer/Float values get stored in stack slots that are 4 bytes in @@ -300,7 +299,7 @@ def CC_X86_32_Common : CallingConv<[ // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are // passed in the parameter area. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>; + CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 4>>]>; def CC_X86_32_C : CallingConv<[ // Promote i8/i16 arguments to i32. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 55daf769cf..7da9ad6da2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -218,11 +218,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); if (Subtarget->is64Bit()) { setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand); - // Without SSE, i64->f64 goes through memory; i64->MMX is Legal. - if (Subtarget->hasMMX() && !DisableMMX) - setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Custom); - else - setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand); + // Without SSE, i64->f64 goes through memory. + setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand); } } @@ -615,90 +612,40 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // with -msoft-float, disable use of MMX as well. if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) { addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass, false); - - // FIXME: Remove the rest of this stuff. - addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false); - addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false); - addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false); - - addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false); - - setOperationAction(ISD::ADD, MVT::v8i8, Legal); - setOperationAction(ISD::ADD, MVT::v4i16, Legal); - setOperationAction(ISD::ADD, MVT::v2i32, Legal); - setOperationAction(ISD::ADD, MVT::v1i64, Legal); - - setOperationAction(ISD::SUB, MVT::v8i8, Legal); - setOperationAction(ISD::SUB, MVT::v4i16, Legal); - setOperationAction(ISD::SUB, MVT::v2i32, Legal); - setOperationAction(ISD::SUB, MVT::v1i64, Legal); - - setOperationAction(ISD::MULHS, MVT::v4i16, Legal); - setOperationAction(ISD::MUL, MVT::v4i16, Legal); - - setOperationAction(ISD::AND, MVT::v8i8, Promote); - AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::AND, MVT::v4i16, Promote); - AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::AND, MVT::v2i32, Promote); - AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::AND, MVT::v1i64, Legal); - - setOperationAction(ISD::OR, MVT::v8i8, Promote); - AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::OR, MVT::v4i16, Promote); - AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::OR, MVT::v2i32, Promote); - AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::OR, MVT::v1i64, Legal); - - setOperationAction(ISD::XOR, MVT::v8i8, Promote); - AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::XOR, MVT::v4i16, Promote); - AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::XOR, MVT::v2i32, Promote); - AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::XOR, MVT::v1i64, Legal); - - setOperationAction(ISD::LOAD, MVT::v8i8, Promote); - AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v4i16, Promote); - AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v2i32, Promote); - AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v1i64, Legal); - - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); - - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); - - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom); - - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom); - - setOperationAction(ISD::SELECT, MVT::v8i8, Promote); - setOperationAction(ISD::SELECT, MVT::v4i16, Promote); - setOperationAction(ISD::SELECT, MVT::v2i32, Promote); - setOperationAction(ISD::SELECT, MVT::v1i64, Custom); - setOperationAction(ISD::VSETCC, MVT::v8i8, Custom); - setOperationAction(ISD::VSETCC, MVT::v4i16, Custom); - setOperationAction(ISD::VSETCC, MVT::v2i32, Custom); - - if (!X86ScalarSSEf64 && Subtarget->is64Bit()) { - setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom); - } - } + // No operations on x86mmx supported, everything uses intrinsics. + } + + // MMX-sized vectors (other than x86mmx) are expected to be expanded + // into smaller operations. + setOperationAction(ISD::MULHS, MVT::v8i8, Expand); + setOperationAction(ISD::MULHS, MVT::v4i16, Expand); + setOperationAction(ISD::MULHS, MVT::v2i32, Expand); + setOperationAction(ISD::MULHS, MVT::v1i64, Expand); + setOperationAction(ISD::AND, MVT::v8i8, Expand); + setOperationAction(ISD::AND, MVT::v4i16, Expand); + setOperationAction(ISD::AND, MVT::v2i32, Expand); + setOperationAction(ISD::AND, MVT::v1i64, Expand); + setOperationAction(ISD::OR, MVT::v8i8, Expand); + setOperationAction(ISD::OR, MVT::v4i16, Expand); + setOperationAction(ISD::OR, MVT::v2i32, Expand); + setOperationAction(ISD::OR, MVT::v1i64, Expand); + setOperationAction(ISD::XOR, MVT::v8i8, Expand); + setOperationAction(ISD::XOR, MVT::v4i16, Expand); + setOperationAction(ISD::XOR, MVT::v2i32, Expand); + setOperationAction(ISD::XOR, MVT::v1i64, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand); + setOperationAction(ISD::SELECT, MVT::v8i8, Expand); + setOperationAction(ISD::SELECT, MVT::v4i16, Expand); + setOperationAction(ISD::SELECT, MVT::v2i32, Expand); + setOperationAction(ISD::SELECT, MVT::v1i64, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Expand); if (!UseSoftFloat && Subtarget->hasSSE1()) { addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); @@ -821,10 +768,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); - if (!DisableMMX && Subtarget->hasMMX()) { - setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - } } if (Subtarget->hasSSE41()) { @@ -1210,8 +1153,7 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{ RRC = (Subtarget->is64Bit() ? X86::GR64RegisterClass : X86::GR32RegisterClass); break; - case MVT::v8i8: case MVT::v4i16: - case MVT::v2i32: case MVT::v1i64: + case MVT::x86mmx: RRC = X86::VR64RegisterClass; break; case MVT::f32: case MVT::f64: @@ -1345,12 +1287,11 @@ X86TargetLowering::LowerReturn(SDValue Chain, // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 // which is returned in RAX / RDX. if (Subtarget->is64Bit()) { - if (ValVT.isVector() && ValVT.getSizeInBits() == 64) { - ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy); + if (ValVT == MVT::x86mmx) { if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { + ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy); ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); - // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. if (!Subtarget->hasSSE2()) @@ -1650,7 +1591,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, RC = X86::VR256RegisterClass; else if (RegVT.isVector() && RegVT.getSizeInBits() == 128) RC = X86::VR128RegisterClass; - else if (RegVT.isVector() && RegVT.getSizeInBits() == 64) + else if (RegVT == MVT::x86mmx) RC = X86::VR64RegisterClass; else llvm_unreachable("Unknown argument type!"); @@ -1673,9 +1614,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isExtInLoc()) { // Handle MMX values passed in XMM regs. if (RegVT.isVector()) { - ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, - ArgValue, DAG.getConstant(0, MVT::i64)); - ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); + ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), + ArgValue); } else ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); } @@ -2876,7 +2816,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) { /// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference /// the second operand. static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) { - if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16) + if (VT == MVT::v4f32 || VT == MVT::v4i32 ) return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4); if (VT == MVT::v2f64 || VT == MVT::v2i64) return (Mask[0] < 2 && Mask[1] < 2); @@ -3548,13 +3488,10 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted + // Always build SSE zero vectors as <4 x i32> bitcasted // to their dest type. This ensures they get CSE'd. SDValue Vec; - if (VT.getSizeInBits() == 64) { // MMX - SDValue Cst = DAG.getTargetConstant(0, MVT::i32); - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - } else if (VT.getSizeInBits() == 128) { + if (VT.getSizeInBits() == 128) { // SSE if (HasSSE2) { // SSE2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); @@ -3582,10 +3519,7 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { // type. This ensures they get CSE'd. SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); SDValue Vec; - if (VT.getSizeInBits() == 64) // MMX - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); - else // SSE - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); } @@ -4025,8 +3959,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc dl) { - bool isMMX = VT.getSizeInBits() == 64; - EVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; + EVT ShVT = MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL; SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, @@ -4180,10 +4113,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ISD::isBuildVectorAllZeros(Op.getNode()) || (Op.getValueType().getSizeInBits() != 256 && ISD::isBuildVectorAllOnes(Op.getNode()))) { - // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to + // Canonicalize this to <4 x i32> (SSE) to // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are // eliminated on x86-32 hosts. - if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32) + if (Op.getValueType() == MVT::v4i32) return Op; if (ISD::isBuildVectorAllOnes(Op.getNode())) @@ -4234,9 +4167,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i64 && !Subtarget->is64Bit() && (!IsAllConstants || Idx == 0)) { if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) { - // Handle MMX and SSE both. - EVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32; - unsigned VecElts = VT == MVT::v2i64 ? 4 : 2; + // Handle SSE only. + assert(VT == MVT::v2i64 && "Expected an SSE value type!"); + EVT VecVT = MVT::v4i32; + unsigned VecElts = 4; // Truncate the value (which may itself be a constant) to i32, and // convert it to a vector with movd (S2V+shuffle to zero extend). @@ -4275,7 +4209,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DAG); } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - EVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32; + assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); + EVT MiddleVT = MVT::v4i32; Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(), DAG); @@ -5418,11 +5353,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); - // FIXME: this is somehow handled during isel by MMX pattern fragments. Remove - // the check or come up with another solution when all MMX move to intrinsics, - // but don't allow this to be considered legal, we don't want vector_shuffle - // operations to be matched during isel anymore. - if (isMMX && SVOp->isSplat()) + // Shuffle operations on MMX not supported. + if (isMMX) return Op; // Vector shuffle lowering takes 3 steps: @@ -5456,10 +5388,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef && - RelaxedMayFoldVectorLoad(V1) && !isMMX) + RelaxedMayFoldVectorLoad(V1)) return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG); - if (!isMMX && X86::isMOVHLPS_v_undef_Mask(SVOp)) + if (X86::isMOVHLPS_v_undef_Mask(SVOp)) return getMOVHighToLow(Op, dl, DAG); // Use to match splats @@ -5507,7 +5439,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return V2; if (ISD::isBuildVectorAllZeros(V1.getNode())) return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl); - if (!isMMX && !X86::isMOVLPMask(SVOp)) { + if (!X86::isMOVLPMask(SVOp)) { if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64)) return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG); @@ -5517,22 +5449,20 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } // FIXME: fold these into legal mask. - if (!isMMX) { - if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) - return getMOVLowToHigh(Op, dl, DAG, HasSSE2); + if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) + return getMOVLowToHigh(Op, dl, DAG, HasSSE2); - if (X86::isMOVHLPSMask(SVOp)) - return getMOVHighToLow(Op, dl, DAG); + if (X86::isMOVHLPSMask(SVOp)) + return getMOVHighToLow(Op, dl, DAG); - if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) - return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG); + if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG); - if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) - return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG); + if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG); - if (X86::isMOVLPMask(SVOp)) - return getMOVLP(Op, dl, DAG, HasSSE2); - } + if (X86::isMOVLPMask(SVOp)) + return getMOVLP(Op, dl, DAG, HasSSE2); if (ShouldXformToMOVHLPS(SVOp) || ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) @@ -5573,12 +5503,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } if (X86::isUNPCKLMask(SVOp)) - return (isMMX) ? - Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); if (X86::isUNPCKHMask(SVOp)) - return (isMMX) ? - Op : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first @@ -5602,18 +5530,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); if (X86::isUNPCKLMask(NewSVOp)) - return (isMMX) ? - NewOp : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); if (X86::isUNPCKHMask(NewSVOp)) - return (isMMX) ? - NewOp : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); } - // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle. - // Normalize the node to match x86 shuffle ops if needed - if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) + if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) return CommuteVectorShuffle(SVOp, DAG); // The checks below are all present in isShuffleMaskLegal, but they are @@ -5627,12 +5551,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { X86::getShufflePALIGNRImmediate(SVOp), DAG); - // Only a few shuffle masks are handled for 64-bit vectors (MMX), and - // 64-bit vectors which made to this point can't be handled, they are - // expanded. - if (isMMX) - return SDValue(); - if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && SVOp->getSplatIndex() == 0 && V2IsUndef) { if (VT == MVT::v2f64) @@ -5681,8 +5599,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return NewOp; } - // Handle all 4 wide cases with a number of shuffles except for MMX. - if (NumElems == 4 && !isMMX) + // Handle all 4 wide cases with a number of shuffles. + if (NumElems == 4) return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG); return SDValue(); @@ -5824,8 +5742,6 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, unsigned Opc; if (VT == MVT::v8i16) Opc = X86ISD::PINSRW; - else if (VT == MVT::v4i16) - Opc = X86ISD::MMX_PINSRW; else if (VT == MVT::v16i8) Opc = X86ISD::PINSRB; else @@ -5881,8 +5797,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue()); - return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW, - dl, VT, N0, N1, N2); + return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2); } return SDValue(); } @@ -5896,16 +5811,10 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); - EVT VT = MVT::v2i32; - switch (Op.getValueType().getSimpleVT().SimpleTy) { - default: break; - case MVT::v16i8: - case MVT::v8i16: - VT = MVT::v4i32; - break; - } + assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 && + "Expected an SSE type!"); return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, AnyExt)); + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt)); } // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as @@ -6322,11 +6231,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT SrcVT = Op.getOperand(0).getValueType(); - if (SrcVT.isVector()) { - if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) - return Op; + if (SrcVT.isVector()) return SDValue(); - } assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 && "Unknown SINT_TO_FP to lower!"); @@ -6702,13 +6608,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { - if (Op.getValueType().isVector()) { - if (Op.getValueType() == MVT::v2i32 && - Op.getOperand(0).getValueType() == MVT::v2f64) { - return Op; - } + if (Op.getValueType().isVector()) return SDValue(); - } std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true); SDValue FIST = Vals.first, StackSlot = Vals.second; @@ -7211,11 +7112,8 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { switch (VT.getSimpleVT().SimpleTy) { default: break; - case MVT::v8i8: case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; - case MVT::v4i16: case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; - case MVT::v2i32: case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; } @@ -7930,6 +7828,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4); } else { ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); +// FIXME this must be lowered to get rid of the invalid type. } EVT VT = Op.getValueType(); @@ -8840,7 +8739,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; case X86ISD::PINSRB: return "X86ISD::PINSRB"; case X86ISD::PINSRW: return "X86ISD::PINSRW"; - case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; @@ -9711,7 +9609,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::TLSCall_64: return EmitLoweredTLSCall(MI, BB); case X86::CMOV_GR8: - case X86::CMOV_V1I64: case X86::CMOV_FR32: case X86::CMOV_FR64: case X86::CMOV_V4F32: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 2a86fa8c70..1a2da7401b 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -128,11 +128,15 @@ namespace llvm { /// relative displacements. WrapperRIP, - /// MOVQ2DQ - Copies a 64-bit value from a vector to another vector. - /// Can be used to move a vector value from a MMX register to a XMM - /// register. + /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word + /// of an XMM vector, with the high word zero filled. MOVQ2DQ, + /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector + /// to an MMX vector. If you think this is too close to the previous + /// mnemonic, so do I; blame Intel. + MOVDQ2Q, + /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRB. PEXTRB, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 104f880032..70c3d076d5 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -15,51 +15,8 @@ // MMX Pattern Fragments //===----------------------------------------------------------------------===// -def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>; - -def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>; -def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>; -def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>; -def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>; - -//===----------------------------------------------------------------------===// -// MMX Masks -//===----------------------------------------------------------------------===// - -// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to -// PSHUFW imm. -def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShuffleSHUFImmediate(N)); -}]>; - -// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> -def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> -def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> -def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; |