author    | Mon P Wang <wangmp@apple.com> | 2009-04-03 02:43:30 +0000
committer | Mon P Wang <wangmp@apple.com> | 2009-04-03 02:43:30 +0000
commit    | 1e95580925b173ea13fbe77c0eb299f672391656
tree      | 73b102baf920cd2081ebc835f6e3b46558990c1c
parent    | f436fed4124af0ece0f7125f0963f42b859ca36c
Added an x86 DAG combine to increase the chances of using a movq for v2i64 on x86-32.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68368 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 32
-rw-r--r-- | test/CodeGen/X86/vec_i64.ll        | 22
2 files changed, 52 insertions, 2 deletions
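
For context, the pattern this combine targets is the one exercised by @foo1 in the new test below: an i64 is loaded, inserted into the low lane of a v2i64, and the high lane is shuffled in from zeroinitializer. The sketch below is illustrative (the function name is made up; the IR uses the same 2009-era syntax as the test). After the combine, the shuffle becomes a build_vector whose low element is the loaded value and whose high element is zero, which performBuildVectorCombine can then turn into a single movq, since movq loads 64 bits and zeroes the upper quadword of the destination xmm register.

```llvm
; Illustrative sketch of the pattern the new combine rewrites (mirrors @foo1
; in test/CodeGen/X86/vec_i64.ll; the function name here is hypothetical).
define <2 x i64> @load_low_zero_high(i64* %p) nounwind {
entry:
  %val = load i64* %p, align 8
  ; Put the loaded value into element 0 of a v2i64.
  %s2v = insertelement <2 x i64> undef, i64 %val, i32 0
  ; Mask <2, 1> selects element 0 of %s2v (the value) and element 1 of
  ; zeroinitializer (zero), so the result is <val, 0>.
  %loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %loadl
}
```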
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b9c17f2303..c5a6acbf7a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8061,15 +8061,43 @@ static bool EltsFromConsecutiveLoads(SDNode *N, SDValue PermMask,
 /// PerformShuffleCombine - Combine a vector_shuffle that is equal to
 /// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
 /// if the load addresses are consecutive, non-overlapping, and in the right
-/// order.
+/// order.  In the case of v2i64, it will see if it can rewrite the
+/// shuffle to be an appropriate build vector so it can take advantage of
+// performBuildVectorCombine.
 static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                      const TargetLowering &TLI) {
-  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   DebugLoc dl = N->getDebugLoc();
   MVT VT = N->getValueType(0);
   MVT EVT = VT.getVectorElementType();
   SDValue PermMask = N->getOperand(2);
   unsigned NumElems = PermMask.getNumOperands();
+
+  // For x86-32 machines, if we see an insert and then a shuffle in a v2i64
+  // where the upper half is 0, it is advantageous to rewrite it as a build
+  // vector of (0, val) so it can use movq.
+  if (VT == MVT::v2i64) {
+    SDValue In[2];
+    In[0] = N->getOperand(0);
+    In[1] = N->getOperand(1);
+    unsigned Idx0 =cast<ConstantSDNode>(PermMask.getOperand(0))->getZExtValue();
+    unsigned Idx1 =cast<ConstantSDNode>(PermMask.getOperand(1))->getZExtValue();
+    if (In[0].getValueType().getVectorNumElements() == NumElems &&
+        In[Idx0/2].getOpcode() == ISD::INSERT_VECTOR_ELT &&
+        In[Idx1/2].getOpcode() == ISD::BUILD_VECTOR) {
+      ConstantSDNode* InsertVecIdx =
+                             dyn_cast<ConstantSDNode>(In[Idx0/2].getOperand(2));
+      if (InsertVecIdx &&
+          InsertVecIdx->getZExtValue() == (Idx0 % 2) &&
+          isZeroNode(In[Idx1/2].getOperand(Idx1 % 2))) {
+        return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+                           In[Idx0/2].getOperand(1),
+                           In[Idx1/2].getOperand(Idx1 % 2));
+      }
+    }
+  }
+
+  // Try to combine a vector_shuffle into a 128-bit load.
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   SDNode *Base = NULL;
   if (!EltsFromConsecutiveLoads(N, PermMask, NumElems, EVT, Base, DAG, MFI,
                                 TLI))
diff --git a/test/CodeGen/X86/vec_i64.ll b/test/CodeGen/X86/vec_i64.ll
new file mode 100644
index 0000000000..3939af57c8
--- /dev/null
+++ b/test/CodeGen/X86/vec_i64.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -o %t -f
+; RUN: grep movq %t | count 2
+
+; Used movq to load i64 into a v2i64 when the top i64 is 0.
+
+define <2 x i64> @foo1(i64* %y) nounwind {
+entry:
+  %tmp1 = load i64* %y, align 8   ; <i64> [#uses=1]
+  %s2v = insertelement <2 x i64> undef, i64 %tmp1, i32 0
+  %loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1>
+  ret <2 x i64> %loadl
+}
+
+
+define <4 x float> @foo2(i64* %p) nounwind {
+entry:
+  %load = load i64* %p
+  %s2v = insertelement <2 x i64> undef, i64 %load, i32 0
+  %loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1>
+  %0 = bitcast <2 x i64> %loadl to <4 x float>
+  ret <4 x float> %0
+}