author     Evan Cheng <evan.cheng@apple.com>    2010-02-16 21:09:44 +0000
committer  Evan Cheng <evan.cheng@apple.com>    2010-02-16 21:09:44 +0000
commit     ae3ecf96035165de3e5327fb33bd30504e21832f (patch)
tree       c1bbc762157021b71c7efd2ee65d4f4140e16fe1
parent     6417171026447cde57330114e7df2a22bebfc135 (diff)
Look for SSE "and" instructions of this form: (and x, (build_vector c1,c2,c3,c4)).
If there exists a use of a build_vector that's the bitwise complement of the mask,
then transform the node to
(and (xor x, (build_vector -1,-1,-1,-1)), (build_vector ~c1,~c2,~c3,~c4)).
Since this transformation is only useful when 1) the given build_vector will
become a load from the constant pool, and 2) (and (xor x, -1), y) matches a single
instruction, I decided this is appropriate as an x86-specific transformation.
rdar://7323335
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@96389 91177308-0d34-0410-b5e6-96231b3b80d8
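For illustration only (not part of the commit), here is a minimal sketch of the kind of IR this combine targets, using a hypothetical copysign-style function: one AND masks each lane with 0x7fffffff while another AND in the same function uses the bitwise complement 0x80000000, so without the combine each mask becomes its own constant-pool load feeding an andps.

; Hypothetical example, not the commit's test case.
define <4 x float> @copysign_like(<4 x float> %mag, <4 x float> %sgn) nounwind {
entry:
  %m = bitcast <4 x float> %mag to <4 x i32>
  %s = bitcast <4 x float> %sgn to <4 x i32>
  ; clear the sign bits: (and x, (build_vector 0x7fffffff,...))
  %abs = and <4 x i32> %m, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  ; keep only the sign bits: (and y, (build_vector 0x80000000,...)), the complement mask
  %sign = and <4 x i32> %s, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
  %combined = or <4 x i32> %abs, %sign
  %res = bitcast <4 x i32> %combined to <4 x float>
  ret <4 x float> %res
}

With the combine, one of the two ANDs is rewritten to (and (xor x, (build_vector -1,-1,-1,-1)), ~mask), which matches ANDNPS against the constant already used by the other AND, so both instructions share one constant-pool entry; the CHECK-NOT: movaps LCPI1_2 line added to lsr-reuse-trunc.ll below checks for exactly this.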
-rw-r--r--   lib/Target/X86/X86ISelLowering.cpp    51
-rw-r--r--   test/CodeGen/X86/lsr-reuse-trunc.ll   15
-rw-r--r--   test/CodeGen/X86/sink-hoist.ll         1
3 files changed, 62 insertions, 5 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9974d8c997..4c40fe1803 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -990,6 +990,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::BUILD_VECTOR);
   setTargetDAGCombine(ISD::SELECT);
+  setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::SHL);
   setTargetDAGCombine(ISD::SRA);
   setTargetDAGCombine(ISD::SRL);
@@ -9157,6 +9158,53 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// PerformANDCombine - Look for SSE and instructions of this form:
+/// (and x, (build_vector c1,c2,c3,c4)). If there exists a use of a build_vector
+/// that's the bitwise complement of the mask, then transform the node to
+/// (and (xor x, (build_vector -1,-1,-1,-1)), (build_vector ~c1,~c2,~c3,~c4)).
+static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector() || !VT.isInteger())
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  if (N0.getOpcode() == ISD::XOR || !N1.hasOneUse())
+    return SDValue();
+
+  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
+    unsigned NumElts = VT.getVectorNumElements();
+    EVT EltVT = VT.getVectorElementType();
+    SmallVector<SDValue, 8> Mask;
+    Mask.reserve(NumElts);
+    for (unsigned i = 0; i != NumElts; ++i) {
+      SDValue Arg = N1.getOperand(i);
+      if (Arg.getOpcode() == ISD::UNDEF) {
+        Mask.push_back(Arg);
+        continue;
+      }
+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Arg);
+      if (!C) return SDValue();
+      Mask.push_back(DAG.getConstant(~C->getAPIntValue(), EltVT));
+    }
+    N1 = DAG.getNode(ISD::BUILD_VECTOR, N1.getDebugLoc(), VT,
+                     &Mask[0], NumElts);
+    if (!N1.use_empty()) {
+      unsigned Bits = EltVT.getSizeInBits();
+      Mask.clear();
+      for (unsigned i = 0; i != NumElts; ++i)
+        Mask.push_back(DAG.getConstant(APInt::getAllOnesValue(Bits), EltVT));
+      SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                                    VT, &Mask[0], NumElts);
+      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                         DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+                                     N0, NewMask), N1);
+    }
+  }
+
+  return SDValue();
+}
+
 /// PerformMulCombine - Optimize a single multiply with constant into two
 /// in order to implement it with two cheaper instructions, e.g.
@@ -9305,7 +9353,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
       }
     } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
-        unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
+        unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
         if (C->getZExtValue() == SplatIdx)
           BaseShAmt = InVec.getOperand(1);
       }
@@ -9690,6 +9738,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
   case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
   case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI);
+  case ISD::AND:            return PerformANDCombine(N, DAG, DCI);
   case ISD::MUL:            return PerformMulCombine(N, DAG, DCI);
   case ISD::SHL:
   case ISD::SRA:
diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll
index d1d714491f..a663a220e6 100644
--- a/test/CodeGen/X86/lsr-reuse-trunc.ll
+++ b/test/CodeGen/X86/lsr-reuse-trunc.ll
@@ -1,10 +1,19 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
 
 ; Full strength reduction wouldn't reduce register pressure, so LSR should
 ; stick with indexing here.
 
-; CHECK: movaps (%rsi,%rax,4), %xmm3
-; CHECK: movaps %xmm3, (%rdi,%rax,4)
+; Also checks andps and andnps shares the same constantpool. Previously llvm
+; will codegen two andps, one using 0x80000000, the other 0x7fffffff.
+; rdar://7323335
+
+; CHECK: movaps LCPI1_0
+; CHECK: movaps LCPI1_1
+; CHECK-NOT: movaps LCPI1_2
+; CHECK: movaps (%rsi,%rax,4), %xmm2
+; CHECK: andps
+; CHECK: andnps
+; CHECK: movaps %xmm2, (%rdi,%rax,4)
 ; CHECK: addq $4, %rax
 ; CHECK: cmpl %eax, (%rdx)
 ; CHECK-NEXT: jg
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index 01d73736d6..e1d0fe7665 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -63,7 +63,6 @@ entry:
 ; CHECK: vv:
 ; CHECK: LCPI4_0(%rip), %xmm0
 ; CHECK: LCPI4_1(%rip), %xmm1
-; CHECK: LCPI4_2(%rip), %xmm2
 ; CHECK: align
 ; CHECK-NOT: LCPI
 ; CHECK: ret