diff options
author | Torok Edwin <edwintorok@gmail.com> | 2008-04-06 21:23:02 +0000 |
---|---|---|
committer | Torok Edwin <edwintorok@gmail.com> | 2008-04-06 21:23:02 +0000 |
commit | 4fea2e982d79132715711dfcfdc46abf15239217 (patch) | |
tree | 6a99d39f0d871028fd0ba90ec92df26864c8f9d0 | |
parent | 051a950000e21935165db56695e35bade668193b (diff) |
For xor, prefer to expand the constant operand to -1 (all ones) by setting the bits that are not demanded, so that we have a chance to turn the xor into a not.
If the constant cannot be expanded to -1, keep the old behaviour and try to shrink the constant instead.
Part of enhancement for PR2191.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@49280 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/CodeGen/SelectionDAG/TargetLowering.cpp | 23 | ||||
-rw-r--r-- | test/CodeGen/X86/xor_not.ll | 146 |
2 files changed, 165 insertions, 4 deletions
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 0fcb3c81d7..a0894ddebc 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -657,10 +657,25 @@ bool TargetLowering::SimplifyDemandedBits(SDOperand Op, } // If the RHS is a constant, see if we can simplify it. - // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) - return true; - + // for XOR, we prefer to force bits to 1 if they will make a -1. + // if we can't force bits, try to shrink constant + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt Expanded = C->getAPIntValue() | (~NewMask); + // if we can expand it to have all bits set, do it + if (Expanded.isAllOnesValue()) { + if (Expanded != C->getAPIntValue()) { + MVT::ValueType VT = Op.getValueType(); + SDOperand New = TLO.DAG.getNode(Op.getOpcode(), VT, Op.getOperand(0), + TLO.DAG.getConstant(Expanded, VT)); + return TLO.CombineTo(Op, New); + } + // if it already has all the bits set, nothing to change + // but don't shrink either! 
+ } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { + return true; + } + } + KnownZero = KnownZeroOut; KnownOne = KnownOneOut; break; diff --git a/test/CodeGen/X86/xor_not.ll b/test/CodeGen/X86/xor_not.ll new file mode 100644 index 0000000000..52f8e653cc --- /dev/null +++ b/test/CodeGen/X86/xor_not.ll @@ -0,0 +1,146 @@ +; RUN: llvm-as < %s | llc -march=x86 | grep not[lwb] | count 3 +; RUN: llvm-as < %s | llc -march=x86-64 | grep not[lwb] | count 4 +define i32 @test(i32 %a, i32 %b) nounwind { +entry: + %tmp1not = xor i32 %b, -2 + %tmp3 = and i32 %tmp1not, %a + %tmp4 = lshr i32 %tmp3, 1 + ret i32 %tmp4 +} + +define i32 @sum32(i32 %a, i32 %b) nounwind { +entry: + br label %bb +bb: + %b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ] + %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ] + %tmp3 = xor i32 %a_addr.0, %b_addr.0 + %tmp4not = xor i32 %tmp3, 2147483647 + %tmp6 = and i32 %tmp4not, %b_addr.0 + %tmp8 = shl i32 %tmp6, 1 + %tmp10 = icmp eq i32 %tmp8, 0 + br i1 %tmp10, label %bb12, label %bb +bb12: + ret i32 %tmp3 +} + +define i16 @sum16(i16 %a, i16 %b) nounwind { +entry: + br label %bb +bb: + %b_addr.0 = phi i16 [ %b, %entry ], [ %tmp8, %bb ] + %a_addr.0 = phi i16 [ %a, %entry ], [ %tmp3, %bb ] + %tmp3 = xor i16 %a_addr.0, %b_addr.0 + %tmp4not = xor i16 %tmp3, 32767 + %tmp6 = and i16 %tmp4not, %b_addr.0 + %tmp8 = shl i16 %tmp6, 1 + %tmp10 = icmp eq i16 %tmp8, 0 + br i1 %tmp10, label %bb12, label %bb +bb12: + ret i16 %tmp3 +} + +define i8 @sum8(i8 %a, i8 %b) nounwind { +entry: + br label %bb +bb: + %b_addr.0 = phi i8 [ %b, %entry ], [ %tmp8, %bb ] + %a_addr.0 = phi i8 [ %a, %entry ], [ %tmp3, %bb ] + %tmp3 = xor i8 %a_addr.0, %b_addr.0 + %tmp4not = xor i8 %tmp3, 127 + %tmp6 = and i8 %tmp4not, %b_addr.0 + %tmp8 = shl i8 %tmp6, 1 + %tmp10 = icmp eq i8 %tmp8, 0 + br i1 %tmp10, label %bb12, label %bb +bb12: + ret i8 %tmp3 +} + +define i32 @notransform(i32 %a, i32 %b) nounwind { +entry: + br label %bb +bb: + %b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ] + 
%a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ] + %tmp3 = xor i32 %a_addr.0, %b_addr.0 + %tmp4not = xor i32 %tmp3, 2147483646 + %tmp6 = and i32 %tmp4not, %b_addr.0 + %tmp8 = shl i32 %tmp6, 1 + %tmp10 = icmp eq i32 %tmp8, 0 + br i1 %tmp10, label %bb12, label %bb +bb12: + ret i32 %tmp3 +} +; RUN: llvm-as < %s | llc -march=x86 | grep not[lwb] | count 3 +; RUN: llvm-as < %s | llc -march=x86-64 | grep not[lwb] | count 4 +define i32 @test(i32 %a, i32 %b) nounwind { +entry: + %tmp1not = xor i32 %b, -2 + %tmp3 = and i32 %tmp1not, %a + %tmp4 = lshr i32 %tmp3, 1 + ret i32 %tmp4 +} + +define i32 @sum32(i32 %a, i32 %b) nounwind { +entry: + br label %bb +bb: + %b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ] + %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ] + %tmp3 = xor i32 %a_addr.0, %b_addr.0 + %tmp4not = xor i32 %tmp3, 2147483647 + %tmp6 = and i32 %tmp4not, %b_addr.0 + %tmp8 = shl i32 %tmp6, 1 + %tmp10 = icmp eq i32 %tmp8, 0 + br i1 %tmp10, label %bb12, label %bb +bb12: + ret i32 %tmp3 +} + +define i16 @sum16(i16 %a, i16 %b) nounwind { +entry: + br label %bb +bb: + %b_addr.0 = phi i16 [ %b, %entry ], [ %tmp8, %bb ] + %a_addr.0 = phi i16 [ %a, %entry ], [ %tmp3, %bb ] + %tmp3 = xor i16 %a_addr.0, %b_addr.0 + %tmp4not = xor i16 %tmp3, 32767 + %tmp6 = and i16 %tmp4not, %b_addr.0 + %tmp8 = shl i16 %tmp6, 1 + %tmp10 = icmp eq i16 %tmp8, 0 + br i1 %tmp10, label %bb12, label %bb +bb12: + ret i16 %tmp3 +} + +define i8 @sum8(i8 %a, i8 %b) nounwind { +entry: + br label %bb +bb: + %b_addr.0 = phi i8 [ %b, %entry ], [ %tmp8, %bb ] + %a_addr.0 = phi i8 [ %a, %entry ], [ %tmp3, %bb ] + %tmp3 = xor i8 %a_addr.0, %b_addr.0 + %tmp4not = xor i8 %tmp3, 127 + %tmp6 = and i8 %tmp4not, %b_addr.0 + %tmp8 = shl i8 %tmp6, 1 + %tmp10 = icmp eq i8 %tmp8, 0 + br i1 %tmp10, label %bb12, label %bb +bb12: + ret i8 %tmp3 +} + +define i32 @notransform(i32 %a, i32 %b) nounwind { +entry: + br label %bb +bb: + %b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ] + %a_addr.0 = phi i32 [ %a, %entry ], [ 
%tmp3, %bb ] + %tmp3 = xor i32 %a_addr.0, %b_addr.0 + %tmp4not = xor i32 %tmp3, 2147483646 + %tmp6 = and i32 %tmp4not, %b_addr.0 + %tmp8 = shl i32 %tmp6, 1 + %tmp10 = icmp eq i32 %tmp8, 0 + br i1 %tmp10, label %bb12, label %bb +bb12: + ret i32 %tmp3 +} |