aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2010-01-11 22:03:29 +0000
committerEvan Cheng <evan.cheng@apple.com>2010-01-11 22:03:29 +0000
commit199c4240feedec2f9dbd0d4c4c0a32fa46e50270 (patch)
tree23f814b690f461e89f44e4455bdfce90fafb6493
parentc31b0fc31f69435eeb6cd456158239e275f52f31 (diff)
Extend r93152 to work on OR r, r. If the source set bits are known not to overlap, then select as an ADD instead.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@93191 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86Instr64bit.td7
-rw-r--r--lib/Target/X86/X86InstrInfo.td36
-rw-r--r--test/CodeGen/X86/3addr-or.ll16
-rw-r--r--test/CodeGen/X86/fast-isel.ll2
4 files changed, 51 insertions, 10 deletions
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index b2aead6a6d..7077cf9bb0 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1093,7 +1093,7 @@ let isCommutable = 1 in
def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, GR64:$src2)),
+ [(set GR64:$dst, (or_not_add GR64:$src1, GR64:$src2)),
(implicit EFLAGS)]>;
def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -2125,13 +2125,16 @@ def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),
GR64:$src2, (i8 imm:$amt2)), addr:$dst),
(SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
-// (or x, c) -> (add x, c) if masked bits are known zero.
+// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt8:$src2),
(implicit EFLAGS)),
(ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt32:$src2),
(implicit EFLAGS)),
(ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(parallel (or_is_add GR64:$src1, GR64:$src2),
+ (implicit EFLAGS)),
+ (ADD64rr GR64:$src1, GR64:$src2)>;
// X86 specific add which produces a flag.
def : Pat<(addc GR64:$src1, GR64:$src2),
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 28c5154a56..9b69018fa8 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -497,12 +497,28 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
- return false;
+ else {
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero0, KnownOne0;
+ CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+ APInt KnownZero1, KnownOne1;
+ CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+ return (~KnownZero0 & ~KnownZero1) == 0;
+ }
}]>;
def or_not_add : PatFrag<(ops node:$lhs, node:$rhs),(or node:$lhs, node:$rhs),[{
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!CN) return true;
- return !CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return !CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+ else {
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero0, KnownOne0;
+ CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+ APInt KnownZero1, KnownOne1;
+ CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+ return (~KnownZero0 & ~KnownZero1) != 0;
+ }
}]>;
// 'shld' and 'shrd' instruction patterns. Note that even though these have
@@ -1853,12 +1869,12 @@ def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst),
def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (or GR16:$src1, GR16:$src2)),
+ [(set GR16:$dst, (or_not_add GR16:$src1, GR16:$src2)),
(implicit EFLAGS)]>, OpSize;
def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, GR32:$src2)),
+ [(set GR32:$dst, (or_not_add GR32:$src1, GR32:$src2)),
(implicit EFLAGS)]>;
}
@@ -4659,7 +4675,7 @@ def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
-// (or x, c) -> (add x, c) if masked bits are known zero.
+// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2),
(implicit EFLAGS)),
(ADD16ri GR16:$src1, imm:$src2)>;
@@ -4672,6 +4688,12 @@ def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2),
def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2),
(implicit EFLAGS)),
(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(parallel (or_is_add GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (ADD32rr GR32:$src1, GR32:$src2)>;
//===----------------------------------------------------------------------===//
// EFLAGS-defining Patterns
diff --git a/test/CodeGen/X86/3addr-or.ll b/test/CodeGen/X86/3addr-or.ll
index 395ba46aab..30a1f36850 100644
--- a/test/CodeGen/X86/3addr-or.ll
+++ b/test/CodeGen/X86/3addr-or.ll
@@ -9,3 +9,19 @@ entry:
%1 = or i32 %0, 3 ; <i32> [#uses=1]
ret i32 %1
}
+
+define i64 @test2(i8 %A, i8 %B) nounwind {
+; CHECK: test2:
+; CHECK: shrq $4
+; CHECK-NOT: movq
+; CHECK-NOT: orq
+; CHECK: leaq
+; CHECK: ret
+ %C = zext i8 %A to i64 ; <i64> [#uses=1]
+ %D = shl i64 %C, 4 ; <i64> [#uses=1]
+ %E = and i64 %D, 48 ; <i64> [#uses=1]
+ %F = zext i8 %B to i64 ; <i64> [#uses=1]
+ %G = lshr i64 %F, 4 ; <i64> [#uses=1]
+ %H = or i64 %G, %E ; <i64> [#uses=1]
+ ret i64 %H
+}
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index 3dcd736a14..84b3fd7caf 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -14,7 +14,7 @@ fast:
%t1 = mul i32 %t0, %s
%t2 = sub i32 %t1, %s
%t3 = and i32 %t2, %s
- %t4 = or i32 %t3, %s
+ %t4 = xor i32 %t3, 3
%t5 = xor i32 %t4, %s
%t6 = add i32 %t5, 2
%t7 = getelementptr i32* %y, i32 1