17 files changed, 633 insertions, 620 deletions
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 9d8024b86d..56cb17dee6 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -465,6 +465,29 @@ public:
   SDOperand FoldSetCC(MVT::ValueType VT, SDOperand N1,
                       SDOperand N2, ISD::CondCode Cond);
   
+  /// MaskedValueIsZero - Return true if 'Op & Mask' is known to be zero.  We
+  /// use this predicate to simplify operations downstream.  Op and Mask are
+  /// known to be the same type.  Depth is the current recursion depth.
+  bool MaskedValueIsZero(SDOperand Op, uint64_t Mask, unsigned Depth = 0)
+    const;
+  
+  /// ComputeMaskedBits - Determine which of the bits specified in Mask are
+  /// known to be either zero or one and return them in the KnownZero/KnownOne
+  /// bitsets (both are reset first).  Only bits in Mask are analyzed, in order
+  /// to short-circuit processing.  Targets can implement the TargetLowering
+  /// method computeMaskedBitsForTargetNode to let target nodes be understood.
+  void ComputeMaskedBits(SDOperand Op, uint64_t Mask, uint64_t &KnownZero,
+                         uint64_t &KnownOne, unsigned Depth = 0) const;
+    
+  /// ComputeNumSignBits - Return the number of times the sign bit of the
+  /// register is replicated into the other bits.  We know that at least 1 bit
+  /// is always equal to the sign bit (itself), but other cases can give us
+  /// information.  For example, immediately after an "SRA X, 2", we know that
+  /// the top 3 bits are all equal to each other, so we return 3.  Targets can
+  /// implement the ComputeNumSignBitsForTargetNode method in TargetLowering
+  /// to allow target nodes to be understood.
+  unsigned ComputeNumSignBits(SDOperand Op, unsigned Depth = 0) const;
+  
 private:
   void RemoveNodeFromCSEMaps(SDNode *N);
   SDNode *AddNonLeafNodeToCSEMaps(SDNode *N);
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index de51945ae4..5b98667602 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -494,20 +494,6 @@ public:
     bool ShrinkDemandedConstant(SDOperand Op, uint64_t Demanded);
   };
                                                 
-  /// MaskedValueIsZero - Return true if 'Op & Mask' is known to be zero.  We
-  /// use this predicate to simplify operations downstream.  Op and Mask are
-  /// known to be the same type.
-  bool MaskedValueIsZero(SDOperand Op, uint64_t Mask, unsigned Depth = 0)
-    const;
-  
-  /// ComputeMaskedBits - Determine which of the bits specified in Mask are
-  /// known to be either zero or one and return them in the KnownZero/KnownOne
-  /// bitsets.  This code only analyzes bits in Mask, in order to short-circuit
-  /// processing.  Targets can implement the computeMaskedBitsForTargetNode 
-  /// method, to allow target nodes to be understood.
-  void ComputeMaskedBits(SDOperand Op, uint64_t Mask, uint64_t &KnownZero,
-                         uint64_t &KnownOne, unsigned Depth = 0) const;
-    
   /// SimplifyDemandedBits - Look at Op.  At this point, we know that only the
   /// DemandedMask bits of the result of Op are ever used downstream.  If we can
   /// use this information to simplify Op, create a new simplified DAG node and
@@ -527,15 +513,9 @@ public:
                                               uint64_t Mask,
                                               uint64_t &KnownZero, 
                                               uint64_t &KnownOne,
+                                              const SelectionDAG &DAG,
                                               unsigned Depth = 0) const;
 
-  /// ComputeNumSignBits - Return the number of times the sign bit of the
-  /// register is replicated into the other bits.  We know that at least 1 bit
-  /// is always equal to the sign bit (itself), but other cases can give us
-  /// information.  For example, immediately after an "SRA X, 2", we know that
-  /// the top 3 bits are all equal to each other, so we return 3.
-  unsigned ComputeNumSignBits(SDOperand Op, unsigned Depth = 0) const;
-  
   /// ComputeNumSignBitsForTargetNode - This method can be implemented by
   /// targets that want to expose additional information about sign bits to the
   /// DAG Combiner.
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cfc52d9c5c..32ab5d323e 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -896,9 +896,9 @@ SDOperand DAGCombiner::visitADD(SDNode *N) {
     uint64_t LHSZero, LHSOne;
     uint64_t RHSZero, RHSOne;
     uint64_t Mask = MVT::getIntVTBitMask(VT);
-    TLI.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+    DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
     if (LHSZero) {
-      TLI.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+      DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
       
       // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
       // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
@@ -957,9 +957,9 @@ SDOperand DAGCombiner::visitADDC(SDNode *N) {
   uint64_t LHSZero, LHSOne;
   uint64_t RHSZero, RHSOne;
   uint64_t Mask = MVT::getIntVTBitMask(VT);
-  TLI.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+  DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
   if (LHSZero) {
-    TLI.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+    DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
     
     // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
     // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
@@ -1120,8 +1120,8 @@ SDOperand DAGCombiner::visitSDIV(SDNode *N) {
   // If we know the sign bits of both operands are zero, strength reduce to a
   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
   uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1);
-  if (TLI.MaskedValueIsZero(N1, SignBit) &&
-      TLI.MaskedValueIsZero(N0, SignBit))
+  if (DAG.MaskedValueIsZero(N1, SignBit) &&
+      DAG.MaskedValueIsZero(N0, SignBit))
     return DAG.getNode(ISD::UDIV, N1.getValueType(), N0, N1);
   // fold (sdiv X, pow2) -> simple ops after legalize
   if (N1C && N1C->getValue() && !TLI.isIntDivCheap() &&
@@ -1214,8 +1214,8 @@ SDOperand DAGCombiner::visitSREM(SDNode *N) {
   // If we know the sign bits of both operands are zero, strength reduce to a
   // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
   uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1);
-  if (TLI.MaskedValueIsZero(N1, SignBit) &&
-      TLI.MaskedValueIsZero(N0, SignBit))
+  if (DAG.MaskedValueIsZero(N1, SignBit) &&
+      DAG.MaskedValueIsZero(N0, SignBit))
     return DAG.getNode(ISD::UREM, VT, N0, N1);
   
   // Unconditionally lower X%C -> X-X/C*C.  This allows the X/C logic to hack on
@@ -1357,7 +1357,7 @@ SDOperand DAGCombiner::visitAND(SDNode *N) {
   if (N1C && N1C->isAllOnesValue())
     return N0;
   // if (and x, c) is known to be zero, return 0
-  if (N1C && TLI.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT)))
+  if (N1C && DAG.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT)))
     return DAG.getConstant(0, VT);
   // reassociate and
   SDOperand RAND = ReassociateOps(ISD::AND, N0, N1);
@@ -1371,7 +1371,7 @@ SDOperand DAGCombiner::visitAND(SDNode *N) {
   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
     unsigned InMask = MVT::getIntVTBitMask(N0.getOperand(0).getValueType());
-    if (TLI.MaskedValueIsZero(N0.getOperand(0),
+    if (DAG.MaskedValueIsZero(N0.getOperand(0),
                               ~N1C->getValue() & InMask)) {
       SDOperand Zext = DAG.getNode(ISD::ZERO_EXTEND, N0.getValueType(),
                                    N0.getOperand(0));
@@ -1442,7 +1442,7 @@ SDOperand DAGCombiner::visitAND(SDNode *N) {
     MVT::ValueType EVT = LN0->getLoadedVT();
     // If we zero all the possible extended bits, then we can turn this into
     // a zextload if we are running before legalize or the operation is legal.
-    if (TLI.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) &&
+    if (DAG.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) &&
         (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) {
       SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(),
                                          LN0->getBasePtr(), LN0->getSrcValue(),
@@ -1461,7 +1461,7 @@ SDOperand DAGCombiner::visitAND(SDNode *N) {
     MVT::ValueType EVT = LN0->getLoadedVT();
     // If we zero all the possible extended bits, then we can turn this into
     // a zextload if we are running before legalize or the operation is legal.
-    if (TLI.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) &&
+    if (DAG.MaskedValueIsZero(N1, ~0ULL << MVT::getSizeInBits(EVT)) &&
         (!AfterLegalize || TLI.isLoadXLegal(ISD::ZEXTLOAD, EVT))) {
       SDOperand ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, LN0->getChain(),
                                          LN0->getBasePtr(), LN0->getSrcValue(),
@@ -1542,7 +1542,7 @@ SDOperand DAGCombiner::visitOR(SDNode *N) {
     return N1;
   // fold (or x, c) -> c iff (x & ~c) == 0
   if (N1C && 
-      TLI.MaskedValueIsZero(N0,~N1C->getValue() & (~0ULL>>(64-OpSizeInBits))))
+      DAG.MaskedValueIsZero(N0,~N1C->getValue() & (~0ULL>>(64-OpSizeInBits))))
     return N1;
   // reassociate or
   SDOperand ROR = ReassociateOps(ISD::OR, N0, N1);
@@ -1611,8 +1611,8 @@ SDOperand DAGCombiner::visitOR(SDNode *N) {
     uint64_t LHSMask = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
     uint64_t RHSMask = cast<ConstantSDNode>(N1.getOperand(1))->getValue();
     
-    if (TLI.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
-        TLI.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+    if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+        DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
       SDOperand X =DAG.getNode(ISD::OR, VT, N0.getOperand(0), N1.getOperand(0));
       return DAG.getNode(ISD::AND, VT, X, DAG.getConstant(LHSMask|RHSMask, VT));
     }
@@ -1914,7 +1914,7 @@ SDOperand DAGCombiner::visitSHL(SDNode *N) {
   if (N1C && N1C->isNullValue())
     return N0;
   // if (shl x, c) is known to be zero, return 0
-  if (TLI.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT)))
+  if (DAG.MaskedValueIsZero(SDOperand(N, 0), MVT::getIntVTBitMask(VT)))
     return DAG.getConstant(0, VT);
   if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
     return SDOperand(N, 0);
@@ -2005,7 +2005,7 @@ SDOperand DAGCombiner::visitSRA(SDNode *N) {
   
   
   // If the sign bit is known to be zero, switch this to a SRL.
-  if (TLI.MaskedValueIsZero(N0, MVT::getIntVTSignBit(VT)))
+  if (DAG.MaskedValueIsZero(N0, MVT::getIntVTSignBit(VT)))
     return DAG.getNode(ISD::SRL, VT, N0, N1);
   return SDOperand();
 }
@@ -2031,7 +2031,7 @@ SDOperand DAGCombiner::visitSRL(SDNode *N) {
   if (N1C && N1C->isNullValue())
     return N0;
   // if (srl x, c) is known to be zero, return 0
-  if (N1C && TLI.MaskedValueIsZero(SDOperand(N, 0), ~0ULL >> (64-OpSizeInBits)))
+  if (N1C && DAG.MaskedValueIsZero(SDOperand(N, 0), ~0ULL >> (64-OpSizeInBits)))
     return DAG.getConstant(0, VT);
   
   // fold (srl (srl x, c1), c2) -> 0 or (srl x, c1+c2)
@@ -2068,7 +2068,7 @@ SDOperand DAGCombiner::visitSRL(SDNode *N) {
   if (N1C && N0.getOpcode() == ISD::CTLZ && 
       N1C->getValue() == Log2_32(MVT::getSizeInBits(VT))) {
     uint64_t KnownZero, KnownOne, Mask = MVT::getIntVTBitMask(VT);
-    TLI.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+    DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
     
     // If any of the input bits are KnownOne, then the input couldn't be all
     // zeros, thus the result of the srl will always be zero.
@@ -2270,7 +2270,7 @@ SDOperand DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
     unsigned OpBits   = MVT::getSizeInBits(Op.getValueType());
     unsigned MidBits  = MVT::getSizeInBits(N0.getValueType());
     unsigned DestBits = MVT::getSizeInBits(VT);
-    unsigned NumSignBits = TLI.ComputeNumSignBits(Op);
+    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
     
     if (OpBits == DestBits) {
       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
@@ -2634,7 +2634,7 @@ SDOperand DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
     return DAG.getNode(ISD::SIGN_EXTEND_INREG, VT, N0, N1);
   
   // If the input is already sign extended, just drop the extension.
-  if (TLI.ComputeNumSignBits(N0) >= MVT::getSizeInBits(VT)-EVTBits+1)
+  if (DAG.ComputeNumSignBits(N0) >= MVT::getSizeInBits(VT)-EVTBits+1)
     return N0;
   
   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
@@ -2644,7 +2644,7 @@ SDOperand DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
   }
 
   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
-  if (TLI.MaskedValueIsZero(N0, 1ULL << (EVTBits-1)))
+  if (DAG.MaskedValueIsZero(N0, 1ULL << (EVTBits-1)))
     return DAG.getZeroExtendInReg(N0, EVT);
   
   // fold operands of sext_in_reg based on knowledge that the top bits are not
@@ -2666,7 +2666,7 @@ SDOperand DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
       if (ShAmt->getValue()+EVTBits <= MVT::getSizeInBits(VT)) {
         // We can turn this into an SRA iff the input to the SRL is already sign
         // extended enough.
-        unsigned InSignBits = TLI.ComputeNumSignBits(N0.getOperand(0));
+        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
         if (MVT::getSizeInBits(VT)-(ShAmt->getValue()+EVTBits) < InSignBits)
           return DAG.getNode(ISD::SRA, VT, N0.getOperand(0), N0.getOperand(1));
       }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 0584d6ed6e..57357442fb 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1449,7 +1449,7 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
       
       // The top bits of the promoted condition are not necessarily zero, ensure
       // that the value is properly zero extended.
-      if (!TLI.MaskedValueIsZero(Tmp2, 
+      if (!DAG.MaskedValueIsZero(Tmp2, 
                                  MVT::getIntVTBitMask(Tmp2.getValueType())^1))
         Tmp2 = DAG.getZeroExtendInReg(Tmp2, MVT::i1);
       break;
@@ -2041,7 +2041,7 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
     case Promote:
       Tmp1 = PromoteOp(Node->getOperand(0));  // Promote the condition.
       // Make sure the condition is either zero or one.
-      if (!TLI.MaskedValueIsZero(Tmp1,
+      if (!DAG.MaskedValueIsZero(Tmp1,
                                  MVT::getIntVTBitMask(Tmp1.getValueType())^1))
         Tmp1 = DAG.getZeroExtendInReg(Tmp1, MVT::i1);
       break;
@@ -4209,7 +4209,7 @@ bool SelectionDAGLegalize::ExpandShift(unsigned Opc, SDOperand Op,SDOperand Amt,
   // Okay, the shift amount isn't constant.  However, if we can tell that it is
   // >= 32 or < 32, we can still simplify it, without knowing the actual value.
   uint64_t Mask = NVTBits, KnownZero, KnownOne;
-  TLI.ComputeMaskedBits(Amt, Mask, KnownZero, KnownOne);
+  DAG.ComputeMaskedBits(Amt, Mask, KnownZero, KnownOne);
   
   // If we know that the high bit of the shift amount is one, then we can do
   // this as a couple of simple shifts.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0ac77f99a3..d70823379a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -936,6 +936,552 @@ SDOperand SelectionDAG::FoldSetCC(MVT::ValueType VT, SDOperand N1,
   return SDOperand();
 }
 
+/// MaskedValueIsZero - Return true when every bit of Op selected by Mask is
+/// known to be zero, i.e. 'Op & Mask' is provably zero.  Mask must be zero for
+/// bits that Op cannot have.  Always answers false for i128 operands.
+bool SelectionDAG::MaskedValueIsZero(SDOperand Op, uint64_t Mask,
+                                     unsigned Depth) const {
+  // A 64-bit mask cannot describe an i128 value; this would need APInt.
+  if (Op.getValueType() == MVT::i128)
+    return false;
+
+  uint64_t ZeroBits, OneBits;
+  ComputeMaskedBits(Op, Mask, ZeroBits, OneBits, Depth);
+  assert((ZeroBits & OneBits) == 0 && "Bits known to be one AND zero?");
+  return (Mask & ~ZeroBits) == 0;  // No requested bit is left unproven.
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bitsets.  This code only analyzes bits in Mask, in order to short-circuit
+/// processing.  Nothing is reported at recursion depth 6 or for i128 values.
+void SelectionDAG::ComputeMaskedBits(SDOperand Op, uint64_t Mask, 
+                                     uint64_t &KnownZero, uint64_t &KnownOne,
+                                     unsigned Depth) const {
+  KnownZero = KnownOne = 0;   // Don't know anything.
+  if (Depth == 6 || Mask == 0)
+    return;  // Limit search depth; also nothing to do for an empty mask.
+  
+  // The masks are not wide enough to represent this type!  Should use APInt.
+  if (Op.getValueType() == MVT::i128)
+    return;
+  
+  uint64_t KnownZero2, KnownOne2;
+
+  switch (Op.getOpcode()) {
+  case ISD::Constant:
+    // We know all of the bits for a constant!
+    KnownOne = cast<ConstantSDNode>(Op)->getValue() & Mask;
+    KnownZero = ~KnownOne & Mask;
+    return;
+  case ISD::AND:
+    // If either the LHS or the RHS are Zero, the result is zero.
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    Mask &= ~KnownZero;  // RHS-known-zero bits need no LHS analysis.
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+
+    // Output known-1 bits are only known if set in both the LHS & RHS.
+    KnownOne &= KnownOne2;
+    // Output known-0 are known to be clear if zero in either the LHS | RHS.
+    KnownZero |= KnownZero2;
+    return;
+  case ISD::OR:
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    Mask &= ~KnownOne;  // RHS-known-one bits need no LHS analysis.
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // Output known-0 bits are only known if clear in both the LHS & RHS.
+    KnownZero &= KnownZero2;
+    // Output known-1 are known to be set if set in either the LHS | RHS.
+    KnownOne |= KnownOne2;
+    return;
+  case ISD::XOR: {
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // Output known-0 bits are known if clear or set in both the LHS & RHS.
+    uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 are known to be set if set in only one of the LHS, RHS.
+    KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    KnownZero = KnownZeroOut;
+    return;
+  }
+  case ISD::SELECT:
+    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SELECT_CC:
+    ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SETCC:
+    // If we know the result of a setcc has the top bits zero, use this info.
+    if (TLI.getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult)
+      KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
+    return;
+  case ISD::SHL:
+    // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      ComputeMaskedBits(Op.getOperand(0), Mask >> SA->getValue(),
+                        KnownZero, KnownOne, Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero <<= SA->getValue();
+      KnownOne  <<= SA->getValue();
+      KnownZero |= (1ULL << SA->getValue())-1;  // low bits known zero.
+    }
+    return;
+  case ISD::SRL:
+    // (ushr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      MVT::ValueType VT = Op.getValueType();
+      unsigned ShAmt = SA->getValue();
+
+      uint64_t TypeMask = MVT::getIntVTBitMask(VT);
+      ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt) & TypeMask,
+                        KnownZero, KnownOne, Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero &= TypeMask;
+      KnownOne  &= TypeMask;
+      KnownZero >>= ShAmt;
+      KnownOne  >>= ShAmt;
+
+      uint64_t HighBits = (1ULL << ShAmt)-1;
+      HighBits <<= MVT::getSizeInBits(VT)-ShAmt;
+      KnownZero |= HighBits;  // High bits known zero.
+    }
+    return;
+  case ISD::SRA:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      MVT::ValueType VT = Op.getValueType();
+      unsigned ShAmt = SA->getValue();
+
+      // Compute the new bits that are at the top now.
+      uint64_t TypeMask = MVT::getIntVTBitMask(VT);
+
+      uint64_t InDemandedMask = (Mask << ShAmt) & TypeMask;
+      // If any of the demanded bits are produced by the sign extension, we also
+      // demand the input sign bit.
+      uint64_t HighBits = (1ULL << ShAmt)-1;
+      HighBits <<= MVT::getSizeInBits(VT) - ShAmt;
+      if (HighBits & Mask)
+        InDemandedMask |= MVT::getIntVTSignBit(VT);
+      
+      ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
+                        Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+      KnownZero &= TypeMask;
+      KnownOne  &= TypeMask;
+      KnownZero >>= ShAmt;
+      KnownOne  >>= ShAmt;
+      
+      // Handle the sign bits.
+      uint64_t SignBit = MVT::getIntVTSignBit(VT);
+      SignBit >>= ShAmt;  // Adjust to where it is now in the mask.
+      
+      if (KnownZero & SignBit) {       
+        KnownZero |= HighBits;  // New bits are known zero.
+      } else if (KnownOne & SignBit) {
+        KnownOne  |= HighBits;  // New bits are known one.
+      }
+    }
+    return;
+  case ISD::SIGN_EXTEND_INREG: {
+    MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    
+    // Sign extension.  Compute the demanded bits in the result that are not
+    // present in the input.
+    uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & Mask;
+
+    uint64_t InSignBit = MVT::getIntVTSignBit(EVT);
+    int64_t InputDemandedBits = Mask & MVT::getIntVTBitMask(EVT);
+    
+    // If the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    if (NewBits)
+      InputDemandedBits |= InSignBit;
+    
+    ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
+                      KnownZero, KnownOne, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    
+    // If the sign bit of the input is known set or clear, then we know the
+    // top bits of the result.
+    if (KnownZero & InSignBit) {          // Input sign bit known clear
+      KnownZero |= NewBits;
+      KnownOne  &= ~NewBits;
+    } else if (KnownOne & InSignBit) {    // Input sign bit known set
+      KnownOne  |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {                              // Input sign bit unknown
+      KnownZero &= ~NewBits;
+      KnownOne  &= ~NewBits;
+    }
+    return;
+  }
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP: {
+    MVT::ValueType VT = Op.getValueType();
+    unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1;
+    KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT);
+    KnownOne  = 0;
+    return;
+  }
+  case ISD::LOAD: {
+    if (ISD::isZEXTLoad(Op.Val)) {
+      LoadSDNode *LD = cast<LoadSDNode>(Op);
+      MVT::ValueType VT = LD->getLoadedVT();
+      KnownZero |= ~MVT::getIntVTBitMask(VT) & Mask;
+    }
+    return;
+  }
+  case ISD::ZERO_EXTEND: {
+    uint64_t InMask  = MVT::getIntVTBitMask(Op.getOperand(0).getValueType());
+    uint64_t NewBits = (~InMask) & Mask;
+    ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, 
+                      KnownOne, Depth+1);
+    KnownZero |= NewBits & Mask;
+    KnownOne  &= ~NewBits;
+    return;
+  }
+  case ISD::SIGN_EXTEND: {
+    MVT::ValueType InVT = Op.getOperand(0).getValueType();
+    unsigned InBits    = MVT::getSizeInBits(InVT);
+    uint64_t InMask    = MVT::getIntVTBitMask(InVT);
+    uint64_t InSignBit = 1ULL << (InBits-1);
+    uint64_t NewBits   = (~InMask) & Mask;
+    uint64_t InDemandedBits = Mask & InMask;
+
+    // If any of the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    if (NewBits & Mask)
+      InDemandedBits |= InSignBit;
+    
+    ComputeMaskedBits(Op.getOperand(0), InDemandedBits, KnownZero, 
+                      KnownOne, Depth+1);
+    // If the sign bit is known zero or one, the  top bits match.
+    if (KnownZero & InSignBit) {
+      KnownZero |= NewBits;
+      KnownOne  &= ~NewBits;
+    } else if (KnownOne & InSignBit) {
+      KnownOne  |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {   // Otherwise, top bits aren't known.
+      KnownOne  &= ~NewBits;
+      KnownZero &= ~NewBits;
+    }
+    return;
+  }
+  case ISD::ANY_EXTEND: {
+    MVT::ValueType VT = Op.getOperand(0).getValueType();
+    ComputeMaskedBits(Op.getOperand(0), Mask & MVT::getIntVTBitMask(VT),
+                      KnownZero, KnownOne, Depth+1);
+    return;
+  }
+  case ISD::TRUNCATE: {
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType());
+    KnownZero &= OutMask;
+    KnownOne &= OutMask;
+    break;  // Leaves the switch; nothing follows it, so same as return.
+  }
+  case ISD::AssertZext: {
+    MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    uint64_t InMask = MVT::getIntVTBitMask(VT);
+    ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, 
+                      KnownOne, Depth+1);
+    KnownZero |= (~InMask) & Mask;
+    return;
+  }
+  case ISD::ADD: {
+    // Analyze both operands; only their low known-zero bits are used below.
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+    
+    // The result's low bits are known zero for as many trailing bits as are
+    // known zero in both the LHS and the RHS.  For example, 8+(X<<3) is known
+    // to have the low 3 bits clear.
+    uint64_t KnownZeroOut = std::min(CountTrailingZeros_64(~KnownZero), 
+                                     CountTrailingZeros_64(~KnownZero2));
+    
+    KnownZero = (1ULL << KnownZeroOut) - 1;
+    KnownOne = 0;
+    return;
+  }
+  case ISD::SUB: {
+    ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+    if (!CLHS) return;
+
+    // We know that the top bits of C-X are clear if X contains less bits
+    // than C (i.e. no wrap-around can happen).  For example, 20-X is
+    // positive if we can prove that X is >= 0 and < 16.
+    MVT::ValueType VT = CLHS->getValueType(0);
+    if ((CLHS->getValue() & MVT::getIntVTSignBit(VT)) == 0) {  // sign bit clear
+      unsigned NLZ = CountLeadingZeros_64(CLHS->getValue()+1);
+      uint64_t MaskV = (1ULL << (63-NLZ))-1; // NLZ can't be 64 with no sign bit
+      MaskV = ~MaskV & MVT::getIntVTBitMask(VT);
+      ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1);
+
+      // If all of the MaskV bits are known to be zero, then we know the output
+      // top bits are zero, because we now know that the output is from [0-C].
+      if ((KnownZero & MaskV) == MaskV) {
+        unsigned NLZ2 = CountLeadingZeros_64(CLHS->getValue());
+        KnownZero = ~((1ULL << (64-NLZ2))-1) & Mask;  // Top bits known zero.
+        KnownOne = 0;   // No one bits known.
+      } else {
+        KnownZero = KnownOne = 0;  // Otherwise, nothing known.
+      }
+    }
+    return;
+  }
+  default:
+    // Target-specific opcodes and intrinsic nodes go to the target's hook.
+    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_VOID:
+      TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this);
+    }
+    return;
+  }
+}
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits.  We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information.  For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+unsigned SelectionDAG::ComputeNumSignBits(SDOperand Op, unsigned Depth) const{
+  MVT::ValueType VT = Op.getValueType();
+  assert(MVT::isInteger(VT) && "Invalid VT!");
+  unsigned VTBits = MVT::getSizeInBits(VT);
+  unsigned Tmp, Tmp2;
+  
+  if (Depth == 6)
+    return 1;  // Limit search depth.
+
+  switch (Op.getOpcode()) {
+  default: break;
+  case ISD::AssertSext:
+    Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+    return VTBits-Tmp+1;
+  case ISD::AssertZext:
+    Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+    return VTBits-Tmp;
+    
+  case ISD::Constant: {
+    uint64_t Val = cast<ConstantSDNode>(Op)->getValue();
+    // If negative, invert the bits, then look at it.
+    if (Val & MVT::getIntVTSignBit(VT))
+      Val = ~Val;
+    
+    // Shift the bits so they are the leading bits in the int64_t.
+    Val <<= 64-VTBits;
+    
+    // Return # leading zeros.  We use 'min' here in case Val was zero before
+    // shifting.  We don't want to return '64' as for an i32 "0".
+    return std::min(VTBits, CountLeadingZeros_64(Val));
+  }
+    
+  case ISD::SIGN_EXTEND:
+    Tmp = VTBits-MVT::getSizeInBits(Op.getOperand(0).getValueType());
+    return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+    
+  case ISD::SIGN_EXTEND_INREG:
+    // Max of the input and what this extends.
+    Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT());
+    Tmp = VTBits-Tmp+1;
+    
+    Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    return std::max(Tmp, Tmp2);
+
+  case ISD::SRA:
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    // SRA X, C   -> adds C sign bits.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      Tmp += C->getValue();
+      if (Tmp > VTBits) Tmp = VTBits;
+    }
+    return Tmp;
+  case ISD::SHL:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      // shl destroys sign bits.
+      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+      if (C->getValue() >= VTBits ||      // Bad shift.
+          C->getValue() >= Tmp) break;    // Shifted all sign bits out.
+      return Tmp - C->getValue();
+    }
+    break;
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:    // NOT is handled here.
+    // Logical binary ops preserve the number of sign bits.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    return std::min(Tmp, Tmp2);
+
+  case ISD::SELECT:
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    return std::min(Tmp, Tmp2);
+    
+  case ISD::SETCC:
+    // If setcc returns 0/-1, all bits are sign bits.
+    if (TLI.getSetCCResultContents() ==
+        TargetLowering::ZeroOrNegativeOneSetCCResult)
+      return VTBits;
+    break;
+  case ISD::ROTL:
+  case ISD::ROTR:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned RotAmt = C->getValue() & (VTBits-1);
+      
+      // Handle rotate right by N like a rotate left by 32-N.
+      if (Op.getOpcode() == ISD::ROTR)
+        RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+      // If we aren't rotating out all of the known-in sign bits, return the
+      // number that are left.  This handles rotl(sext(x), 1) for example.
+      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+      if (Tmp > RotAmt+1) return Tmp-RotAmt;
+    }
+    break;
+  case ISD::ADD:
+    // Add can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.