Significantly rework the calculation of effective integer-expression ranges

for -Wsign-compare and -Wconversion, and use that coordinated logic to drive both diagnostics. The new logic works more transparently with implicit conversions, conditional operators, etc., as well as bringing -Wconversion's ability to deal with pseudo-closed operations (e.g. arithmetic on shorts) to -Wsign-compare. Fixes PRs 5887, 5937, 5938, and 5939. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@92823 91177308-0d34-0410-b5e6-96231b3b80d8
author: John McCall <rjmccall@apple.com> 2010-01-06 05:24:50 +0000
committer: John McCall <rjmccall@apple.com> 2010-01-06 05:24:50 +0000
commit: f2370c9b4aade940e2253b5b33262ba507d1d71f (patch)
tree: a375aa5b288f28b3b2f040a626064f49827d5eab /lib/Sema/SemaChecking.cpp
parent: 1a78afbde2257d01bd38a36e094d3e3231a9b412 (diff)
1 files changed, 221 insertions, 176 deletions
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index fc8e18bc09..00e7242808 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -1557,170 +1557,153 @@ void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
       << lex->getSourceRange() << rex->getSourceRange();
 }
 
-//===--- CHECK: Comparison of signed and unsigned int (-Wsign-compare) ----===//
+//===--- CHECK: Integer mixed-sign comparisons (-Wsign-compare) --------===//
+//===--- CHECK: Lossy implicit conversions (-Wconversion) --------------===//
 
-/// Returns true if we can prove that the result of the given
-/// integral expression will not have its sign bit set.
-static bool IsSignBitProvablyZero(ASTContext &Context, Expr *E) {
-  E = E->IgnoreParens();
-
-  llvm::APSInt value;
-  if (E->isIntegerConstantExpr(value, Context))
-    return value.isNonNegative();
+namespace {
 
-  if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E))
-    return IsSignBitProvablyZero(Context, CO->getLHS()) &&
-           IsSignBitProvablyZero(Context, CO->getRHS());
-
-  return false;
-}
+/// Structure recording the 'active' range of an integer-valued
+/// expression.
+struct IntRange {
+  /// The number of bits active in the int.
+  unsigned Width;
 
-/// Retrieves the width and signedness of the given integer type,
-/// or returns false if it is not an integer type.
-///
-/// \param T must be canonical
-static bool getIntProperties(ASTContext &C, const Type *T,
-                             unsigned &BitWidth, bool &Signed) {
-  assert(T->isCanonicalUnqualified());
+  /// True if the int is known not to have negative values.
+  bool NonNegative;
 
-  if (const VectorType *VT = dyn_cast<VectorType>(T))
-    T = VT->getElementType().getTypePtr();
-  if (const ComplexType *CT = dyn_cast<ComplexType>(T))
-    T = CT->getElementType().getTypePtr();
+  IntRange() {}
+  IntRange(unsigned Width, bool NonNegative)
+    : Width(Width), NonNegative(NonNegative)
+  {}
 
-  if (const BuiltinType *BT = dyn_cast<BuiltinType>(T)) {
-    if (!BT->isInteger()) return false;
+  // Returns the range of the bool type.
+  static IntRange forBoolType() {
+    return IntRange(1, true);
+  }
 
-    BitWidth = C.getIntWidth(QualType(T, 0));
-    Signed = BT->isSignedInteger();
-    return true;
+  // Returns the range of an integral type.
+  static IntRange forType(ASTContext &C, QualType T) {
+    return forCanonicalType(C, T->getCanonicalTypeInternal().getTypePtr());
   }
 
-  return false;
-}
+  // Returns the range of an integeral type based on its canonical
+  // representation.
+  static IntRange forCanonicalType(ASTContext &C, const Type *T) {
+    assert(T->isCanonicalUnqualified());
 
-/// Checks whether the given value will have the same value if it it
-/// is truncated to the given width, then extended back to the
-/// original width.
-static bool IsSameIntAfterCast(const llvm::APSInt &value,
-                               unsigned TargetWidth) {
-  unsigned SourceWidth = value.getBitWidth();
-  llvm::APSInt truncated = value;
-  truncated.trunc(TargetWidth);
-  truncated.extend(SourceWidth);
-  return (truncated == value);
-}
+    if (const VectorType *VT = dyn_cast<VectorType>(T))
+      T = VT->getElementType().getTypePtr();
+    if (const ComplexType *CT = dyn_cast<ComplexType>(T))
+      T = CT->getElementType().getTypePtr();
+    if (const EnumType *ET = dyn_cast<EnumType>(T))
+      T = ET->getDecl()->getIntegerType().getTypePtr();
 
-/// Checks whether the given value will have the same value if it
-/// is truncated to the given width, then extended back to the original
-/// width.
-///
-/// The value might be a vector or a complex.
-static bool IsSameIntAfterCast(const APValue &value, unsigned TargetWidth) {
-  if (value.isInt())
-    return IsSameIntAfterCast(value.getInt(), TargetWidth);
+    const BuiltinType *BT = cast<BuiltinType>(T);
+    assert(BT->isInteger());
 
-  if (value.isVector()) {
-    for (unsigned i = 0, e = value.getVectorLength(); i != e; ++i)
-      if (!IsSameIntAfterCast(value.getVectorElt(i), TargetWidth))
-        return false;
-    return true;
+    return IntRange(C.getIntWidth(QualType(T, 0)), BT->isUnsignedInteger());
   }
 
-  if (value.isComplexInt()) {
-    return IsSameIntAfterCast(value.getComplexIntReal(), TargetWidth) &&
-           IsSameIntAfterCast(value.getComplexIntImag(), TargetWidth);
+  // Returns the supremum of two ranges: i.e. their conservative merge.
+  static IntRange join(const IntRange &L, const IntRange &R) {
+    return IntRange(std::max(L.Width, R.Width),
+                        L.NonNegative && R.NonNegative);
   }
+};
 
-  // This can happen with lossless casts to intptr_t of "based" lvalues.
-  // Assume it might use arbitrary bits.
-  assert(value.isLValue());
-  return false;
-}
-                               
+IntRange GetValueRange(ASTContext &C, llvm::APSInt &value, unsigned MaxWidth) {
+  if (value.isSigned() && value.isNegative())
+    return IntRange(value.getMinSignedBits(), false);
 
-/// Checks whether the given value, which currently has the given
-/// source semantics, has the same value when coerced through the
-/// target semantics.
-static bool IsSameFloatAfterCast(const llvm::APFloat &value,
-                                 const llvm::fltSemantics &Src,
-                                 const llvm::fltSemantics &Tgt) {
-  llvm::APFloat truncated = value;
+  if (value.getBitWidth() > MaxWidth)
+    value.trunc(MaxWidth);
 
-  bool ignored;
-  truncated.convert(Src, llvm::APFloat::rmNearestTiesToEven, &ignored);
-  truncated.convert(Tgt, llvm::APFloat::rmNearestTiesToEven, &ignored);
-
-  return truncated.bitwiseIsEqual(value);
+  // isNonNegative() just checks the sign bit without considering
+  // signedness.
+  return IntRange(value.getActiveBits(), true);
 }
 
-/// Checks whether the given value, which currently has the given
-/// source semantics, has the same value when coerced through the
-/// target semantics.
-///
-/// The value might be a vector of floats (or a complex number).
-static bool IsSameFloatAfterCast(const APValue &value,
-                                 const llvm::fltSemantics &Src,
-                                 const llvm::fltSemantics &Tgt) {
-  if (value.isFloat())
-    return IsSameFloatAfterCast(value.getFloat(), Src, Tgt);
+IntRange GetValueRange(ASTContext &C, APValue &result,
+                       unsigned MaxWidth) {
+  if (result.isInt())
+    return GetValueRange(C, result.getInt(), MaxWidth);
 
-  if (value.isVector()) {
-    for (unsigned i = 0, e = value.getVectorLength(); i != e; ++i)
-      if (!IsSameFloatAfterCast(value.getVectorElt(i), Src, Tgt))
-        return false;
-    return true;
+  if (result.isVector()) {
+    IntRange R = GetValueRange(C, result.getVectorElt(0), MaxWidth);
+    for (unsigned i = 1, e = result.getVectorLength(); i != e; ++i)
+      R = IntRange::join(R, GetValueRange(C, result.getVectorElt(i), MaxWidth));
+    return R;
   }
 
-  assert(value.isComplexFloat());
-  return (IsSameFloatAfterCast(value.getComplexFloatReal(), Src, Tgt) &&
-          IsSameFloatAfterCast(value.getComplexFloatImag(), Src, Tgt));
+  if (result.isComplexInt()) {
+    IntRange R = GetValueRange(C, result.getComplexIntReal(), MaxWidth);
+    IntRange I = GetValueRange(C, result.getComplexIntImag(), MaxWidth);
+    return IntRange::join(R, I);
+  }
+
+  // This can happen with lossless casts to intptr_t of "based" lvalues.
+  // Assume it might use arbitrary bits.
+  assert(result.isLValue());
+  return IntRange(MaxWidth, false);
 }
 
-/// Determines if it's reasonable for the given expression to be truncated
-/// down to the given integer width.
-/// * Boolean expressions are automatically white-listed.
-/// * Arithmetic operations on implicitly-promoted operands of the
-///   target width or less are okay --- not because the results are
-///   actually guaranteed to fit within the width, but because the
-///   user is effectively pretending that the operations are closed
-///   within the implicitly-promoted type.
-static bool IsExprValueWithinWidth(ASTContext &C, Expr *E, unsigned Width) {
+/// Pseudo-evaluate the given integer expression, estimating the
+/// range of values it might take.
+///
+/// \param MaxWidth - the width to which the value will be truncated
+IntRange GetExprRange(ASTContext &C, Expr *E, unsigned MaxWidth) {
   E = E->IgnoreParens();
 
-#ifndef NDEBUG
-  {
-    const Type *ETy = E->getType()->getCanonicalTypeInternal().getTypePtr();
-    unsigned EWidth;
-    bool ESigned;
+  // Try a full evaluation first.
+  Expr::EvalResult result;
+  if (E->Evaluate(result, C))
+    return GetValueRange(C, result.Val, MaxWidth);
 
-    if (!getIntProperties(C, ETy, EWidth, ESigned))
-      assert(0 && "expression not of integer type");
+  // I think we only want to look through implicit casts here; if the
+  // user has an explicit widening cast, we should treat the value as
+  // being of the new, wider type.
+  if (ImplicitCastExpr *CE = dyn_cast<ImplicitCastExpr>(E)) {
+    if (CE->getCastKind() == CastExpr::CK_NoOp)
+      return GetExprRange(C, CE->getSubExpr(), MaxWidth);
 
-    // The caller should never let this happen.
-    assert(EWidth > Width && "called on expr whose type is too small");
-  }
-#endif
+    IntRange OutputTypeRange = IntRange::forType(C, CE->getType());
 
-  // Strip implicit casts off.
-  while (isa<ImplicitCastExpr>(E)) {
-    E = cast<ImplicitCastExpr>(E)->getSubExpr();
+    // Assume that non-integer casts can span the full range of the type.
+    if (CE->getCastKind() != CastExpr::CK_IntegralCast)
+      return OutputTypeRange;
 
-    const Type *ETy = E->getType()->getCanonicalTypeInternal().getTypePtr();
+    IntRange SubRange
+      = GetExprRange(C, CE->getSubExpr(),
+                     std::min(MaxWidth, OutputTypeRange.Width));
 
-    unsigned EWidth;
-    bool ESigned;
-    if (!getIntProperties(C, ETy, EWidth, ESigned))
-      return false;
+    // Bail out if the subexpr's range is as wide as the cast type.
+    if (SubRange.Width >= OutputTypeRange.Width)
+      return OutputTypeRange;
 
-    if (EWidth <= Width)
-      return true;
+    // Otherwise, we take the smaller width, and we're non-negative if
+    // either the output type or the subexpr is.
+    return IntRange(SubRange.Width,
+                    SubRange.NonNegative || OutputTypeRange.NonNegative);
+  }
+
+  if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E)) {
+    // If we can fold the condition, just take that operand.
+    bool CondResult;
+    if (CO->getCond()->EvaluateAsBooleanCondition(CondResult, C))
+      return GetExprRange(C, CondResult ? CO->getTrueExpr()
+                                        : CO->getFalseExpr(),
+                          MaxWidth);
+
+    // Otherwise, conservatively merge.
+    IntRange L = GetExprRange(C, CO->getTrueExpr(), MaxWidth);
+    IntRange R = GetExprRange(C, CO->getFalseExpr(), MaxWidth);
+    return IntRange::join(L, R);
   }
 
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) {
     switch (BO->getOpcode()) {
 
-    // Boolean-valued operations are white-listed.
+    // Boolean-valued operations are single-bit and positive.
     case BinaryOperator::LAnd:
     case BinaryOperator::LOr:
     case BinaryOperator::LT:
@@ -1729,65 +1712,108 @@ static bool IsExprValueWithinWidth(ASTContext &C, Expr *E, unsigned Width) {
     case BinaryOperator::GE:
     case BinaryOperator::EQ:
     case BinaryOperator::NE:
-      return true;
+      return IntRange::forBoolType();
 
     // Operations with opaque sources are black-listed.
     case BinaryOperator::PtrMemD:
     case BinaryOperator::PtrMemI:
-      return false;
+      return IntRange::forType(C, E->getType());
 
     // Left shift gets black-listed based on a judgement call.
     case BinaryOperator::Shl:
-      return false;
+      return IntRange::forType(C, E->getType());
 
     // Various special cases.
     case BinaryOperator::Shr:
-      return IsExprValueWithinWidth(C, BO->getLHS(), Width);
+      // TODO: if the RHS is constant, change the width as appropriate.
+      return GetExprRange(C, BO->getLHS(), MaxWidth);
     case BinaryOperator::Comma:
-      return IsExprValueWithinWidth(C, BO->getRHS(), Width);
+      return GetExprRange(C, BO->getRHS(), MaxWidth);
+
     case BinaryOperator::Sub:
       if (BO->getLHS()->getType()->isPointerType())
-        return false;
+        return IntRange::forType(C, E->getType());
       // fallthrough
       
-    // Any other operator is okay if the operands are
-    // promoted from expressions of appropriate size.
     default:
-      return IsExprValueWithinWidth(C, BO->getLHS(), Width) &&
-             IsExprValueWithinWidth(C, BO->getRHS(), Width);
+      break;
     }
+
+    // Treat every other operator as if it were closed on the
+    // narrowest type that encompasses both operands.
+    IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth);
+    IntRange R = GetExprRange(C, BO->getRHS(), MaxWidth);
+    return IntRange::join(L, R);
   }
 
   if (UnaryOperator *UO = dyn_cast<UnaryOperator>(E)) {
     switch (UO->getOpcode()) {
     // Boolean-valued operations are white-listed.
     case UnaryOperator::LNot:
-      return true;
+      return IntRange::forBoolType();
 
     // Operations with opaque sources are black-listed.
     case UnaryOperator::Deref:
     case UnaryOperator::AddrOf: // should be impossible
-      return false;
-
     case UnaryOperator::OffsetOf:
-      return false;
+      return IntRange::forType(C, E->getType());
 
     default:
-      return IsExprValueWithinWidth(C, UO->getSubExpr(), Width);
+      return GetExprRange(C, UO->getSubExpr(), MaxWidth);
     }
   }
 
-  // Don't diagnose if the expression is an integer constant
-  // whose value in the target type is the same as it was
-  // in the original type.
-  Expr::EvalResult result;
-  if (E->Evaluate(result, C))
-    if (IsSameIntAfterCast(result.Val, Width))
-      return true;
+  FieldDecl *BitField = E->getBitField();
+  if (BitField) {
+    llvm::APSInt BitWidthAP = BitField->getBitWidth()->EvaluateAsInt(C);
+    unsigned BitWidth = BitWidthAP.getZExtValue();
 
-  return false;
+    return IntRange(BitWidth, BitField->getType()->isUnsignedIntegerType());
+  }
+
+  return IntRange::forType(C, E->getType());
+}
+
+/// Checks whether the given value, which currently has the given
+/// source semantics, has the same value when coerced through the
+/// target semantics.
+bool IsSameFloatAfterCast(const llvm::APFloat &value,
+                          const llvm::fltSemantics &Src,
+                          const llvm::fltSemantics &Tgt) {
+  llvm::APFloat truncated = value;
+
+  bool ignored;
+  truncated.convert(Src, llvm::APFloat::rmNearestTiesToEven, &ignored);
+  truncated.convert(Tgt, llvm::APFloat::rmNearestTiesToEven, &ignored);
+
+  return truncated.bitwiseIsEqual(value);
 }
 
+/// Checks whether the given value, which currently has the given
+/// source semantics, has the same value when coerced through the
+/// target semantics.
+///
+/// The value might be a vector of floats (or a complex number).
+bool IsSameFloatAfterCast(const APValue &value,
+                          const llvm::fltSemantics &Src,
+                          const llvm::fltSemantics &Tgt) {
+  if (value.isFloat())
+    return IsSameFloatAfterCast(value.getFloat(), Src, Tgt);
+
+  if (value.isVector()) {
+    for (unsigned i = 0, e = value.getVectorLength(); i != e; ++i)
+      if (!IsSameFloatAfterCast(value.getVectorElt(i), Src, Tgt))
+        return false;
+    return true;
+  }
+
+  assert(value.isComplexFloat());
+  return (IsSameFloatAfterCast(value.getComplexFloatReal(), Src, Tgt) &&
+          IsSameFloatAfterCast(value.getComplexFloatImag(), Src, Tgt));
+}
+
+} // end anonymous namespace
+
 /// \brief Implements -Wsign-compare.
 ///
 /// \param lex the left-hand expression
@@ -1801,53 +1827,74 @@ void Sema::CheckSignCompare(Expr *lex, Expr *rex, SourceLocation OpLoc,
   if (ExprEvalContexts.back().Context == Unevaluated)
     return;
 
+  // If either expression is value-dependent, don't warn. We'll get another
+  // chance at instantiation time.
+  if (lex->isValueDependent() || rex->isValueDependent())
+    return;
+
   QualType lt = lex->getType(), rt = rex->getType();
 
   // Only warn if both operands are integral.
   if (!lt->isIntegerType() || !rt->isIntegerType())
     return;
 
-  // If either expression is value-dependent, don't warn. We'll get another
-  // chance at instantiation time.
-  if (lex->isValueDependent() || rex->isValueDependent())
-    return;
+  // In C, the width of a bitfield determines its type, and the
+  // declared type only contributes the signedness.  This duplicates
+  // the work that will later be done by UsualUnaryConversions.
+  // Eventually, this check will be reorganized in a way that avoids
+  // this duplication.
+  if (!getLangOptions().CPlusPlus) {
+    QualType tmp;
+    tmp = Context.isPromotableBitField(lex);
+    if (!tmp.isNull()) lt = tmp;
+    tmp = Context.isPromotableBitField(rex);
+    if (!tmp.isNull()) rt = tmp;
+  }
 
   // The rule is that the signed operand becomes unsigned, so isolate the
   // signed operand.
-  Expr *signedOperand, *unsignedOperand;
+  Expr *signedOperand = lex, *unsignedOperand = rex;
+  QualType signedType = lt, unsignedType = rt;
   if (lt->isSignedIntegerType()) {
     if (rt->isSignedIntegerType()) return;
-    signedOperand = lex;
-    unsignedOperand = rex;
   } else {
     if (!rt->isSignedIntegerType()) return;
-    signedOperand = rex;
-    unsignedOperand = lex;
+    std::swap(signedOperand, unsignedOperand);
+    std::swap(signedType, unsignedType);
   }
 
+  unsigned unsignedWidth = Context.getIntWidth(unsignedType);
+  unsigned signedWidth = Context.getIntWidth(signedType);
+
   // If the unsigned type is strictly smaller than the signed type,
   // then (1) the result type will be signed and (2) the unsigned
   // value will fit fully within the signed type, and thus the result
   // of the comparison will be exact.
-  if (Context.getIntWidth(signedOperand->getType()) >
-      Context.getIntWidth(unsignedOperand->getType()))
+  if (signedWidth > unsignedWidth)
     return;
 
-  // If the value is a non-negative integer constant, then the
-  // signed->unsigned conversion won't change it.
-  if (IsSignBitProvablyZero(Context, signedOperand))
+  // Otherwise, calculate the effective ranges.
+  IntRange signedRange = GetExprRange(Context, signedOperand, signedWidth);
+  IntRange unsignedRange = GetExprRange(Context, unsignedOperand, unsignedWidth);
+
+  // We should never be unable to prove that the unsigned operand is
+  // non-negative.
+  assert(unsignedRange.NonNegative && "unsigned range includes negative?");
+
+  // If the signed operand is non-negative, then the signed->unsigned
+  // conversion won't change it.
+  if (signedRange.NonNegative)
     return;
 
   // For (in)equality comparisons, if the unsigned operand is a
   // constant which cannot collide with a overflowed signed operand,
   // then reinterpreting the signed operand as unsigned will not
   // change the result of the comparison.
-  if (Equality && IsSignBitProvablyZero(Context, unsignedOperand))
+  if (Equality && unsignedRange.Width < unsignedWidth)
     return;
 
   Diag(OpLoc, PD)
-    << lex->getType() << rex->getType()
-    << lex->getSourceRange() << rex->getSourceRange();
+    << lt << rt << lex->getSourceRange() << rex->getSourceRange();
 }
 
 /// Diagnose an implicit cast;  purely a helper for CheckImplicitConversion.
@@ -1925,20 +1972,18 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T) {
     return;
   }
 
-  unsigned SourceWidth, TargetWidth;
-  bool SourceSigned, TargetSigned;
-
-  if (!getIntProperties(Context, Source, SourceWidth, SourceSigned) ||
-      !getIntProperties(Context, Target, TargetWidth, TargetSigned))
+  if (!Source->isIntegerType() || !Target->isIntegerType())
     return;
 
-  if (SourceWidth > TargetWidth) {
-    if (IsExprValueWithinWidth(Context, E, TargetWidth))
-      return;
+  IntRange SourceRange = GetExprRange(Context, E, Context.getIntWidth(E->getType()));
+  IntRange TargetRange = IntRange::forCanonicalType(Context, Target);
+
+  // FIXME: also signed<->unsigned?
 
+  if (SourceRange.Width > TargetRange.Width) {
     // People want to build with -Wshorten-64-to-32 and not -Wconversion
     // and by god we'll let them.
-    if (SourceWidth == 64 && TargetWidth == 32)
+    if (SourceRange.Width == 64 && TargetRange.Width == 32)
       return DiagnoseImpCast(*this, E, T, diag::warn_impcast_integer_64_32);
     return DiagnoseImpCast(*this, E, T, diag::warn_impcast_integer_precision);
   }
author	John McCall <rjmccall@apple.com>	2010-01-06 05:24:50 +0000
committer	John McCall <rjmccall@apple.com>	2010-01-06 05:24:50 +0000
commit	f2370c9b4aade940e2253b5b33262ba507d1d71f (patch)
tree	a375aa5b288f28b3b2f040a626064f49827d5eab /lib/Sema/SemaChecking.cpp
parent	1a78afbde2257d01bd38a36e094d3e3231a9b412 (diff)