[analyzer] Clean up modeling of strcmp, including cases where a string literal has an embedded null character, and where both arguments are the same buffer. Also use nested ifs rather than early returns; in this case early returns will lose any assumptions we've made earlier in the function.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@133154 91177308-0d34-0410-b5e6-96231b3b80d8
author: Jordy Rose <jediknil@belkadan.com> 2011-06-16 07:13:34 +0000
committer: Jordy Rose <jediknil@belkadan.com> 2011-06-16 07:13:34 +0000
commit: adc42d412d747391dbcee234610f00b0f087cf7b (patch)
tree: 6979bd907165dc5a5c140c55a1e8de54a98e53ad
parent: b11382497a923b0d7009e85a1d8eb7bf93ec6d0d (diff)
2 files changed, 114 insertions, 45 deletions
diff --git a/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 2ea6e0643b..84c201026d 100644
--- a/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -1386,24 +1386,24 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
 }
 
 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
-  //int strcmp(const char *restrict s1, const char *restrict s2);
+  //int strcmp(const char *s1, const char *s2);
   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
 }
 
 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
-  //int strncmp(const char *restrict s1, const char *restrict s2, size_t n);
+  //int strncmp(const char *s1, const char *s2, size_t n);
   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
 }
 
 void CStringChecker::evalStrcasecmp(CheckerContext &C, 
                                     const CallExpr *CE) const {
-  //int strcasecmp(const char *restrict s1, const char *restrict s2);
+  //int strcasecmp(const char *s1, const char *s2);
   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
 }
 
 void CStringChecker::evalStrncasecmp(CheckerContext &C, 
                                      const CallExpr *CE) const {
-  //int strncasecmp(const char *restrict s1, const char *restrict s2, size_t n);
+  //int strncasecmp(const char *s1, const char *s2, size_t n);
   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
 }
 
@@ -1435,52 +1435,96 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
   if (s2Length.isUndef())
     return;
 
-  // Get the string literal of the first string.
-  const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
-  if (!s1StrLiteral)
-    return;
-  llvm::StringRef s1StrRef = s1StrLiteral->getString();
+  // If we know the two buffers are the same, we know the result is 0.
+  // First, get the two buffers' addresses. Another checker will have already
+  // made sure they're not undefined.
+  DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(s1Val);
+  DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(s2Val);
+
+  // See if they are the same.
+  SValBuilder &svalBuilder = C.getSValBuilder();
+  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
+  const GRState *StSameBuf, *StNotSameBuf;
+  llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
+
+  // If the two arguments might be the same buffer, we know the result is 0,
+  // and we only need to check one size.
+  if (StSameBuf) {
+    StSameBuf = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
+    C.addTransition(StSameBuf);
+
+    // If the two arguments are GUARANTEED to be the same, we're done!
+    if (!StNotSameBuf)
+      return;
+  }
+
+  assert(StNotSameBuf);
+  state = StNotSameBuf;
 
-  // Get the string literal of the second string.
+  // At this point we can go about comparing the two buffers.
+  // For now, we only do this if they're both known string literals.
+
+  // Attempt to extract string literals from both expressions.
+  const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
   const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
-  if (!s2StrLiteral)
-    return;
-  llvm::StringRef s2StrRef = s2StrLiteral->getString();
+  bool canComputeResult = false;
+
+  if (s1StrLiteral && s2StrLiteral) {
+    llvm::StringRef s1StrRef = s1StrLiteral->getString();
+    llvm::StringRef s2StrRef = s2StrLiteral->getString();
+
+    if (isBounded) {
+      // Get the max number of characters to compare.
+      const Expr *lenExpr = CE->getArg(2);
+      SVal lenVal = state->getSVal(lenExpr);
+
+      // If the length is known, we can get the right substrings.
+      if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
+        // Create substrings of each to compare the prefix.
+        s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
+        s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
+        canComputeResult = true;
+      }
+    } else {
+      // This is a normal, unbounded strcmp.
+      canComputeResult = true;
+    }
 
-  int result;
-  if (isBounded) {
-    // Get the max number of characters to compare.
-    const Expr *lenExpr = CE->getArg(2);
-    SVal lenVal = state->getSVal(lenExpr);
+    if (canComputeResult) {
+      // Real strcmp stops at null characters.
+      size_t s1Term = s1StrRef.find('\0');
+      if (s1Term != llvm::StringRef::npos)
+        s1StrRef = s1StrRef.substr(0, s1Term);
 
-    // Dynamically cast the length to a ConcreteInt. If it is not a ConcreteInt
-    // then give up, otherwise get the value and use it as the bounds.
-    nonloc::ConcreteInt *CI = dyn_cast<nonloc::ConcreteInt>(&lenVal);
-    if (!CI)
-      return;
-    llvm::APSInt lenInt(CI->getValue());
+      size_t s2Term = s2StrRef.find('\0');
+      if (s2Term != llvm::StringRef::npos)
+        s2StrRef = s2StrRef.substr(0, s2Term);
+
+      // Use StringRef's comparison methods to compute the actual result.
+      int result;
+
+      if (ignoreCase) {
+        // Compare string 1 to string 2 the same way strcasecmp() does.
+        result = s1StrRef.compare_lower(s2StrRef);
+      } else {
+        // Compare string 1 to string 2 the same way strcmp() does.
+        result = s1StrRef.compare(s2StrRef);
+      }
 
-    // Create substrings of each to compare the prefix.
-    s1StrRef = s1StrRef.substr(0, (size_t)lenInt.getLimitedValue());
-    s2StrRef = s2StrRef.substr(0, (size_t)lenInt.getLimitedValue());
+      // Build the SVal of the comparison and bind the return value.
+      SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
+      state = state->BindExpr(CE, resultVal);
+    }
   }
 
-  if (ignoreCase) {
-    // Compare string 1 to string 2 the same way strcasecmp() does.
-    result = s1StrRef.compare_lower(s2StrRef);
-  } else {
-    // Compare string 1 to string 2 the same way strcmp() does.
-    result = s1StrRef.compare(s2StrRef);
+  if (!canComputeResult) {
+    // Conjure a symbolic value. It's the best we can do.
+    unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
+    SVal resultVal = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
+    state = state->BindExpr(CE, resultVal);
   }
-  
-  // Build the SVal of the comparison to bind the return value.
-  SValBuilder &svalBuilder = C.getSValBuilder();
-  QualType intTy = svalBuilder.getContext().IntTy;
-  SVal resultVal = svalBuilder.makeIntVal(result, intTy);
 
-  // Bind the return value of the expression.
-  // Set the return value.
-  state = state->BindExpr(CE, resultVal);
+  // Record this as a possible path.
   C.addTransition(state);
 }
 
diff --git a/test/Analysis/string.c b/test/Analysis/string.c
index 98fd8c09c4..4b6a0665d9 100644
--- a/test/Analysis/string.c
+++ b/test/Analysis/string.c
@@ -542,7 +542,7 @@ void strncat_no_overflow_2(char *y) {
 //===----------------------------------------------------------------------===
 
 #define strcmp BUILTIN(strcmp)
-int strcmp(const char *restrict s1, const char *restrict s2);
+int strcmp(const char * s1, const char * s2);
 
 void strcmp_constant0() {
   if (strcmp("123", "123") != 0)
@@ -622,12 +622,22 @@ void strcmp_diff_length_3() {
     (void)*(char*)0; // no-warning
 }
 
+void strcmp_embedded_null () {
+	if (strcmp("\0z", "\0y") != 0)
+		(void)*(char*)0; // no-warning
+}
+
+void strcmp_unknown_arg (char *unknown) {
+	if (strcmp(unknown, unknown) != 0)
+		(void)*(char*)0; // no-warning
+}
+
 //===----------------------------------------------------------------------===
 // strncmp()
 //===----------------------------------------------------------------------===
 
 #define strncmp BUILTIN(strncmp)
-int strncmp(const char *restrict s1, const char *restrict s2, size_t n);
+int strncmp(const char *s1, const char *s2, size_t n);
 
 void strncmp_constant0() {
   if (strncmp("123", "123", 3) != 0)
@@ -728,12 +738,17 @@ void strncmp_diff_length_6() {
     (void)*(char*)0; // no-warning
 }
 
+void strncmp_embedded_null () {
+	if (strncmp("ab\0zz", "ab\0yy", 4) != 0)
+		(void)*(char*)0; // no-warning
+}
+
 //===----------------------------------------------------------------------===
 // strcasecmp()
 //===----------------------------------------------------------------------===
 
 #define strcasecmp BUILTIN(strcasecmp)
-int strcasecmp(const char *restrict s1, const char *restrict s2);
+int strcasecmp(const char *s1, const char *s2);
 
 void strcasecmp_constant0() {
   if (strcasecmp("abc", "Abc") != 0)
@@ -813,12 +828,17 @@ void strcasecmp_diff_length_3() {
     (void)*(char*)0; // no-warning
 }
 
+void strcasecmp_embedded_null () {
+	if (strcasecmp("ab\0zz", "ab\0yy") != 0)
+		(void)*(char*)0; // no-warning
+}
+
 //===----------------------------------------------------------------------===
 // strncasecmp()
 //===----------------------------------------------------------------------===
 
 #define strncasecmp BUILTIN(strncasecmp)
-int strncasecmp(const char *restrict s1, const char *restrict s2, size_t n);
+int strncasecmp(const char *s1, const char *s2, size_t n);
 
 void strncasecmp_constant0() {
   if (strncasecmp("abc", "Abc", 3) != 0)
@@ -918,3 +938,8 @@ void strncasecmp_diff_length_6() {
   if (strncasecmp(x, y, 3) != 1)
     (void)*(char*)0; // no-warning
 }
+
+void strncasecmp_embedded_null () {
+	if (strncasecmp("ab\0zz", "ab\0yy", 4) != 0)
+		(void)*(char*)0; // no-warning
+}
author	Jordy Rose <jediknil@belkadan.com>	2011-06-16 07:13:34 +0000
committer	Jordy Rose <jediknil@belkadan.com>	2011-06-16 07:13:34 +0000
commit	adc42d412d747391dbcee234610f00b0f087cf7b (patch)
tree	6979bd907165dc5a5c140c55a1e8de54a98e53ad
parent	b11382497a923b0d7009e85a1d8eb7bf93ec6d0d (diff)