diff options
author | Jordy Rose <jediknil@belkadan.com> | 2011-06-16 07:13:34 +0000 |
---|---|---|
committer | Jordy Rose <jediknil@belkadan.com> | 2011-06-16 07:13:34 +0000 |
commit | adc42d412d747391dbcee234610f00b0f087cf7b (patch) | |
tree | 6979bd907165dc5a5c140c55a1e8de54a98e53ad | |
parent | b11382497a923b0d7009e85a1d8eb7bf93ec6d0d (diff) |
[analyzer] Clean up modeling of strcmp, including cases where a string literal has an embedded null character, and where both arguments are the same buffer. Also use nested ifs rather than early returns; in this case early returns will lose any assumptions we've made earlier in the function.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@133154 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/StaticAnalyzer/Checkers/CStringChecker.cpp | 126 | ||||
-rw-r--r-- | test/Analysis/string.c | 33 |
2 files changed, 114 insertions, 45 deletions
diff --git a/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 2ea6e0643b..84c201026d 100644 --- a/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -1386,24 +1386,24 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, } void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const { - //int strcmp(const char *restrict s1, const char *restrict s2); + //int strcmp(const char *s1, const char *s2); evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false); } void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const { - //int strncmp(const char *restrict s1, const char *restrict s2, size_t n); + //int strncmp(const char *s1, const char *s2, size_t n); evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false); } void CStringChecker::evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const { - //int strcasecmp(const char *restrict s1, const char *restrict s2); + //int strcasecmp(const char *s1, const char *s2); evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true); } void CStringChecker::evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const { - //int strncasecmp(const char *restrict s1, const char *restrict s2, size_t n); + //int strncasecmp(const char *s1, const char *s2, size_t n); evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true); } @@ -1435,52 +1435,96 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, if (s2Length.isUndef()) return; - // Get the string literal of the first string. - const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val); - if (!s1StrLiteral) - return; - llvm::StringRef s1StrRef = s1StrLiteral->getString(); + // If we know the two buffers are the same, we know the result is 0. + // First, get the two buffers' addresses. Another checker will have already + // made sure they're not undefined. + DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(s1Val); + DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(s2Val); + + // See if they are the same. + SValBuilder &svalBuilder = C.getSValBuilder(); + DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); + const GRState *StSameBuf, *StNotSameBuf; + llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); + + // If the two arguments might be the same buffer, we know the result is 0, + // and we only need to check one size. + if (StSameBuf) { + StSameBuf = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); + C.addTransition(StSameBuf); + + // If the two arguments are GUARANTEED to be the same, we're done! + if (!StNotSameBuf) + return; + } + + assert(StNotSameBuf); + state = StNotSameBuf; - // Get the string literal of the second string. + // At this point we can go about comparing the two buffers. + // For now, we only do this if they're both known string literals. + + // Attempt to extract string literals from both expressions. + const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val); const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val); - if (!s2StrLiteral) - return; - llvm::StringRef s2StrRef = s2StrLiteral->getString(); + bool canComputeResult = false; + + if (s1StrLiteral && s2StrLiteral) { + llvm::StringRef s1StrRef = s1StrLiteral->getString(); + llvm::StringRef s2StrRef = s2StrLiteral->getString(); + + if (isBounded) { + // Get the max number of characters to compare. + const Expr *lenExpr = CE->getArg(2); + SVal lenVal = state->getSVal(lenExpr); + + // If the length is known, we can get the right substrings. + if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) { + // Create substrings of each to compare the prefix. + s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue()); + s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue()); + canComputeResult = true; + } + } else { + // This is a normal, unbounded strcmp. + canComputeResult = true; + } - int result; - if (isBounded) { - // Get the max number of characters to compare. - const Expr *lenExpr = CE->getArg(2); - SVal lenVal = state->getSVal(lenExpr); + if (canComputeResult) { + // Real strcmp stops at null characters. + size_t s1Term = s1StrRef.find('\0'); + if (s1Term != llvm::StringRef::npos) + s1StrRef = s1StrRef.substr(0, s1Term); - // Dynamically cast the length to a ConcreteInt. If it is not a ConcreteInt - // then give up, otherwise get the value and use it as the bounds. - nonloc::ConcreteInt *CI = dyn_cast<nonloc::ConcreteInt>(&lenVal); - if (!CI) - return; - llvm::APSInt lenInt(CI->getValue()); + size_t s2Term = s2StrRef.find('\0'); + if (s2Term != llvm::StringRef::npos) + s2StrRef = s2StrRef.substr(0, s2Term); + + // Use StringRef's comparison methods to compute the actual result. + int result; + + if (ignoreCase) { + // Compare string 1 to string 2 the same way strcasecmp() does. + result = s1StrRef.compare_lower(s2StrRef); + } else { + // Compare string 1 to string 2 the same way strcmp() does. + result = s1StrRef.compare(s2StrRef); + } - // Create substrings of each to compare the prefix. - s1StrRef = s1StrRef.substr(0, (size_t)lenInt.getLimitedValue()); - s2StrRef = s2StrRef.substr(0, (size_t)lenInt.getLimitedValue()); + // Build the SVal of the comparison and bind the return value. + SVal resultVal = svalBuilder.makeIntVal(result, CE->getType()); + state = state->BindExpr(CE, resultVal); + } } - if (ignoreCase) { - // Compare string 1 to string 2 the same way strcasecmp() does. - result = s1StrRef.compare_lower(s2StrRef); - } else { - // Compare string 1 to string 2 the same way strcmp() does. - result = s1StrRef.compare(s2StrRef); + if (!canComputeResult) { + // Conjure a symbolic value. It's the best we can do. + unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); + SVal resultVal = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); + state = state->BindExpr(CE, resultVal); } - - // Build the SVal of the comparison to bind the return value. - SValBuilder &svalBuilder = C.getSValBuilder(); - QualType intTy = svalBuilder.getContext().IntTy; - SVal resultVal = svalBuilder.makeIntVal(result, intTy); - // Bind the return value of the expression. - // Set the return value. - state = state->BindExpr(CE, resultVal); + // Record this as a possible path. C.addTransition(state); } diff --git a/test/Analysis/string.c b/test/Analysis/string.c index 98fd8c09c4..4b6a0665d9 100644 --- a/test/Analysis/string.c +++ b/test/Analysis/string.c @@ -542,7 +542,7 @@ void strncat_no_overflow_2(char *y) { //===----------------------------------------------------------------------=== #define strcmp BUILTIN(strcmp) -int strcmp(const char *restrict s1, const char *restrict s2); +int strcmp(const char * s1, const char * s2); void strcmp_constant0() { if (strcmp("123", "123") != 0) @@ -622,12 +622,22 @@ void strcmp_diff_length_3() { (void)*(char*)0; // no-warning } +void strcmp_embedded_null () { + if (strcmp("\0z", "\0y") != 0) + (void)*(char*)0; // no-warning +} + +void strcmp_unknown_arg (char *unknown) { + if (strcmp(unknown, unknown) != 0) + (void)*(char*)0; // no-warning +} + //===----------------------------------------------------------------------=== // strncmp() //===----------------------------------------------------------------------=== #define strncmp BUILTIN(strncmp) -int strncmp(const char *restrict s1, const char *restrict s2, size_t n); +int strncmp(const char *s1, const char *s2, size_t n); void strncmp_constant0() { if (strncmp("123", "123", 3) != 0) @@ -728,12 +738,17 @@ void strncmp_diff_length_6() { (void)*(char*)0; // no-warning } +void strncmp_embedded_null () { + if (strncmp("ab\0zz", "ab\0yy", 4) != 0) + (void)*(char*)0; // no-warning +} + //===----------------------------------------------------------------------=== // strcasecmp() //===----------------------------------------------------------------------=== #define strcasecmp BUILTIN(strcasecmp) -int strcasecmp(const char *restrict s1, const char *restrict s2); +int strcasecmp(const char *s1, const char *s2); void strcasecmp_constant0() { if (strcasecmp("abc", "Abc") != 0) @@ -813,12 +828,17 @@ void strcasecmp_diff_length_3() { (void)*(char*)0; // no-warning } +void strcasecmp_embedded_null () { + if (strcasecmp("ab\0zz", "ab\0yy") != 0) + (void)*(char*)0; // no-warning +} + //===----------------------------------------------------------------------=== // strncasecmp() //===----------------------------------------------------------------------=== #define strncasecmp BUILTIN(strncasecmp) -int strncasecmp(const char *restrict s1, const char *restrict s2, size_t n); +int strncasecmp(const char *s1, const char *s2, size_t n); void strncasecmp_constant0() { if (strncasecmp("abc", "Abc", 3) != 0) @@ -918,3 +938,8 @@ void strncasecmp_diff_length_6() { if (strncasecmp(x, y, 3) != 1) (void)*(char*)0; // no-warning } + +void strncasecmp_embedded_null () { + if (strncasecmp("ab\0zz", "ab\0yy", 4) != 0) + (void)*(char*)0; // no-warning +} |