diff options
Diffstat (limited to 'Sema/SemaChecking.cpp')
-rw-r--r-- | Sema/SemaChecking.cpp | 292 |
1 files changed, 263 insertions, 29 deletions
diff --git a/Sema/SemaChecking.cpp b/Sema/SemaChecking.cpp index 8cc3c6c147..511f56f2a8 100644 --- a/Sema/SemaChecking.cpp +++ b/Sema/SemaChecking.cpp @@ -29,7 +29,9 @@ using namespace clang; /// CheckFunctionCall - Check a direct function call for various correctness /// and safety properties not strictly enforced by the C type system. void -Sema::CheckFunctionCall(Expr *Fn, FunctionDecl *FDecl, +Sema::CheckFunctionCall(Expr *Fn, + SourceLocation LParenLoc, SourceLocation RParenLoc, + FunctionDecl *FDecl, Expr** Args, unsigned NumArgsInCall) { // Get the IdentifierInfo* for the called function. @@ -37,55 +39,287 @@ Sema::CheckFunctionCall(Expr *Fn, FunctionDecl *FDecl, // Search the KnownFunctionIDs for the identifier. unsigned i = 0, e = id_num_known_functions; - for ( ; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } - if( i == e ) return; + for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } + if (i == e) return; // Printf checking. if (i <= id_vprintf) { - // Retrieve the index of the format string parameter. + // Retrieve the index of the format string parameter and determine + // if the function is passed a va_arg argument. unsigned format_idx = 0; + bool HasVAListArg = false; + switch (i) { default: assert(false && "No format string argument index."); case id_printf: format_idx = 0; break; case id_fprintf: format_idx = 1; break; case id_sprintf: format_idx = 1; break; case id_snprintf: format_idx = 2; break; - case id_vsnprintf: format_idx = 2; break; - case id_asprintf: format_idx = 1; break; - case id_vasprintf: format_idx = 1; break; - case id_vfprintf: format_idx = 1; break; - case id_vsprintf: format_idx = 1; break; - case id_vprintf: format_idx = 1; break; - } - CheckPrintfArguments(Fn, i, FDecl, format_idx, Args, NumArgsInCall); + case id_asprintf: format_idx = 1; HasVAListArg = true; break; + case id_vsnprintf: format_idx = 2; HasVAListArg = true; break; + case id_vasprintf: format_idx = 1; HasVAListArg = true; break; + case id_vfprintf: format_idx = 1; HasVAListArg = true; break; + case id_vsprintf: format_idx = 1; HasVAListArg = true; break; + case id_vprintf: format_idx = 0; HasVAListArg = true; break; + } + + CheckPrintfArguments(Fn, LParenLoc, RParenLoc, HasVAListArg, + FDecl, format_idx, Args, NumArgsInCall); } } /// CheckPrintfArguments - Check calls to printf (and similar functions) for -/// correct use of format strings. Improper format strings to functions in -/// the printf family can be the source of bizarre bugs and very serious -/// security holes. A good source of information is available in the following -/// paper (which includes additional references): +/// correct use of format strings. +/// +/// HasVAListArg - A predicate indicating whether the printf-like +/// function is passed an explicit va_arg argument (e.g., vprintf) +/// +/// format_idx - The index into Args for the format string. +/// +/// Improper format strings to functions in the printf family can be +/// the source of bizarre bugs and very serious security holes. A +/// good source of information is available in the following paper +/// (which includes additional references): /// /// FormatGuard: Automatic Protection From printf Format String /// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. +/// +/// Functionality implemented: +/// +/// We can statically check the following properties for string +/// literal format strings for non v.*printf functions (where the +/// arguments are passed directly): +// +/// (1) Are the number of format conversions equal to the number of +/// data arguments? +/// +/// (2) Does each format conversion correctly match the type of the +/// corresponding data argument? (TODO) +/// +/// Moreover, for all printf functions we can: +/// +/// (3) Check for a missing format string (when not caught by type checking). +/// +/// (4) Check for no-operation flags; e.g. using "#" with format +/// conversion 'c' (TODO) +/// +/// (5) Check the use of '%n', a major source of security holes. +/// +/// (6) Check for malformed format conversions that don't specify anything. +/// +/// (7) Check for empty format strings. e.g: printf(""); +/// +/// (8) Check that the format string is a wide literal. +/// +/// All of these checks can be done by parsing the format string. +/// +/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). void -Sema::CheckPrintfArguments(Expr *Fn, unsigned id_idx, FunctionDecl *FDecl, +Sema::CheckPrintfArguments(Expr *Fn, + SourceLocation LParenLoc, SourceLocation RParenLoc, + bool HasVAListArg, FunctionDecl *FDecl, unsigned format_idx, Expr** Args, unsigned NumArgsInCall) { - - assert( format_idx < NumArgsInCall ); - + // CHECK: printf-like function is called with no format string. + if (format_idx >= NumArgsInCall) { + Diag(RParenLoc, diag::warn_printf_missing_format_string, + Fn->getSourceRange()); + return; + } + // CHECK: format string is not a string literal. // - // Dynamically generated format strings are difficult to automatically - // vet at compile time. Requiring that format strings are string literals - // (1) permits the checking of format strings by the compiler and thereby - // (2) can practically remove the source of many format string exploits. - + // Dynamically generated format strings are difficult to + // automatically vet at compile time. Requiring that format strings + // are string literals: (1) permits the checking of format strings by + // the compiler and thereby (2) can practically remove the source of + // many format string exploits. StringLiteral *FExpr = dyn_cast<StringLiteral>(Args[format_idx]); - if ( FExpr == NULL ) - Diag( Args[format_idx]->getLocStart(), - diag::warn_printf_not_string_constant, Fn->getSourceRange() ); -}
\ No newline at end of file + if (FExpr == NULL) { + Diag(Args[format_idx]->getLocStart(), + diag::warn_printf_not_string_constant, Fn->getSourceRange()); + return; + } + + // CHECK: is the format string a wide literal? + if (FExpr->isWide()) { + Diag(Args[format_idx]->getLocStart(), + diag::warn_printf_format_string_is_wide_literal, + Fn->getSourceRange()); + return; + } + + // Str - The format string. NOTE: this is NOT null-terminated! + const char * const Str = FExpr->getStrData(); + + // CHECK: empty format string? + const unsigned StrLen = FExpr->getByteLength(); + + if (StrLen == 0) { + Diag(Args[format_idx]->getLocStart(), + diag::warn_printf_empty_format_string, Fn->getSourceRange()); + return; + } + + // We process the format string using a binary state machine. The + // current state is stored in CurrentState. + enum { + state_OrdChr, + state_Conversion + } CurrentState = state_OrdChr; + + // numConversions - The number of conversions seen so far. This is + // incremented as we traverse the format string. + unsigned numConversions = 0; + + // numDataArgs - The number of data arguments after the format + // string. This can only be determined for non vprintf-like + // functions. For those functions, this value is 1 (the sole + // va_arg argument). + unsigned numDataArgs = NumArgsInCall-(format_idx+1); + + // Inspect the format string. + unsigned StrIdx = 0; + + // LastConversionIdx - Index within the format string where we last saw + // a '%' character that starts a new format conversion. + unsigned LastConversionIdx = 0; + + for ( ; StrIdx < StrLen ; ++StrIdx ) { + + // Is the number of detected conversion conversions greater than + // the number of matching data arguments? If so, stop. + if (!HasVAListArg && numConversions > numDataArgs) break; + + // Handle "\0" + if(Str[StrIdx] == '\0' ) { + // The string returned by getStrData() is not null-terminated, + // so the presence of a null character is likely an error. + + SourceLocation Loc = + PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(),StrIdx+1); + + Diag(Loc, diag::warn_printf_format_string_contains_null_char, + Fn->getSourceRange()); + + return; + } + + // Ordinary characters (not processing a format conversion). + if (CurrentState == state_OrdChr) { + if (Str[StrIdx] == '%') { + CurrentState = state_Conversion; + LastConversionIdx = StrIdx; + } + continue; + } + + // Seen '%'. Now processing a format conversion. + switch (Str[StrIdx]) { + // Characters which can terminate a format conversion + // (e.g. "%d"). Characters that specify length modifiers or + // other flags are handled by the default case below. + // + // TODO: additional checks will go into the following cases. + case 'i': + case 'd': + case 'o': + case 'u': + case 'x': + case 'X': + case 'D': + case 'O': + case 'U': + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + case 'a': + case 'A': + case 'c': + case 'C': + case 'S': + case 's': + case 'P': + ++numConversions; + CurrentState = state_OrdChr; + break; + + // CHECK: Are we using "%n"? Issue a warning. + case 'n': { + ++numConversions; + CurrentState = state_OrdChr; + SourceLocation Loc = + PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(), + LastConversionIdx+1); + + Diag(Loc, diag::warn_printf_write_back, Fn->getSourceRange()); + break; + } + + // Handle "%%" + case '%': + // Sanity check: Was the first "%" character the previous one? + // If not, we will assume that we have a malformed format + // conversion, and that the current "%" character is the start + // of a new conversion. + if (StrIdx - LastConversionIdx == 1) + CurrentState = state_OrdChr; + else { + // Issue a warning: invalid format conversion. + SourceLocation Loc = + PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(), + LastConversionIdx+1); + + Diag(Loc, diag::warn_printf_invalid_conversion, + std::string(Str+LastConversionIdx, Str+StrIdx), + Fn->getSourceRange()); + + // This conversion is broken. Advance to the next format + // conversion. + LastConversionIdx = StrIdx; + ++numConversions; + } + + break; + + default: + // This case catches all other characters: flags, widths, etc. + // We should eventually process those as well. + break; + } + } + + if (CurrentState == state_Conversion) { + // Issue a warning: invalid format conversion. + SourceLocation Loc = + PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(), + LastConversionIdx+1); + + Diag(Loc, diag::warn_printf_invalid_conversion, + std::string(Str+LastConversionIdx, Str+StrIdx), + Fn->getSourceRange()); + return; + } + + if (!HasVAListArg) { + // CHECK: Does the number of format conversions exceed the number + // of data arguments? + if (numConversions > numDataArgs) { + SourceLocation Loc = + PP.AdvanceToTokenCharacter(Args[format_idx]->getLocStart(), + LastConversionIdx); + + Diag(Loc, diag::warn_printf_insufficient_data_args, + Fn->getSourceRange()); + } + // CHECK: Does the number of data arguments exceed the number of + // format conversions in the format string? + else if (numConversions < numDataArgs) + Diag(Args[format_idx+numConversions+1]->getLocStart(), + diag::warn_printf_too_many_data_args, Fn->getSourceRange()); + } +} |