diff options
-rw-r--r-- | include/clang/Basic/DiagnosticSemaKinds.td | 6 | ||||
-rw-r--r-- | lib/Sema/Sema.h | 7 | ||||
-rw-r--r-- | lib/Sema/SemaChecking.cpp | 307 | ||||
-rw-r--r-- | test/Sema/format-strings.c | 26 | ||||
-rw-r--r-- | test/SemaObjC/format-strings-objc.m | 2 |
5 files changed, 65 insertions, 283 deletions
diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index 7cd8d29051..bdd58d6d15 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -2442,11 +2442,11 @@ def warn_printf_write_back : Warning< def warn_printf_insufficient_data_args : Warning< "more '%%' conversions than data arguments">, InGroup<Format>; def warn_printf_too_many_data_args : Warning< - "more data arguments than '%%' conversions">, InGroup<FormatExtraArgs>; + "more data arguments than format specifiers">, InGroup<FormatExtraArgs>; def warn_printf_invalid_conversion : Warning< - "invalid conversion '%0'">, InGroup<Format>; + "invalid conversion specifier '%0'">, InGroup<Format>; def warn_printf_incomplete_specifier : Warning< - "incomplete format specifier '%0'">, InGroup<Format>; + "incomplete format specifier">, InGroup<Format>; def warn_printf_missing_format_string : Warning< "format string missing">, InGroup<Format>; def warn_null_arg : Warning< diff --git a/lib/Sema/Sema.h b/lib/Sema/Sema.h index 7d87297f67..b6f477f648 100644 --- a/lib/Sema/Sema.h +++ b/lib/Sema/Sema.h @@ -4052,13 +4052,6 @@ private: bool SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall, bool HasVAListArg, unsigned format_idx, unsigned firstDataArg); - // FIXME: This function is placeholder for transitioning the printf - // format string checking to a new codepath. It will eventually - // replace CheckPrintfString(). - void AlternateCheckPrintfString(const StringLiteral *FExpr, - const Expr *OrigFormatExpr, - const CallExpr *TheCall, bool HasVAListArg, - unsigned format_idx, unsigned firstDataArg); void CheckPrintfString(const StringLiteral *FExpr, const Expr *OrigFormatExpr, const CallExpr *TheCall, bool HasVAListArg, unsigned format_idx, unsigned firstDataArg); diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index 1a7a203171..38f3f2df47 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -1033,254 +1033,6 @@ Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg, << OrigFormatExpr->getSourceRange(); } -void Sema::CheckPrintfString(const StringLiteral *FExpr, - const Expr *OrigFormatExpr, - const CallExpr *TheCall, bool HasVAListArg, - unsigned format_idx, unsigned firstDataArg) { - - static bool UseAlternatePrintfChecking = false; - if (UseAlternatePrintfChecking) { - AlternateCheckPrintfString(FExpr, OrigFormatExpr, TheCall, - HasVAListArg, format_idx, firstDataArg); - return; - } - - - const ObjCStringLiteral *ObjCFExpr = - dyn_cast<ObjCStringLiteral>(OrigFormatExpr); - - // CHECK: is the format string a wide literal? - if (FExpr->isWide()) { - Diag(FExpr->getLocStart(), - diag::warn_printf_format_string_is_wide_literal) - << OrigFormatExpr->getSourceRange(); - return; - } - - // Str - The format string. NOTE: this is NOT null-terminated! - const char *Str = FExpr->getStrData(); - - // CHECK: empty format string? - unsigned StrLen = FExpr->getByteLength(); - - if (StrLen == 0) { - Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string) - << OrigFormatExpr->getSourceRange(); - return; - } - - // We process the format string using a binary state machine. The - // current state is stored in CurrentState. - enum { - state_OrdChr, - state_Conversion - } CurrentState = state_OrdChr; - - // numConversions - The number of conversions seen so far. This is - // incremented as we traverse the format string. - unsigned numConversions = 0; - - // numDataArgs - The number of data arguments after the format - // string. This can only be determined for non vprintf-like - // functions. For those functions, this value is 1 (the sole - // va_arg argument). - unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg; - - // Inspect the format string. - unsigned StrIdx = 0; - - // LastConversionIdx - Index within the format string where we last saw - // a '%' character that starts a new format conversion. - unsigned LastConversionIdx = 0; - - for (; StrIdx < StrLen; ++StrIdx) { - - // Is the number of detected conversion conversions greater than - // the number of matching data arguments? If so, stop. - if (!HasVAListArg && numConversions > numDataArgs) break; - - // Handle "\0" - if (Str[StrIdx] == '\0') { - // The string returned by getStrData() is not null-terminated, - // so the presence of a null character is likely an error. - Diag(getLocationOfStringLiteralByte(FExpr, StrIdx), - diag::warn_printf_format_string_contains_null_char) - << OrigFormatExpr->getSourceRange(); - return; - } - - // Ordinary characters (not processing a format conversion). - if (CurrentState == state_OrdChr) { - if (Str[StrIdx] == '%') { - CurrentState = state_Conversion; - LastConversionIdx = StrIdx; - } - continue; - } - - // Seen '%'. Now processing a format conversion. - switch (Str[StrIdx]) { - // Handle dynamic precision or width specifier. - case '*': { - ++numConversions; - - if (!HasVAListArg) { - if (numConversions > numDataArgs) { - SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); - - if (Str[StrIdx-1] == '.') - Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg) - << OrigFormatExpr->getSourceRange(); - else - Diag(Loc, diag::warn_printf_asterisk_width_missing_arg) - << OrigFormatExpr->getSourceRange(); - - // Don't do any more checking. We'll just emit spurious errors. - return; - } - - // Perform type checking on width/precision specifier. - const Expr *E = TheCall->getArg(format_idx+numConversions); - if (const BuiltinType *BT = E->getType()->getAs<BuiltinType>()) - if (BT->getKind() == BuiltinType::Int) - break; - - SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx); - - if (Str[StrIdx-1] == '.') - Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type) - << E->getType() << E->getSourceRange(); - else - Diag(Loc, diag::warn_printf_asterisk_width_wrong_type) - << E->getType() << E->getSourceRange(); - - break; - } - } - - // Characters which can terminate a format conversion - // (e.g. "%d"). Characters that specify length modifiers or - // other flags are handled by the default case below. - // - // FIXME: additional checks will go into the following cases. - case 'i': - case 'd': - case 'o': - case 'u': - case 'x': - case 'X': - case 'e': - case 'E': - case 'f': - case 'F': - case 'g': - case 'G': - case 'a': - case 'A': - case 'c': - case 's': - case 'p': - ++numConversions; - CurrentState = state_OrdChr; - break; - - case 'm': - // FIXME: Warn in situations where this isn't supported! - CurrentState = state_OrdChr; - break; - - // CHECK: Are we using "%n"? Issue a warning. - case 'n': { - ++numConversions; - CurrentState = state_OrdChr; - SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, - LastConversionIdx); - - Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange(); - break; - } - - // Handle "%@" - case '@': - // %@ is allowed in ObjC format strings only. - if (ObjCFExpr != NULL) - CurrentState = state_OrdChr; - else { - // Issue a warning: invalid format conversion. - SourceLocation Loc = - getLocationOfStringLiteralByte(FExpr, LastConversionIdx); - - Diag(Loc, diag::warn_printf_invalid_conversion) - << std::string(Str+LastConversionIdx, - Str+std::min(LastConversionIdx+2, StrLen)) - << OrigFormatExpr->getSourceRange(); - } - ++numConversions; - break; - - // Handle "%%" - case '%': - // Sanity check: Was the first "%" character the previous one? - // If not, we will assume that we have a malformed format - // conversion, and that the current "%" character is the start - // of a new conversion. - if (StrIdx - LastConversionIdx == 1) - CurrentState = state_OrdChr; - else { - // Issue a warning: invalid format conversion. - SourceLocation Loc = - getLocationOfStringLiteralByte(FExpr, LastConversionIdx); - - Diag(Loc, diag::warn_printf_invalid_conversion) - << std::string(Str+LastConversionIdx, Str+StrIdx) - << OrigFormatExpr->getSourceRange(); - - // This conversion is broken. Advance to the next format - // conversion. - LastConversionIdx = StrIdx; - ++numConversions; - } - break; - - default: - // This case catches all other characters: flags, widths, etc. - // We should eventually process those as well. - break; - } - } - - if (CurrentState == state_Conversion) { - // Issue a warning: invalid format conversion. - SourceLocation Loc = - getLocationOfStringLiteralByte(FExpr, LastConversionIdx); - - Diag(Loc, diag::warn_printf_invalid_conversion) - << std::string(Str+LastConversionIdx, - Str+std::min(LastConversionIdx+2, StrLen)) - << OrigFormatExpr->getSourceRange(); - return; - } - - if (!HasVAListArg) { - // CHECK: Does the number of format conversions exceed the number - // of data arguments? - if (numConversions > numDataArgs) { - SourceLocation Loc = - getLocationOfStringLiteralByte(FExpr, LastConversionIdx); - - Diag(Loc, diag::warn_printf_insufficient_data_args) - << OrigFormatExpr->getSourceRange(); - } - // CHECK: Does the number of data arguments exceed the number of - // format conversions in the format string? - else if (numConversions < numDataArgs) - Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(), - diag::warn_printf_too_many_data_args) - << OrigFormatExpr->getSourceRange(); - } -} - - namespace { class CheckPrintfHandler : public FormatStringHandler { Sema &S; @@ -1320,20 +1072,29 @@ public: const char *startSpecifier, unsigned specifierLen); private: - SourceRange getFormatRange(); + SourceRange getFormatStringRange(); + SourceRange getFormatSpecifierRange(const char *startSpecifier, + unsigned specifierLen); SourceLocation getLocationOfByte(const char *x); bool HandleAmount(const analyze_printf::OptionalAmount &Amt, - unsigned MissingArgDiag, unsigned BadTypeDiag); + unsigned MissingArgDiag, unsigned BadTypeDiag, + const char *startSpecifier, unsigned specifierLen); const Expr *getDataArg(unsigned i) const; }; } -SourceRange CheckPrintfHandler::getFormatRange() { +SourceRange CheckPrintfHandler::getFormatStringRange() { return OrigFormatExpr->getSourceRange(); } +SourceRange CheckPrintfHandler:: +getFormatSpecifierRange(const char *startSpecifier, unsigned specifierLen) { + return SourceRange(getLocationOfByte(startSpecifier), + getLocationOfByte(startSpecifier+specifierLen-1)); +} + SourceLocation CheckPrintfHandler::getLocationOfByte(const char *x) { return S.getLocationOfStringLiteralByte(FExpr, x - Beg); } @@ -1343,8 +1104,7 @@ HandleIncompleteFormatSpecifier(const char *startSpecifier, unsigned specifierLen) { SourceLocation Loc = getLocationOfByte(startSpecifier); S.Diag(Loc, diag::warn_printf_incomplete_specifier) - << llvm::StringRef(startSpecifier, specifierLen) - << getFormatRange(); + << getFormatSpecifierRange(startSpecifier, specifierLen); } void CheckPrintfHandler:: @@ -1358,14 +1118,14 @@ HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS, SourceLocation Loc = getLocationOfByte(CS.getStart()); S.Diag(Loc, diag::warn_printf_invalid_conversion) << llvm::StringRef(CS.getStart(), CS.getLength()) - << getFormatRange(); + << getFormatSpecifierRange(startSpecifier, specifierLen); } void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) { // The presence of a null character is likely an error. S.Diag(getLocationOfByte(nullCharacter), diag::warn_printf_format_string_contains_null_char) - << getFormatRange(); + << getFormatStringRange(); } const Expr *CheckPrintfHandler::getDataArg(unsigned i) const { @@ -1375,14 +1135,16 @@ const Expr *CheckPrintfHandler::getDataArg(unsigned i) const { bool CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt, unsigned MissingArgDiag, - unsigned BadTypeDiag) { + unsigned BadTypeDiag, + const char *startSpecifier, + unsigned specifierLen) { if (Amt.hasDataArgument()) { ++NumConversions; if (!HasVAListArg) { if (NumConversions > NumDataArgs) { S.Diag(getLocationOfByte(Amt.getStart()), MissingArgDiag) - << getFormatRange(); + << getFormatSpecifierRange(startSpecifier, specifierLen); // Don't do any more checking. We will just emit // spurious errors. return false; @@ -1394,7 +1156,9 @@ CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt, const BuiltinType *BT = T->getAs<BuiltinType>(); if (!BT || BT->getKind() != BuiltinType::Int) { S.Diag(getLocationOfByte(Amt.getStart()), BadTypeDiag) - << T << getFormatRange() << Arg->getSourceRange(); + << T + << getFormatSpecifierRange(startSpecifier, specifierLen) + << Arg->getSourceRange(); // Don't do any more checking. We will just emit // spurious errors. return false; @@ -1416,13 +1180,15 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier // have matching data arguments. if (!HandleAmount(FS.getFieldWidth(), diag::warn_printf_asterisk_width_missing_arg, - diag::warn_printf_asterisk_width_wrong_type)) { + diag::warn_printf_asterisk_width_wrong_type, + startSpecifier, specifierLen)) { return false; } if (!HandleAmount(FS.getPrecision(), diag::warn_printf_asterisk_precision_missing_arg, - diag::warn_printf_asterisk_precision_wrong_type)) { + diag::warn_printf_asterisk_precision_wrong_type, + startSpecifier, specifierLen)) { return false; } @@ -1434,6 +1200,12 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier // Continue checking the other format specifiers. return true; } + + if (!CS.consumesDataArgument()) { + // FIXME: Technically specifying a precision or field width here + // makes no sense. Worth issuing a warning at some point. + return true; + } ++NumConversions; @@ -1441,7 +1213,7 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier // a possible security issue. if (CS.getKind() == ConversionSpecifier::OutIntPtrArg) { S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_write_back) - << getFormatRange(); + << getFormatSpecifierRange(startSpecifier, specifierLen); // Continue checking the other format specifiers. return true; } @@ -1454,7 +1226,7 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier if (NumConversions > NumDataArgs) { S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_insufficient_data_args) - << getFormatRange(); + << getFormatSpecifierRange(startSpecifier, specifierLen); // Don't do any more checking. return false; } @@ -1468,14 +1240,13 @@ void CheckPrintfHandler::DoneProcessing() { if (!HasVAListArg && NumConversions < NumDataArgs) S.Diag(getDataArg(NumConversions+1)->getLocStart(), diag::warn_printf_too_many_data_args) - << getFormatRange(); + << getFormatStringRange(); } -void -Sema::AlternateCheckPrintfString(const StringLiteral *FExpr, - const Expr *OrigFormatExpr, - const CallExpr *TheCall, bool HasVAListArg, - unsigned format_idx, unsigned firstDataArg) { +void Sema::CheckPrintfString(const StringLiteral *FExpr, + const Expr *OrigFormatExpr, + const CallExpr *TheCall, bool HasVAListArg, + unsigned format_idx, unsigned firstDataArg) { // CHECK: is the format string a wide literal? if (FExpr->isWide()) { diff --git a/test/Sema/format-strings.c b/test/Sema/format-strings.c index 20e4dcd97f..166e8888e2 100644 --- a/test/Sema/format-strings.c +++ b/test/Sema/format-strings.c @@ -51,7 +51,7 @@ void check_conditional_literal(const char* s, int i) { printf(i == 1 ? "yes" : "no"); // no-warning printf(i == 0 ? (i == 1 ? "yes" : "no") : "dont know"); // no-warning printf(i == 0 ? (i == 1 ? s : "no") : "dont know"); // expected-warning{{format string is not a string literal}} - printf("yes" ?: "no %d", 1); // expected-warning{{more data arguments than '%' conversions}} + printf("yes" ?: "no %d", 1); // expected-warning{{more data arguments than format specifiers}} } void check_writeback_specifier() @@ -65,10 +65,10 @@ void check_writeback_specifier() void check_invalid_specifier(FILE* fp, char *buf) { - printf("%s%lb%d","unix",10,20); // expected-warning {{lid conversion '%lb'}} - fprintf(fp,"%%%l"); // expected-warning {{lid conversion '%l'}} + printf("%s%lb%d","unix",10,20); // expected-warning {{invalid conversion specifier 'b'}} + fprintf(fp,"%%%l"); // expected-warning {{incomplete format specifier}} sprintf(buf,"%%%%%ld%d%d", 1, 2, 3); // no-warning - snprintf(buf, 2, "%%%%%ld%;%d", 1, 2, 3); // expected-warning {{sion '%;'}} + snprintf(buf, 2, "%%%%%ld%;%d", 1, 2, 3); // expected-warning {{invalid conversion specifier ';'}} } void check_null_char_string(char* b) @@ -139,3 +139,21 @@ void torture(va_list v8) { vprintf ("%*.*d", v8); // no-warning } +void test10(int x, float f, int i) { + printf("%@", 12); // expected-warning{{invalid conversion specifier '@'}} + printf("\0"); // expected-warning{{format string contains '\0' within the string body}} + printf("xs\0"); // expected-warning{{format string contains '\0' within the string body}} + printf("%*d\n"); // expected-warning{{'*' specified field width is missing a matching 'int' argument}} + printf("%*.*d\n", x); // expected-warning{{'.*' specified field precision is missing a matching 'int' argument}} + printf("%*d\n", f, x); // expected-warning{{field width should have type 'int', but argument has type 'double'}} + printf("%*.*d\n", x, f, x); // expected-warning{{field precision should have type 'int', but argument has type 'double'}} + printf("%**\n"); // expected-warning{{invalid conversion specifier '*'}} + printf("%n", &i); // expected-warning{{use of '%n' in format string discouraged (potentially insecure)}} + printf("%d%d\n", x); // expected-warning{{more '%' conversions than data arguments}} + printf("%d\n", x, x); // expected-warning{{more data arguments than format specifiers}} + printf("%W%d%Z\n", x, x, x); // expected-warning{{invalid conversion specifier 'W'}} expected-warning{{invalid conversion specifier 'Z'}} + printf("%"); // expected-warning{{incomplete format specifier}} + printf("%.d", x); // no-warning + printf("%.", x); // expected-warning{{incomplete format specifier}} +} + diff --git a/test/SemaObjC/format-strings-objc.m b/test/SemaObjC/format-strings-objc.m index e7550a758b..df52076582 100644 --- a/test/SemaObjC/format-strings-objc.m +++ b/test/SemaObjC/format-strings-objc.m @@ -35,7 +35,7 @@ extern void CFStringCreateWithFormat(CFStringRef format, ...) __attribute__((for void check_nslog(unsigned k) { NSLog(@"%d%%", k); // no-warning - NSLog(@"%s%lb%d", "unix", 10,20); // expected-warning {{lid conversion '%lb'}} + NSLog(@"%s%lb%d", "unix", 10,20); // expected-warning {{invalid conversion specifier 'b'}} } // Check type validation |