aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/clang/Analysis/Analyses/PrintfFormatString.h31
-rw-r--r--include/clang/Basic/DiagnosticSemaKinds.td4
-rw-r--r--lib/Analysis/PrintfFormatString.cpp34
-rw-r--r--lib/Sema/SemaChecking.cpp83
-rw-r--r--test/Sema/format-strings.c4
5 files changed, 102 insertions, 54 deletions
diff --git a/include/clang/Analysis/Analyses/PrintfFormatString.h b/include/clang/Analysis/Analyses/PrintfFormatString.h
index f1e8220908..f66892f185 100644
--- a/include/clang/Analysis/Analyses/PrintfFormatString.h
+++ b/include/clang/Analysis/Analyses/PrintfFormatString.h
@@ -152,18 +152,21 @@ class OptionalAmount {
public:
enum HowSpecified { NotSpecified, Constant, Arg };
- OptionalAmount(HowSpecified h, const char *st)
- : start(st), hs(h), amt(0) {}
+ OptionalAmount(HowSpecified h, unsigned i, const char *st)
+ : start(st), hs(h), amt(i) {}
OptionalAmount()
: start(0), hs(NotSpecified), amt(0) {}
- OptionalAmount(unsigned i, const char *st)
- : start(st), hs(Constant), amt(i) {}
-
HowSpecified getHowSpecified() const { return hs; }
+
bool hasDataArgument() const { return hs == Arg; }
+ unsigned getArgIndex() const {
+ assert(hasDataArgument());
+ return amt;
+ }
+
unsigned getConstantAmount() const {
assert(hs == Constant);
return amt;
@@ -188,14 +191,14 @@ class FormatSpecifier {
unsigned HasSpacePrefix : 1;
unsigned HasAlternativeForm : 1;
unsigned HasLeadingZeroes : 1;
- unsigned flags : 5;
+ unsigned argIndex;
ConversionSpecifier CS;
OptionalAmount FieldWidth;
OptionalAmount Precision;
public:
FormatSpecifier() : LM(None),
IsLeftJustified(0), HasPlusPrefix(0), HasSpacePrefix(0),
- HasAlternativeForm(0), HasLeadingZeroes(0) {}
+ HasAlternativeForm(0), HasLeadingZeroes(0), argIndex(0) {}
static FormatSpecifier Parse(const char *beg, const char *end);
@@ -212,6 +215,16 @@ public:
void setHasAlternativeForm() { HasAlternativeForm = 1; }
void setHasLeadingZeros() { HasLeadingZeroes = 1; }
+ void setArgIndex(unsigned i) {
+ assert(CS.consumesDataArgument());
+ argIndex = i;
+ }
+
+ unsigned getArgIndex() const {
+ assert(CS.consumesDataArgument());
+ return argIndex;
+ }
+
// Methods for querying the format specifier.
const ConversionSpecifier &getConversionSpecifier() const {
@@ -262,10 +275,10 @@ public:
virtual void HandleNullChar(const char *nullCharacter) {}
- virtual void
+ virtual bool
HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
const char *startSpecifier,
- unsigned specifierLen) {}
+ unsigned specifierLen) { return true; }
virtual bool HandleFormatSpecifier(const analyze_printf::FormatSpecifier &FS,
const char *startSpecifier,
diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td
index 52fbbcbb00..3d9253db33 100644
--- a/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2521,8 +2521,8 @@ def warn_printf_write_back : Warning<
InGroup<FormatSecurity>;
def warn_printf_insufficient_data_args : Warning<
"more '%%' conversions than data arguments">, InGroup<Format>;
-def warn_printf_too_many_data_args : Warning<
- "more data arguments than format specifiers">, InGroup<FormatExtraArgs>;
+def warn_printf_data_arg_not_used : Warning<
+ "data argument not used by format string">, InGroup<FormatExtraArgs>;
def warn_printf_invalid_conversion : Warning<
"invalid conversion specifier '%0'">, InGroup<Format>;
def warn_printf_incomplete_specifier : Warning<
diff --git a/lib/Analysis/PrintfFormatString.cpp b/lib/Analysis/PrintfFormatString.cpp
index 3aee57cb81..9c0f813964 100644
--- a/lib/Analysis/PrintfFormatString.cpp
+++ b/lib/Analysis/PrintfFormatString.cpp
@@ -62,7 +62,8 @@ public:
// Methods for parsing format strings.
//===----------------------------------------------------------------------===//
-static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
+static OptionalAmount ParseAmount(const char *&Beg, const char *E,
+ unsigned &argIndex) {
const char *I = Beg;
UpdateOnReturn <const char*> UpdateBeg(Beg, I);
@@ -78,11 +79,11 @@ static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
}
if (foundDigits)
- return OptionalAmount(accumulator, Beg);
+ return OptionalAmount(OptionalAmount::Constant, accumulator, Beg);
if (c == '*') {
++I;
- return OptionalAmount(OptionalAmount::Arg, Beg);
+ return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg);
}
break;
@@ -93,7 +94,8 @@ static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
const char *&Beg,
- const char *E) {
+ const char *E,
+ unsigned &argIndex) {
using namespace clang::analyze_printf;
@@ -149,7 +151,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
}
// Look for the field width (if any).
- FS.setFieldWidth(ParseAmount(I, E));
+ FS.setFieldWidth(ParseAmount(I, E, argIndex));
if (I == E) {
// No more characters left?
@@ -165,7 +167,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
return true;
}
- FS.setPrecision(ParseAmount(I, E));
+ FS.setPrecision(ParseAmount(I, E, argIndex));
if (I == E) {
// No more characters left?
@@ -241,20 +243,26 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
// Glibc specific.
case 'm': k = ConversionSpecifier::PrintErrno; break;
}
- FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k));
+ ConversionSpecifier CS(conversionPosition, k);
+ FS.setConversionSpecifier(CS);
+ if (CS.consumesDataArgument())
+ FS.setArgIndex(argIndex++);
if (k == ConversionSpecifier::InvalidSpecifier) {
- H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
- return false; // Keep processing format specifiers.
+ // Assume the conversion takes one argument.
+ return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
}
return FormatSpecifierResult(Start, FS);
}
bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
const char *I, const char *E) {
+
+ unsigned argIndex = 0;
+
// Keep looking for a format specifier until we have exhausted the string.
while (I != E) {
- const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E);
+ const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex);
// Did a fail-stop error of any kind occur when parsing the specifier?
// If so, don't do any more processing.
if (FSR.shouldStop())
@@ -430,7 +438,7 @@ ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
return Ctx.LongDoubleTy;
return Ctx.DoubleTy;
}
-
+
switch (CS.getKind()) {
case ConversionSpecifier::CStrArg:
return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
@@ -438,11 +446,11 @@ ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
// FIXME: This appears to be Mac OS X specific.
return ArgTypeResult::WCStrTy;
case ConversionSpecifier::CArg:
- return Ctx.WCharTy;
+ return Ctx.WCharTy;
default:
break;
}
-
+
// FIXME: Handle other cases.
return ArgTypeResult();
}
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index 019b79811d..2f71c778ec 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -955,6 +955,7 @@ Sema::CheckNonNullArguments(const NonNullAttr *NonNull,
/// FormatGuard: Automatic Protection From printf Format String
/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
///
+/// TODO:
/// Functionality implemented:
///
/// We can statically check the following properties for string
@@ -965,7 +966,7 @@ Sema::CheckNonNullArguments(const NonNullAttr *NonNull,
/// data arguments?
///
/// (2) Does each format conversion correctly match the type of the
-/// corresponding data argument? (TODO)
+/// corresponding data argument?
///
/// Moreover, for all printf functions we can:
///
@@ -984,7 +985,6 @@ Sema::CheckNonNullArguments(const NonNullAttr *NonNull,
///
/// All of these checks can be done by parsing the format string.
///
-/// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
void
Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg,
unsigned format_idx, unsigned firstDataArg) {
@@ -1044,13 +1044,13 @@ class CheckPrintfHandler : public analyze_printf::FormatStringHandler {
Sema &S;
const StringLiteral *FExpr;
const Expr *OrigFormatExpr;
- unsigned NumConversions;
const unsigned NumDataArgs;
const bool IsObjCLiteral;
const char *Beg; // Start of format string.
const bool HasVAListArg;
const CallExpr *TheCall;
unsigned FormatIdx;
+ llvm::BitVector CoveredArgs;
public:
CheckPrintfHandler(Sema &s, const StringLiteral *fexpr,
const Expr *origFormatExpr,
@@ -1058,17 +1058,20 @@ public:
const char *beg, bool hasVAListArg,
const CallExpr *theCall, unsigned formatIdx)
: S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr),
- NumConversions(0), NumDataArgs(numDataArgs),
+ NumDataArgs(numDataArgs),
IsObjCLiteral(isObjCLiteral), Beg(beg),
HasVAListArg(hasVAListArg),
- TheCall(theCall), FormatIdx(formatIdx) {}
+ TheCall(theCall), FormatIdx(formatIdx) {
+ CoveredArgs.resize(numDataArgs);
+ CoveredArgs.reset();
+ }
void DoneProcessing();
void HandleIncompleteFormatSpecifier(const char *startSpecifier,
unsigned specifierLen);
- void
+ bool
HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
const char *startSpecifier,
unsigned specifierLen);
@@ -1117,18 +1120,35 @@ HandleIncompleteFormatSpecifier(const char *startSpecifier,
<< getFormatSpecifierRange(startSpecifier, specifierLen);
}
-void CheckPrintfHandler::
+bool CheckPrintfHandler::
HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS,
const char *startSpecifier,
unsigned specifierLen) {
- ++NumConversions;
+ unsigned argIndex = FS.getArgIndex();
+ bool keepGoing = true;
+ if (argIndex < NumDataArgs) {
+ // Consider the argument coverered, even though the specifier doesn't
+ // make sense.
+ CoveredArgs.set(argIndex);
+ }
+ else {
+ // If argIndex exceeds the number of data arguments we
+ // don't issue a warning because that is just a cascade of warnings (and
+ // they may have intended '%%' anyway). We don't want to continue processing
+ // the format string after this point, however, as we will like just get
+ // gibberish when trying to match arguments.
+ keepGoing = false;
+ }
+
const analyze_printf::ConversionSpecifier &CS =
FS.getConversionSpecifier();
SourceLocation Loc = getLocationOfByte(CS.getStart());
S.Diag(Loc, diag::warn_printf_invalid_conversion)
<< llvm::StringRef(CS.getStart(), CS.getLength())
<< getFormatSpecifierRange(startSpecifier, specifierLen);
+
+ return keepGoing;
}
void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) {
@@ -1139,7 +1159,7 @@ void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) {
}
const Expr *CheckPrintfHandler::getDataArg(unsigned i) const {
- return TheCall->getArg(FormatIdx + i);
+ return TheCall->getArg(FormatIdx + i + 1);
}
@@ -1162,9 +1182,9 @@ CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt,
unsigned specifierLen) {
if (Amt.hasDataArgument()) {
- ++NumConversions;
if (!HasVAListArg) {
- if (NumConversions > NumDataArgs) {
+ unsigned argIndex = Amt.getArgIndex();
+ if (argIndex >= NumDataArgs) {
S.Diag(getLocationOfByte(Amt.getStart()), MissingArgDiag)
<< getFormatSpecifierRange(startSpecifier, specifierLen);
// Don't do any more checking. We will just emit
@@ -1176,7 +1196,8 @@ CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt,
// Although not in conformance with C99, we also allow the argument to be
// an 'unsigned int' as that is a reasonably safe case. GCC also
// doesn't emit a warning for that case.
- const Expr *Arg = getDataArg(NumConversions);
+ CoveredArgs.set(argIndex);
+ const Expr *Arg = getDataArg(argIndex);
QualType T = Arg->getType();
const analyze_printf::ArgTypeResult &ATR = Amt.getArgType(S.Context);
@@ -1221,22 +1242,21 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier
return false;
}
- // Check for using an Objective-C specific conversion specifier
- // in a non-ObjC literal.
- if (!IsObjCLiteral && CS.isObjCArg()) {
- HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen);
-
- // Continue checking the other format specifiers.
- return true;
- }
-
if (!CS.consumesDataArgument()) {
// FIXME: Technically specifying a precision or field width here
// makes no sense. Worth issuing a warning at some point.
return true;
}
- ++NumConversions;
+ // Consume the argument.
+ unsigned argIndex = FS.getArgIndex();
+ CoveredArgs.set(argIndex);
+
+ // Check for using an Objective-C specific conversion specifier
+ // in a non-ObjC literal.
+ if (!IsObjCLiteral && CS.isObjCArg()) {
+ return HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen);
+ }
// Are we using '%n'? Issue a warning about this being
// a possible security issue.
@@ -1270,7 +1290,7 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier
if (HasVAListArg)
return true;
- if (NumConversions > NumDataArgs) {
+ if (argIndex >= NumDataArgs) {
S.Diag(getLocationOfByte(CS.getStart()),
diag::warn_printf_insufficient_data_args)
<< getFormatSpecifierRange(startSpecifier, specifierLen);
@@ -1280,7 +1300,7 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier
// Now type check the data expression that matches the
// format specifier.
- const Expr *Ex = getDataArg(NumConversions);
+ const Expr *Ex = getDataArg(argIndex);
const analyze_printf::ArgTypeResult &ATR = FS.getArgType(S.Context);
if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) {
// Check if we didn't match because of an implicit cast from a 'char'
@@ -1304,10 +1324,17 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier
void CheckPrintfHandler::DoneProcessing() {
// Does the number of data arguments exceed the number of
// format conversions in the format string?
- if (!HasVAListArg && NumConversions < NumDataArgs)
- S.Diag(getDataArg(NumConversions+1)->getLocStart(),
- diag::warn_printf_too_many_data_args)
- << getFormatStringRange();
+ if (!HasVAListArg) {
+ // Find any arguments that weren't covered.
+ CoveredArgs.flip();
+ signed notCoveredArg = CoveredArgs.find_first();
+ if (notCoveredArg >= 0) {
+ assert((unsigned)notCoveredArg < NumDataArgs);
+ S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(),
+ diag::warn_printf_data_arg_not_used)
+ << getFormatStringRange();
+ }
+ }
}
void Sema::CheckPrintfString(const StringLiteral *FExpr,
diff --git a/test/Sema/format-strings.c b/test/Sema/format-strings.c
index e2686a885e..191996004d 100644
--- a/test/Sema/format-strings.c
+++ b/test/Sema/format-strings.c
@@ -55,7 +55,7 @@ void check_conditional_literal(const char* s, int i) {
printf(i == 1 ? "yes" : "no"); // no-warning
printf(i == 0 ? (i == 1 ? "yes" : "no") : "dont know"); // no-warning
printf(i == 0 ? (i == 1 ? s : "no") : "dont know"); // expected-warning{{format string is not a string literal}}
- printf("yes" ?: "no %d", 1); // expected-warning{{more data arguments than format specifiers}}
+ printf("yes" ?: "no %d", 1); // expected-warning{{data argument not used by format string}}
}
void check_writeback_specifier()
@@ -155,7 +155,7 @@ void test10(int x, float f, int i, long long lli) {
printf("%**\n"); // expected-warning{{invalid conversion specifier '*'}}
printf("%n", &i); // expected-warning{{use of '%n' in format string discouraged (potentially insecure)}}
printf("%d%d\n", x); // expected-warning{{more '%' conversions than data arguments}}
- printf("%d\n", x, x); // expected-warning{{more data arguments than format specifiers}}
+ printf("%d\n", x, x); // expected-warning{{data argument not used by format string}}
printf("%W%d%Z\n", x, x, x); // expected-warning{{invalid conversion specifier 'W'}} expected-warning{{invalid conversion specifier 'Z'}}
printf("%"); // expected-warning{{incomplete format specifier}}
printf("%.d", x); // no-warning