aboutsummaryrefslogtreecommitdiff
path: root/lib/Analysis/PrintfFormatString.cpp
diff options
context:
space:
mode:
authorTed Kremenek <kremenek@apple.com>2010-07-16 02:11:22 +0000
committerTed Kremenek <kremenek@apple.com>2010-07-16 02:11:22 +0000
commit826a3457f737f1fc45a22954fd1bfde38160c165 (patch)
tree1eef44acc8d837ec3ac4505687d22a9b90baa8d6 /lib/Analysis/PrintfFormatString.cpp
parentbee05c1206dcd525e0a1f066d166ad3e972a500e (diff)
Add most of the boilerplate support for scanf format string checking. This includes
handling the parsing of scanf format strings and hooking the checking into Sema. Most of this checking logic piggybacks on what was already there for checking printf format strings, but the checking logic has been refactored to support both. What is left to be done is to support argument type checking in format strings and of course fix the usual tail of bugs that will follow. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@108500 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Analysis/PrintfFormatString.cpp')
-rw-r--r--lib/Analysis/PrintfFormatString.cpp448
1 files changed, 43 insertions, 405 deletions
diff --git a/lib/Analysis/PrintfFormatString.cpp b/lib/Analysis/PrintfFormatString.cpp
index 558d38af0c..584fc12002 100644
--- a/lib/Analysis/PrintfFormatString.cpp
+++ b/lib/Analysis/PrintfFormatString.cpp
@@ -1,4 +1,4 @@
-//= PrintfFormatStrings.cpp - Analysis of printf format strings --*- C++ -*-==//
+//== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -12,141 +12,28 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/Analysis/Analyses/PrintfFormatString.h"
-#include "clang/AST/ASTContext.h"
-#include "clang/AST/Type.h"
-#include "llvm/Support/raw_ostream.h"
+#include "clang/Analysis/Analyses/FormatString.h"
+#include "FormatStringParsing.h"
-using clang::analyze_printf::ArgTypeResult;
-using clang::analyze_printf::FormatSpecifier;
-using clang::analyze_printf::FormatStringHandler;
-using clang::analyze_printf::OptionalAmount;
-using clang::analyze_printf::PositionContext;
+using clang::analyze_format_string::ArgTypeResult;
+using clang::analyze_format_string::FormatStringHandler;
+using clang::analyze_format_string::LengthModifier;
+using clang::analyze_format_string::OptionalAmount;
using clang::analyze_printf::ConversionSpecifier;
-using clang::analyze_printf::LengthModifier;
+using clang::analyze_printf::PrintfSpecifier;
using namespace clang;
-namespace {
-class FormatSpecifierResult {
- FormatSpecifier FS;
- const char *Start;
- bool Stop;
-public:
- FormatSpecifierResult(bool stop = false)
- : Start(0), Stop(stop) {}
- FormatSpecifierResult(const char *start,
- const FormatSpecifier &fs)
- : FS(fs), Start(start), Stop(false) {}
-
- const char *getStart() const { return Start; }
- bool shouldStop() const { return Stop; }
- bool hasValue() const { return Start != 0; }
- const FormatSpecifier &getValue() const {
- assert(hasValue());
- return FS;
- }
- const FormatSpecifier &getValue() { return FS; }
-};
-} // end anonymous namespace
-
-template <typename T>
-class UpdateOnReturn {
- T &ValueToUpdate;
- const T &ValueToCopy;
-public:
- UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
- : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {}
-
- ~UpdateOnReturn() {
- ValueToUpdate = ValueToCopy;
- }
-};
+typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
+ PrintfSpecifierResult;
//===----------------------------------------------------------------------===//
// Methods for parsing format strings.
//===----------------------------------------------------------------------===//
-static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
- const char *I = Beg;
- UpdateOnReturn <const char*> UpdateBeg(Beg, I);
-
- unsigned accumulator = 0;
- bool hasDigits = false;
-
- for ( ; I != E; ++I) {
- char c = *I;
- if (c >= '0' && c <= '9') {
- hasDigits = true;
- accumulator = (accumulator * 10) + (c - '0');
- continue;
- }
-
- if (hasDigits)
- return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
- false);
-
- break;
- }
-
- return OptionalAmount();
-}
-
-static OptionalAmount ParseNonPositionAmount(const char *&Beg, const char *E,
- unsigned &argIndex) {
- if (*Beg == '*') {
- ++Beg;
- return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
- }
-
- return ParseAmount(Beg, E);
-}
-
-static OptionalAmount ParsePositionAmount(FormatStringHandler &H,
- const char *Start,
- const char *&Beg, const char *E,
- PositionContext p) {
- if (*Beg == '*') {
- const char *I = Beg + 1;
- const OptionalAmount &Amt = ParseAmount(I, E);
-
- if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
- H.HandleInvalidPosition(Beg, I - Beg, p);
- return OptionalAmount(false);
- }
-
- if (I== E) {
- // No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
- return OptionalAmount(false);
- }
-
- assert(Amt.getHowSpecified() == OptionalAmount::Constant);
-
- if (*I == '$') {
- // Handle positional arguments
-
- // Special case: '*0$', since this is an easy mistake.
- if (Amt.getConstantAmount() == 0) {
- H.HandleZeroPosition(Beg, I - Beg + 1);
- return OptionalAmount(false);
- }
-
- const char *Tmp = Beg;
- Beg = ++I;
-
- return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
- Tmp, 0, true);
- }
-
- H.HandleInvalidPosition(Beg, I - Beg, p);
- return OptionalAmount(false);
- }
-
- return ParseAmount(Beg, E);
-}
+using analyze_format_string::ParseNonPositionAmount;
-static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS,
+static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
const char *Start, const char *&Beg, const char *E,
unsigned *argIndex) {
if (argIndex) {
@@ -154,7 +41,7 @@ static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS,
}
else {
const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
- analyze_printf::PrecisionPos);
+ analyze_format_string::PrecisionPos);
if (Amt.isInvalid())
return true;
FS.setPrecision(Amt);
@@ -162,57 +49,7 @@ static bool ParsePrecision(FormatStringHandler &H, FormatSpecifier &FS,
return false;
}
-static bool ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &FS,
- const char *Start, const char *&Beg, const char *E,
- unsigned *argIndex) {
- // FIXME: Support negative field widths.
- if (argIndex) {
- FS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
- }
- else {
- const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
- analyze_printf::FieldWidthPos);
- if (Amt.isInvalid())
- return true;
- FS.setFieldWidth(Amt);
- }
- return false;
-}
-
-static bool ParseArgPosition(FormatStringHandler &H,
- FormatSpecifier &FS, const char *Start,
- const char *&Beg, const char *E) {
-
- using namespace clang::analyze_printf;
- const char *I = Beg;
-
- const OptionalAmount &Amt = ParseAmount(I, E);
-
- if (I == E) {
- // No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
- return true;
- }
-
- if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
- // Special case: '%0$', since this is an easy mistake.
- if (Amt.getConstantAmount() == 0) {
- H.HandleZeroPosition(Start, I - Start);
- return true;
- }
-
- FS.setArgIndex(Amt.getConstantAmount() - 1);
- FS.setUsesPositionalArg();
- // Update the caller's pointer if we decided to consume
- // these characters.
- Beg = I;
- return false;
- }
-
- return false;
-}
-
-static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
+static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
const char *&Beg,
const char *E,
unsigned &argIndex) {
@@ -243,17 +80,17 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
- FormatSpecifier FS;
+ PrintfSpecifier FS;
if (ParseArgPosition(H, FS, Start, I, E))
return true;
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
@@ -274,7 +111,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
@@ -285,7 +122,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
@@ -293,7 +130,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
if (*I == '.') {
++I;
if (I == E) {
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
@@ -303,39 +140,15 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
if (I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
}
// Look for the length modifier.
- LengthModifier::Kind lmKind = LengthModifier::None;
- const char *lmPosition = I;
- switch (*I) {
- default:
- break;
- case 'h':
- ++I;
- lmKind = (I != E && *I == 'h') ?
- ++I, LengthModifier::AsChar : LengthModifier::AsShort;
- break;
- case 'l':
- ++I;
- lmKind = (I != E && *I == 'l') ?
- ++I, LengthModifier::AsLongLong : LengthModifier::AsLong;
- break;
- case 'j': lmKind = LengthModifier::AsIntMax; ++I; break;
- case 'z': lmKind = LengthModifier::AsSizeT; ++I; break;
- case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break;
- case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
- case 'q': lmKind = LengthModifier::AsLongLong; ++I; break;
- }
- LengthModifier lm(lmPosition, lmKind);
- FS.setLengthModifier(lm);
-
- if (I == E) {
+ if (ParseLengthModifier(FS, I, E) && I == E) {
// No more characters left?
- H.HandleIncompleteFormatSpecifier(Start, E - Start);
+ H.HandleIncompleteSpecifier(Start, E - Start);
return true;
}
@@ -386,19 +199,20 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
if (k == ConversionSpecifier::InvalidSpecifier) {
// Assume the conversion takes one argument.
- return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg);
+ return !H.HandleInvalidPrintfConversionSpecifier(FS, Beg, I - Beg);
}
- return FormatSpecifierResult(Start, FS);
+ return PrintfSpecifierResult(Start, FS);
}
-bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
- const char *I, const char *E) {
+bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
+ const char *I,
+ const char *E) {
unsigned argIndex = 0;
// Keep looking for a format specifier until we have exhausted the string.
while (I != E) {
- const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex);
+ const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex);
// Did a fail-stop error of any kind occur when parsing the specifier?
// If so, don't do any more processing.
if (FSR.shouldStop())
@@ -408,7 +222,7 @@ bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
if (!FSR.hasValue())
continue;
// We have a format specifier. Pass it to the callback.
- if (!H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(),
+ if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
I - FSR.getStart()))
return true;
}
@@ -416,129 +230,6 @@ bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H,
return false;
}
-FormatStringHandler::~FormatStringHandler() {}
-
-//===----------------------------------------------------------------------===//
-// Methods on ArgTypeResult.
-//===----------------------------------------------------------------------===//
-
-bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
- switch (K) {
- case InvalidTy:
- assert(false && "ArgTypeResult must be valid");
- return true;
-
- case UnknownTy:
- return true;
-
- case SpecificTy: {
- argTy = C.getCanonicalType(argTy).getUnqualifiedType();
- if (T == argTy)
- return true;
- if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
- switch (BT->getKind()) {
- default:
- break;
- case BuiltinType::Char_S:
- case BuiltinType::SChar:
- return T == C.UnsignedCharTy;
- case BuiltinType::Char_U:
- case BuiltinType::UChar:
- return T == C.SignedCharTy;
- case BuiltinType::Short:
- return T == C.UnsignedShortTy;
- case BuiltinType::UShort:
- return T == C.ShortTy;
- case BuiltinType::Int:
- return T == C.UnsignedIntTy;
- case BuiltinType::UInt:
- return T == C.IntTy;
- case BuiltinType::Long:
- return T == C.UnsignedLongTy;
- case BuiltinType::ULong:
- return T == C.LongTy;
- case BuiltinType::LongLong:
- return T == C.UnsignedLongLongTy;
- case BuiltinType::ULongLong:
- return T == C.LongLongTy;
- }
- return false;
- }
-
- case CStrTy: {
- const PointerType *PT = argTy->getAs<PointerType>();
- if (!PT)
- return false;
- QualType pointeeTy = PT->getPointeeType();
- if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
- switch (BT->getKind()) {
- case BuiltinType::Void:
- case BuiltinType::Char_U:
- case BuiltinType::UChar:
- case BuiltinType::Char_S:
- case BuiltinType::SChar:
- return true;
- default:
- break;
- }
-
- return false;
- }
-
- case WCStrTy: {
- const PointerType *PT = argTy->getAs<PointerType>();
- if (!PT)
- return false;
- QualType pointeeTy =
- C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
- return pointeeTy == C.getWCharType();
- }
-
- case CPointerTy:
- return argTy->getAs<PointerType>() != NULL ||
- argTy->getAs<ObjCObjectPointerType>() != NULL;
-
- case ObjCPointerTy:
- return argTy->getAs<ObjCObjectPointerType>() != NULL;
- }
-
- // FIXME: Should be unreachable, but Clang is currently emitting
- // a warning.
- return false;
-}
-
-QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
- switch (K) {
- case InvalidTy:
- assert(false && "No representative type for Invalid ArgTypeResult");
- // Fall-through.
- case UnknownTy:
- return QualType();
- case SpecificTy:
- return T;
- case CStrTy:
- return C.getPointerType(C.CharTy);
- case WCStrTy:
- return C.getPointerType(C.getWCharType());
- case ObjCPointerTy:
- return C.ObjCBuiltinIdTy;
- case CPointerTy:
- return C.VoidPtrTy;
- }
-
- // FIXME: Should be unreachable, but Clang is currently emitting
- // a warning.
- return QualType();
-}
-
-//===----------------------------------------------------------------------===//
-// Methods on OptionalAmount.
-//===----------------------------------------------------------------------===//
-
-ArgTypeResult OptionalAmount::getArgType(ASTContext &Ctx) const {
- return Ctx.IntTy;
-}
-
//===----------------------------------------------------------------------===//
// Methods on ConversionSpecifier.
//===----------------------------------------------------------------------===//
@@ -579,63 +270,10 @@ const char *ConversionSpecifier::toString() const {
}
//===----------------------------------------------------------------------===//
-// Methods on LengthModifier.
-//===----------------------------------------------------------------------===//
-
-const char *LengthModifier::toString() const {
- switch (kind) {
- case AsChar:
- return "hh";
- case AsShort:
- return "h";
- case AsLong: // or AsWideChar
- return "l";
- case AsLongLong:
- return "ll";
- case AsIntMax:
- return "j";
- case AsSizeT:
- return "z";
- case AsPtrDiff:
- return "t";
- case AsLongDouble:
- return "L";
- case None:
- return "";
- }
- return NULL;
-}
-
-//===----------------------------------------------------------------------===//
-// Methods on OptionalAmount.
-//===----------------------------------------------------------------------===//
-
-void OptionalAmount::toString(llvm::raw_ostream &os) const {
- switch (hs) {
- case Invalid:
- case NotSpecified:
- return;
- case Arg:
- if (UsesDotPrefix)
- os << ".";
- if (usesPositionalArg())
- os << "*" << getPositionalArgIndex() << "$";
- else
- os << "*";
- break;
- case Constant:
- if (UsesDotPrefix)
- os << ".";
- os << amt;
- break;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Methods on FormatSpecifier.
+// Methods on PrintfSpecifier.
//===----------------------------------------------------------------------===//
-ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
+ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
if (!CS.consumesDataArgument())
return ArgTypeResult::Invalid();
@@ -702,7 +340,7 @@ ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const {
return ArgTypeResult();
}
-bool FormatSpecifier::fixType(QualType QT) {
+bool PrintfSpecifier::fixType(QualType QT) {
// Handle strings first (char *, wchar_t *)
if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
CS.setKind(ConversionSpecifier::CStrArg);
@@ -783,9 +421,9 @@ bool FormatSpecifier::fixType(QualType QT) {
return true;
}
-void FormatSpecifier::toString(llvm::raw_ostream &os) const {
+void PrintfSpecifier::toString(llvm::raw_ostream &os) const {
// Whilst some features have no defined order, we are using the order
- // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ¤7.19.6.1)
+ // appearing in the C99 standard (ISO/IEC 9899:1999 (E) ¤7.19.6.1)
os << "%";
// Positional args
@@ -810,7 +448,7 @@ void FormatSpecifier::toString(llvm::raw_ostream &os) const {
os << CS.toString();
}
-bool FormatSpecifier::hasValidPlusPrefix() const {
+bool PrintfSpecifier::hasValidPlusPrefix() const {
if (!HasPlusPrefix)
return true;
@@ -833,7 +471,7 @@ bool FormatSpecifier::hasValidPlusPrefix() const {
}
}
-bool FormatSpecifier::hasValidAlternativeForm() const {
+bool PrintfSpecifier::hasValidAlternativeForm() const {
if (!HasAlternativeForm)
return true;
@@ -856,7 +494,7 @@ bool FormatSpecifier::hasValidAlternativeForm() const {
}
}
-bool FormatSpecifier::hasValidLeadingZeros() const {
+bool PrintfSpecifier::hasValidLeadingZeros() const {
if (!HasLeadingZeroes)
return true;
@@ -883,7 +521,7 @@ bool FormatSpecifier::hasValidLeadingZeros() const {
}
}
-bool FormatSpecifier::hasValidSpacePrefix() const {
+bool PrintfSpecifier::hasValidSpacePrefix() const {
if (!HasSpacePrefix)
return true;
@@ -906,7 +544,7 @@ bool FormatSpecifier::hasValidSpacePrefix() const {
}
}
-bool FormatSpecifier::hasValidLeftJustified() const {
+bool PrintfSpecifier::hasValidLeftJustified() const {
if (!IsLeftJustified)
return true;
@@ -920,7 +558,7 @@ bool FormatSpecifier::hasValidLeftJustified() const {
}
}
-bool FormatSpecifier::hasValidLengthModifier() const {
+bool PrintfSpecifier::hasValidLengthModifier() const {
switch (LM.getKind()) {
case LengthModifier::None:
return true;
@@ -988,7 +626,7 @@ bool FormatSpecifier::hasValidLengthModifier() const {
return false;
}
-bool FormatSpecifier::hasValidPrecision() const {
+bool PrintfSpecifier::hasValidPrecision() const {
if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
return true;
@@ -1015,7 +653,7 @@ bool FormatSpecifier::hasValidPrecision() const {
return false;
}
}
-bool FormatSpecifier::hasValidFieldWidth() const {
+bool PrintfSpecifier::hasValidFieldWidth() const {
if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
return true;