diff options
author | Ted Kremenek <kremenek@apple.com> | 2010-07-16 02:11:22 +0000 |
---|---|---|
committer | Ted Kremenek <kremenek@apple.com> | 2010-07-16 02:11:22 +0000 |
commit | 826a3457f737f1fc45a22954fd1bfde38160c165 (patch) | |
tree | 1eef44acc8d837ec3ac4505687d22a9b90baa8d6 /lib/Analysis/FormatString.cpp | |
parent | bee05c1206dcd525e0a1f066d166ad3e972a500e (diff) |
Add most of the boilerplate support for scanf format string checking. This includes
handling the parsing of scanf format strings and hooking the checking into Sema.
Most of this checking logic piggybacks on what was already there for checking printf format
strings, but the checking logic has been refactored to support both.
What is left to be done is to support argument type checking in format strings and of course
fix the usual tail of bugs that will follow.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@108500 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Analysis/FormatString.cpp')
-rw-r--r-- | lib/Analysis/FormatString.cpp | 380 |
1 files changed, 380 insertions, 0 deletions
diff --git a/lib/Analysis/FormatString.cpp b/lib/Analysis/FormatString.cpp new file mode 100644 index 0000000000..dcd18db16c --- /dev/null +++ b/lib/Analysis/FormatString.cpp @@ -0,0 +1,380 @@ +// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Shared details for processing format strings of printf and scanf +// (and friends). +// +//===----------------------------------------------------------------------===// + +#include "FormatStringParsing.h" + +using clang::analyze_format_string::ArgTypeResult; +using clang::analyze_format_string::FormatStringHandler; +using clang::analyze_format_string::FormatSpecifier; +using clang::analyze_format_string::LengthModifier; +using clang::analyze_format_string::OptionalAmount; +using clang::analyze_format_string::PositionContext; +using namespace clang; + +// Key function to FormatStringHandler. +FormatStringHandler::~FormatStringHandler() {} + +//===----------------------------------------------------------------------===// +// Functions for parsing format strings components in both printf and +// scanf format strings. +//===----------------------------------------------------------------------===// + +OptionalAmount +clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { + const char *I = Beg; + UpdateOnReturn <const char*> UpdateBeg(Beg, I); + + unsigned accumulator = 0; + bool hasDigits = false; + + for ( ; I != E; ++I) { + char c = *I; + if (c >= '0' && c <= '9') { + hasDigits = true; + accumulator = (accumulator * 10) + (c - '0'); + continue; + } + + if (hasDigits) + return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, + false); + + break; + } + + return OptionalAmount(); +} + +OptionalAmount +clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, + const char *E, + unsigned &argIndex) { + if (*Beg == '*') { + ++Beg; + return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); + } + + return ParseAmount(Beg, E); +} + +OptionalAmount +clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, + const char *Start, + const char *&Beg, + const char *E, + PositionContext p) { + if (*Beg == '*') { + const char *I = Beg + 1; + const OptionalAmount &Amt = ParseAmount(I, E); + + if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { + H.HandleInvalidPosition(Beg, I - Beg, p); + return OptionalAmount(false); + } + + if (I == E) { + // No more characters left? + H.HandleIncompleteSpecifier(Start, E - Start); + return OptionalAmount(false); + } + + assert(Amt.getHowSpecified() == OptionalAmount::Constant); + + if (*I == '$') { + // Handle positional arguments + + // Special case: '*0$', since this is an easy mistake. + if (Amt.getConstantAmount() == 0) { + H.HandleZeroPosition(Beg, I - Beg + 1); + return OptionalAmount(false); + } + + const char *Tmp = Beg; + Beg = ++I; + + return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, + Tmp, 0, true); + } + + H.HandleInvalidPosition(Beg, I - Beg, p); + return OptionalAmount(false); + } + + return ParseAmount(Beg, E); +} + + +bool +clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, + FormatSpecifier &CS, + const char *Start, + const char *&Beg, const char *E, + unsigned *argIndex) { + // FIXME: Support negative field widths. + if (argIndex) { + CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); + } + else { + const OptionalAmount Amt = + ParsePositionAmount(H, Start, Beg, E, + analyze_format_string::FieldWidthPos); + + if (Amt.isInvalid()) + return true; + CS.setFieldWidth(Amt); + } + return false; +} + +bool +clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, + FormatSpecifier &FS, + const char *Start, + const char *&Beg, + const char *E) { + const char *I = Beg; + + const OptionalAmount &Amt = ParseAmount(I, E); + + if (I == E) { + // No more characters left? + H.HandleIncompleteSpecifier(Start, E - Start); + return true; + } + + if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { + // Special case: '%0$', since this is an easy mistake. + if (Amt.getConstantAmount() == 0) { + H.HandleZeroPosition(Start, I - Start); + return true; + } + + FS.setArgIndex(Amt.getConstantAmount() - 1); + FS.setUsesPositionalArg(); + // Update the caller's pointer if we decided to consume + // these characters. + Beg = I; + return false; + } + + return false; +} + +bool +clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, + const char *&I, + const char *E) { + LengthModifier::Kind lmKind = LengthModifier::None; + const char *lmPosition = I; + switch (*I) { + default: + return false; + case 'h': + ++I; + lmKind = (I != E && *I == 'h') ? + ++I, LengthModifier::AsChar : LengthModifier::AsShort; + break; + case 'l': + ++I; + lmKind = (I != E && *I == 'l') ? + ++I, LengthModifier::AsLongLong : LengthModifier::AsLong; + break; + case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; + case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; + case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; + case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; + case 'q': lmKind = LengthModifier::AsLongLong; ++I; break; + } + LengthModifier lm(lmPosition, lmKind); + FS.setLengthModifier(lm); + return true; +} + +//===----------------------------------------------------------------------===// +// Methods on ArgTypeResult. +//===----------------------------------------------------------------------===// + +bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { + switch (K) { + case InvalidTy: + assert(false && "ArgTypeResult must be valid"); + return true; + + case UnknownTy: + return true; + + case SpecificTy: { + argTy = C.getCanonicalType(argTy).getUnqualifiedType(); + if (T == argTy) + return true; + if (const BuiltinType *BT = argTy->getAs<BuiltinType>()) + switch (BT->getKind()) { + default: + break; + case BuiltinType::Char_S: + case BuiltinType::SChar: + return T == C.UnsignedCharTy; + case BuiltinType::Char_U: + case BuiltinType::UChar: + return T == C.SignedCharTy; + case BuiltinType::Short: + return T == C.UnsignedShortTy; + case BuiltinType::UShort: + return T == C.ShortTy; + case BuiltinType::Int: + return T == C.UnsignedIntTy; + case BuiltinType::UInt: + return T == C.IntTy; + case BuiltinType::Long: + return T == C.UnsignedLongTy; + case BuiltinType::ULong: + return T == C.LongTy; + case BuiltinType::LongLong: + return T == C.UnsignedLongLongTy; + case BuiltinType::ULongLong: + return T == C.LongLongTy; + } + return false; + } + + case CStrTy: { + const PointerType *PT = argTy->getAs<PointerType>(); + if (!PT) + return false; + QualType pointeeTy = PT->getPointeeType(); + if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) + switch (BT->getKind()) { + case BuiltinType::Void: + case BuiltinType::Char_U: + case BuiltinType::UChar: + case BuiltinType::Char_S: + case BuiltinType::SChar: + return true; + default: + break; + } + + return false; + } + + case WCStrTy: { + const PointerType *PT = argTy->getAs<PointerType>(); + if (!PT) + return false; + QualType pointeeTy = + C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); + return pointeeTy == C.getWCharType(); + } + + case CPointerTy: + return argTy->getAs<PointerType>() != NULL || + argTy->getAs<ObjCObjectPointerType>() != NULL; + + case ObjCPointerTy: + return argTy->getAs<ObjCObjectPointerType>() != NULL; + } + + // FIXME: Should be unreachable, but Clang is currently emitting + // a warning. + return false; +} + +QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { + switch (K) { + case InvalidTy: + assert(false && "No representative type for Invalid ArgTypeResult"); + // Fall-through. + case UnknownTy: + return QualType(); + case SpecificTy: + return T; + case CStrTy: + return C.getPointerType(C.CharTy); + case WCStrTy: + return C.getPointerType(C.getWCharType()); + case ObjCPointerTy: + return C.ObjCBuiltinIdTy; + case CPointerTy: + return C.VoidPtrTy; + } + + // FIXME: Should be unreachable, but Clang is currently emitting + // a warning. + return QualType(); +} + +//===----------------------------------------------------------------------===// +// Methods on OptionalAmount. +//===----------------------------------------------------------------------===// + +ArgTypeResult +analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { + return Ctx.IntTy; +} + +//===----------------------------------------------------------------------===// +// Methods on LengthModifier. +//===----------------------------------------------------------------------===// + +const char * +analyze_format_string::LengthModifier::toString() const { + switch (kind) { + case AsChar: + return "hh"; + case AsShort: + return "h"; + case AsLong: // or AsWideChar + return "l"; + case AsLongLong: + return "ll"; + case AsIntMax: + return "j"; + case AsSizeT: + return "z"; + case AsPtrDiff: + return "t"; + case AsLongDouble: + return "L"; + case None: + return ""; + } + return NULL; +} + +//===----------------------------------------------------------------------===// +// Methods on OptionalAmount. +//===----------------------------------------------------------------------===// + +void +analyze_format_string::OptionalAmount::toString(llvm::raw_ostream &os) const { + switch (hs) { + case Invalid: + case NotSpecified: + return; + case Arg: + if (UsesDotPrefix) + os << "."; + if (usesPositionalArg()) + os << "*" << getPositionalArgIndex() << "$"; + else + os << "*"; + break; + case Constant: + if (UsesDotPrefix) + os << "."; + os << amt; + break; + } +} + |