// FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*- // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Shared details for processing format strings of printf and scanf // (and friends). // //===----------------------------------------------------------------------===// #include "FormatStringParsing.h" #include "clang/Basic/LangOptions.h" using clang::analyze_format_string::ArgTypeResult; using clang::analyze_format_string::FormatStringHandler; using clang::analyze_format_string::FormatSpecifier; using clang::analyze_format_string::LengthModifier; using clang::analyze_format_string::OptionalAmount; using clang::analyze_format_string::PositionContext; using clang::analyze_format_string::ConversionSpecifier; using namespace clang; // Key function to FormatStringHandler. FormatStringHandler::~FormatStringHandler() {} //===----------------------------------------------------------------------===// // Functions for parsing format strings components in both printf and // scanf format strings. //===----------------------------------------------------------------------===// OptionalAmount clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { const char *I = Beg; UpdateOnReturn UpdateBeg(Beg, I); unsigned accumulator = 0; bool hasDigits = false; for ( ; I != E; ++I) { char c = *I; if (c >= '0' && c <= '9') { hasDigits = true; accumulator = (accumulator * 10) + (c - '0'); continue; } if (hasDigits) return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, false); break; } return OptionalAmount(); } OptionalAmount clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, const char *E, unsigned &argIndex) { if (*Beg == '*') { ++Beg; return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); } return ParseAmount(Beg, E); } OptionalAmount clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, const char *Start, const char *&Beg, const char *E, PositionContext p) { if (*Beg == '*') { const char *I = Beg + 1; const OptionalAmount &Amt = ParseAmount(I, E); if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { H.HandleInvalidPosition(Beg, I - Beg, p); return OptionalAmount(false); } if (I == E) { // No more characters left? H.HandleIncompleteSpecifier(Start, E - Start); return OptionalAmount(false); } assert(Amt.getHowSpecified() == OptionalAmount::Constant); if (*I == '$') { // Handle positional arguments // Special case: '*0$', since this is an easy mistake. if (Amt.getConstantAmount() == 0) { H.HandleZeroPosition(Beg, I - Beg + 1); return OptionalAmount(false); } const char *Tmp = Beg; Beg = ++I; return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, Tmp, 0, true); } H.HandleInvalidPosition(Beg, I - Beg, p); return OptionalAmount(false); } return ParseAmount(Beg, E); } bool clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, FormatSpecifier &CS, const char *Start, const char *&Beg, const char *E, unsigned *argIndex) { // FIXME: Support negative field widths. if (argIndex) { CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex)); } else { const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E, analyze_format_string::FieldWidthPos); if (Amt.isInvalid()) return true; CS.setFieldWidth(Amt); } return false; } bool clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, FormatSpecifier &FS, const char *Start, const char *&Beg, const char *E) { const char *I = Beg; const OptionalAmount &Amt = ParseAmount(I, E); if (I == E) { // No more characters left? H.HandleIncompleteSpecifier(Start, E - Start); return true; } if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { // Special case: '%0$', since this is an easy mistake. if (Amt.getConstantAmount() == 0) { H.HandleZeroPosition(Start, I - Start); return true; } FS.setArgIndex(Amt.getConstantAmount() - 1); FS.setUsesPositionalArg(); // Update the caller's pointer if we decided to consume // these characters. Beg = I; return false; } return false; } bool clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, const char *&I, const char *E, const LangOptions &LO, bool IsScanf) { LengthModifier::Kind lmKind = LengthModifier::None; const char *lmPosition = I; switch (*I) { default: return false; case 'h': ++I; lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar) : LengthModifier::AsShort; break; case 'l': ++I; lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong) : LengthModifier::AsLong; break; case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; case 'q': lmKind = LengthModifier::AsLongLong; ++I; break; case 'a': if (IsScanf && !LO.C99 && !LO.CPlusPlus0x) { // For scanf in C90, look at the next character to see if this should // be parsed as the GNU extension 'a' length modifier. If not, this // will be parsed as a conversion specifier. ++I; if (I != E && (*I == 's' || *I == 'S' || *I == '[')) { lmKind = LengthModifier::AsAllocate; break; } --I; } return false; } LengthModifier lm(lmPosition, lmKind); FS.setLengthModifier(lm); return true; } //===----------------------------------------------------------------------===// // Methods on ArgTypeResult. //===----------------------------------------------------------------------===// bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { switch (K) { case InvalidTy: llvm_unreachable("ArgTypeResult must be valid"); case UnknownTy: return true; case AnyCharTy: { if (const BuiltinType *BT = argTy->getAs()) switch (BT->getKind()) { default: break; case BuiltinType::Char_S: case BuiltinType::SChar: case BuiltinType::UChar: case BuiltinType::Char_U: return true; } return false; } case SpecificTy: { argTy = C.getCanonicalType(argTy).getUnqualifiedType(); if (T == argTy) return true; // Check for "compatible types". if (const BuiltinType *BT = argTy->getAs()) switch (BT->getKind()) { default: break; case BuiltinType::Char_S: case BuiltinType::SChar: return T == C.UnsignedCharTy; case BuiltinType::Char_U: case BuiltinType::UChar: return T == C.SignedCharTy; case BuiltinType::Short: return T == C.UnsignedShortTy; case BuiltinType::UShort: return T == C.ShortTy; case BuiltinType::Int: return T == C.UnsignedIntTy; case BuiltinType::UInt: return T == C.IntTy; case BuiltinType::Long: return T == C.UnsignedLongTy; case BuiltinType::ULong: return T == C.LongTy; case BuiltinType::LongLong: return T == C.UnsignedLongLongTy; case BuiltinType::ULongLong: return T == C.LongLongTy; } return false; } case CStrTy: { const PointerType *PT = argTy->getAs(); if (!PT) return false; QualType pointeeTy = PT->getPointeeType(); if (const BuiltinType *BT = pointeeTy->getAs()) switch (BT->getKind()) { case BuiltinType::Void: case BuiltinType::Char_U: case BuiltinType::UChar: case BuiltinType::Char_S: case BuiltinType::SChar: return true; default: break; } return false; } case WCStrTy: { const PointerType *PT = argTy->getAs(); if (!PT) return false; QualType pointeeTy = C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); return pointeeTy == C.getWCharType(); } case WIntTy: { // Instead of doing a lookup for the definition of 'wint_t' (which // is defined by the system headers) instead see if wchar_t and // the argument type promote to the same type. QualType PromoWChar = C.getWCharType()->isPromotableIntegerType() ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType(); QualType PromoArg = argTy->isPromotableIntegerType() ? C.getPromotedIntegerType(argTy) : argTy; PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType(); PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType(); return PromoWChar == PromoArg; } case CPointerTy: return argTy->isPointerType() || argTy->isObjCObjectPointerType() || argTy->isNullPtrType(); case ObjCPointerTy: return argTy->getAs() != NULL; } // FIXME: Should be unreachable, but Clang is currently emitting // a warning. return false; } QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { switch (K) { case InvalidTy: llvm_unreachable("No representative type for Invalid ArgTypeResult"); case UnknownTy: return QualType(); case AnyCharTy: return C.CharTy; case SpecificTy: return T; case CStrTy: return C.getPointerType(C.CharTy); case WCStrTy: return C.getPointerType(C.getWCharType()); case ObjCPointerTy: return C.ObjCBuiltinIdTy; case CPointerTy: return C.VoidPtrTy; case WIntTy: { QualType WC = C.getWCharType(); return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC; } } // FIXME: Should be unreachable, but Clang is currently emitting // a warning. return QualType(); } std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const { std::string S = getRepresentativeType(C).getAsString(); if (Name) return std::string("'") + Name + "' (aka '" + S + "')"; return std::string("'") + S + "'"; } //===----------------------------------------------------------------------===// // Methods on OptionalAmount. //===----------------------------------------------------------------------===// ArgTypeResult analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { return Ctx.IntTy; } //===----------------------------------------------------------------------===// // Methods on LengthModifier. //===----------------------------------------------------------------------===// const char * analyze_format_string::LengthModifier::toString() const { switch (kind) { case AsChar: return "hh"; case AsShort: return "h"; case AsLong: // or AsWideChar return "l"; case AsLongLong: return "ll"; case AsIntMax: return "j"; case AsSizeT: return "z"; case AsPtrDiff: return "t"; case AsLongDouble: return "L"; case AsAllocate: return "a"; case None: return ""; } return NULL; } //===----------------------------------------------------------------------===// // Methods on ConversionSpecifier. //===----------------------------------------------------------------------===// const char *ConversionSpecifier::toString() const { switch (kind) { case dArg: return "d"; case iArg: return "i"; case oArg: return "o"; case uArg: return "u"; case xArg: return "x"; case XArg: return "X"; case fArg: return "f"; case FArg: return "F"; case eArg: return "e"; case EArg: return "E"; case gArg: return "g"; case GArg: return "G"; case aArg: return "a"; case AArg: return "A"; case cArg: return "c"; case sArg: return "s"; case pArg: return "p"; case nArg: return "n"; case PercentArg: return "%"; case ScanListArg: return "["; case InvalidSpecifier: return NULL; // MacOS X unicode extensions. case CArg: return "C"; case SArg: return "S"; // Objective-C specific specifiers. case ObjCObjArg: return "@"; // GlibC specific specifiers. case PrintErrno: return "m"; } return NULL; } //===----------------------------------------------------------------------===// // Methods on OptionalAmount. //===----------------------------------------------------------------------===// void OptionalAmount::toString(raw_ostream &os) const { switch (hs) { case Invalid: case NotSpecified: return; case Arg: if (UsesDotPrefix) os << "."; if (usesPositionalArg()) os << "*" << getPositionalArgIndex() << "$"; else os << "*"; break; case Constant: if (UsesDotPrefix) os << "."; os << amt; break; } } bool FormatSpecifier::hasValidLengthModifier() const { switch (LM.getKind()) { case LengthModifier::None: return true; // Handle most integer flags case LengthModifier::AsChar: case LengthModifier::AsShort: case LengthModifier::AsLongLong: case LengthModifier::AsIntMax: case LengthModifier::AsSizeT: case LengthModifier::AsPtrDiff: switch (CS.getKind()) { case ConversionSpecifier::dArg: case ConversionSpecifier::iArg: case ConversionSpecifier::oArg: case ConversionSpecifier::uArg: case ConversionSpecifier::xArg: case ConversionSpecifier::XArg: case ConversionSpecifier::nArg: return true; default: return false; } // Handle 'l' flag case LengthModifier::AsLong: switch (CS.getKind()) { case ConversionSpecifier::dArg: case ConversionSpecifier::iArg: case ConversionSpecifier::oArg: case ConversionSpecifier::uArg: case ConversionSpecifier::xArg: case ConversionSpecifier::XArg: case ConversionSpecifier::aArg: case ConversionSpecifier::AArg: case ConversionSpecifier::fArg: case ConversionSpecifier::FArg: case ConversionSpecifier::eArg: case ConversionSpecifier::EArg: case ConversionSpecifier::gArg: case ConversionSpecifier::GArg: case ConversionSpecifier::nArg: case ConversionSpecifier::cArg: case ConversionSpecifier::sArg: return true; default: return false; } case LengthModifier::AsLongDouble: switch (CS.getKind()) { case ConversionSpecifier::aArg: case ConversionSpecifier::AArg: case ConversionSpecifier::fArg: case ConversionSpecifier::FArg: case ConversionSpecifier::eArg: case ConversionSpecifier::EArg: case ConversionSpecifier::gArg: case ConversionSpecifier::GArg: return true; default: return false; } case LengthModifier::AsAllocate: switch (CS.getKind()) { case ConversionSpecifier::sArg: case ConversionSpecifier::SArg: case ConversionSpecifier::ScanListArg: return true; default: return false; } } return false; }