diff options
author | Reid Spencer <rspencer@reidspencer.com> | 2007-07-11 17:01:13 +0000 |
---|---|---|
committer | Reid Spencer <rspencer@reidspencer.com> | 2007-07-11 17:01:13 +0000 |
commit | 5f016e2cb5d11daeb237544de1c5d59f20fe1a6e (patch) | |
tree | 8b6bfcb8783d16827f896d5facbd4549300e8a1e /Parse/Parser.cpp | |
parent | a5f182095bf2065ca94f1c86957ee91f9068964b (diff) |
Stage two of getting CFE top correct.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@39734 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'Parse/Parser.cpp')
-rw-r--r-- | Parse/Parser.cpp | 615 |
1 files changed, 615 insertions, 0 deletions
diff --git a/Parse/Parser.cpp b/Parse/Parser.cpp new file mode 100644 index 0000000000..251febbcd6 --- /dev/null +++ b/Parse/Parser.cpp @@ -0,0 +1,615 @@ +//===--- Parser.cpp - C Language Family Parser ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Parser interfaces. +// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Parse/DeclSpec.h" +#include "clang/Parse/Scope.h" +using namespace clang; + +Parser::Parser(Preprocessor &pp, Action &actions) + : PP(pp), Actions(actions), Diags(PP.getDiagnostics()) { + Tok.setKind(tok::eof); + CurScope = 0; + + ParenCount = BracketCount = BraceCount = 0; +} + +/// Out-of-line virtual destructor to provide home for Action class. +Action::~Action() {} + + +void Parser::Diag(SourceLocation Loc, unsigned DiagID, + const std::string &Msg) { + Diags.Report(Loc, DiagID, &Msg, 1); +} + +/// MatchRHSPunctuation - For punctuation with a LHS and RHS (e.g. '['/']'), +/// this helper function matches and consumes the specified RHS token if +/// present. If not present, it emits the specified diagnostic indicating +/// that the parser failed to match the RHS of the token at LHSLoc. LHSName +/// should be the name of the unmatched LHS token. 
+SourceLocation Parser::MatchRHSPunctuation(tok::TokenKind RHSTok, + SourceLocation LHSLoc) { + + if (Tok.getKind() == RHSTok) + return ConsumeAnyToken(); + + SourceLocation R = Tok.getLocation(); + const char *LHSName = "unknown"; + diag::kind DID = diag::err_parse_error; + switch (RHSTok) { + default: break; + case tok::r_paren : LHSName = "("; DID = diag::err_expected_rparen; break; + case tok::r_brace : LHSName = "{"; DID = diag::err_expected_rbrace; break; + case tok::r_square: LHSName = "["; DID = diag::err_expected_rsquare; break; + case tok::greater: LHSName = "<"; DID = diag::err_expected_greater; break; + } + Diag(Tok, DID); + Diag(LHSLoc, diag::err_matching, LHSName); + SkipUntil(RHSTok); + return R; +} + +/// ExpectAndConsume - The parser expects that 'ExpectedTok' is next in the +/// input. If so, it is consumed and false is returned. +/// +/// If the input is malformed, this emits the specified diagnostic. Next, if +/// SkipToTok is specified, it calls SkipUntil(SkipToTok). Finally, true is +/// returned. +bool Parser::ExpectAndConsume(tok::TokenKind ExpectedTok, unsigned DiagID, + const char *Msg, tok::TokenKind SkipToTok) { + if (Tok.getKind() == ExpectedTok) { + ConsumeAnyToken(); + return false; + } + + Diag(Tok, DiagID, Msg); + if (SkipToTok != tok::unknown) + SkipUntil(SkipToTok); + return true; +} + +//===----------------------------------------------------------------------===// +// Error recovery. +//===----------------------------------------------------------------------===// + +/// SkipUntil - Read tokens until we get to the specified token, then consume +/// it (unless DontConsume is false). Because we cannot guarantee that the +/// token will ever occur, this skips to the next token, or to some likely +/// good stopping point. If StopAtSemi is true, skipping will stop at a ';' +/// character. +/// +/// If SkipUntil finds the specified token, it returns true, otherwise it +/// returns false. 
bool Parser::SkipUntil(const tok::TokenKind *Toks, unsigned NumToks,
                       bool StopAtSemi, bool DontConsume) {
  // We always want this function to skip at least one token if the first token
  // isn't one of Toks and if not at EOF.
  //
  // Despite its name, this flag is true while *no* token has been skipped yet
  // by this invocation; it is cleared at the bottom of the first loop
  // iteration.  While it is set, an unbalanced ')' / ']' / '}' is consumed
  // rather than treated as a stopping point, which guarantees progress.
  bool isFirstTokenSkipped = true;
  while (1) {
    // If we found one of the tokens, stop and return true.
    for (unsigned i = 0; i != NumToks; ++i) {
      if (Tok.getKind() == Toks[i]) {
        if (DontConsume) {
          // Noop, don't consume the token.
        } else {
          ConsumeAnyToken();
        }
        return true;
      }
    }

    switch (Tok.getKind()) {
    case tok::eof:
      // Ran out of tokens.
      return false;

    // For each opening token: consume it, then recurse to swallow everything
    // up to and including its closer.  The recursive calls use a SkipUntil
    // overload that is not defined in this file (presumably a single-token
    // convenience form declared in Parser.h); 'false' is its StopAtSemi
    // argument, so ';' inside the nested region does not stop the skip.
    case tok::l_paren:
      // Recursively skip properly-nested parens.
      ConsumeParen();
      SkipUntil(tok::r_paren, false);
      break;
    case tok::l_square:
      // Recursively skip properly-nested square brackets.
      ConsumeBracket();
      SkipUntil(tok::r_square, false);
      break;
    case tok::l_brace:
      // Recursively skip properly-nested braces.
      ConsumeBrace();
      SkipUntil(tok::r_brace, false);
      break;

    // Okay, we found a ']' or '}' or ')', which we think should be balanced.
    // Since the user wasn't looking for this token (if they were, it would
    // already be handled), this isn't balanced.  If there is a LHS token at a
    // higher level, we will assume that this matches the unbalanced token
    // and return it.  Otherwise, this is a spurious RHS token, which we skip.
    case tok::r_paren:
      if (ParenCount && !isFirstTokenSkipped)
        return false;  // Matches something.
      ConsumeParen();
      break;
    case tok::r_square:
      if (BracketCount && !isFirstTokenSkipped)
        return false;  // Matches something.
      ConsumeBracket();
      break;
    case tok::r_brace:
      if (BraceCount && !isFirstTokenSkipped)
        return false;  // Matches something.
      ConsumeBrace();
      break;

    case tok::string_literal:
    case tok::wide_string_literal:
      // String literals are consumed through their own entry point.
      ConsumeStringToken();
      break;
    case tok::semi:
      // A ';' that the caller was not explicitly looking for ends the skip
      // (without being consumed) when StopAtSemi is set.
      if (StopAtSemi)
        return false;
      // FALL THROUGH.
    default:
      // Skip this token.
      ConsumeToken();
      break;
    }
    isFirstTokenSkipped = false;
  }
}

//===----------------------------------------------------------------------===//
// Scope manipulation
//===----------------------------------------------------------------------===//

/// ScopeCache - Cache scopes to avoid malloc traffic.
/// This is a file-level static, so it is shared by every Parser object.
/// FIXME: eliminate this static ctor
static llvm::SmallVector<Scope*, 16> ScopeCache;

/// EnterScope - Start a new scope.
///
/// The new scope becomes CurScope and has the previous CurScope as its
/// parent.  A Scope object is taken from ScopeCache and re-initialized when
/// one is available; otherwise a new one is heap-allocated.
void Parser::EnterScope(unsigned ScopeFlags) {
  if (!ScopeCache.empty()) {
    Scope *N = ScopeCache.back();
    ScopeCache.pop_back();
    N->Init(CurScope, ScopeFlags);
    CurScope = N;
  } else {
    CurScope = new Scope(CurScope, ScopeFlags);
  }
}

/// ExitScope - Pop a scope off the scope stack.
///
/// The actions module is told about the scope going away before CurScope is
/// reset to its parent.  The popped Scope object is returned to ScopeCache,
/// or deleted if the cache already holds 16 entries.
void Parser::ExitScope() {
  assert(CurScope && "Scope imbalance!");

  // Inform the actions module that this scope is going away.
  Actions.PopScope(Tok.getLocation(), CurScope);

  Scope *Old = CurScope;
  CurScope = Old->getParent();

  // NOTE(review): 16 repeats the inline capacity given in the ScopeCache
  // declaration above; the two must be kept in sync by hand.
  if (ScopeCache.size() == 16)
    delete Old;
  else
    ScopeCache.push_back(Old);
}




//===----------------------------------------------------------------------===//
// C99 6.9: External Definitions.
//===----------------------------------------------------------------------===//

/// Parser destructor - Release the current scope (if any) and every Scope
/// object parked in ScopeCache.
Parser::~Parser() {
  // If we still have scopes active, delete the scope tree.
  // NOTE(review): only CurScope itself is deleted here; whether that also
  // releases its parent scopes depends on Scope's destructor, which is not
  // visible in this file -- confirm.
  delete CurScope;

  // Free the scope cache.
  // The cache is a file-level static, so this empties it for all parsers.
  while (!ScopeCache.empty()) {
    delete ScopeCache.back();
    ScopeCache.pop_back();
  }
}

/// Initialize - Warm up the parser.
///
/// Reads the first token, enters the global scope, declares the builtin
/// typedef '__builtin_va_list' through the actions module, and emits the
/// empty-source-file extension diagnostic if the input is already at EOF.
void Parser::Initialize() {
  // Prime the lexer look-ahead.
  ConsumeToken();

  // Create the global scope, install it as the current scope.
  assert(CurScope == 0 && "A scope is already active?");
  EnterScope(0);


  // Install builtin types.
  // TODO: Move this someplace more useful.
  {
    // Passed to the DeclSpec setters below and never read afterwards;
    // presumably receives the previous specifier's name on conflict.
    const char *Dummy;

    //__builtin_va_list
    // Built as if the user had written 'typedef int __builtin_va_list;'.
    DeclSpec DS;
    bool Error = DS.SetStorageClassSpec(DeclSpec::SCS_typedef, SourceLocation(),
                                        Dummy);

    // TODO: add a 'TST_builtin' type?
    Error |= DS.SetTypeSpecType(DeclSpec::TST_int, SourceLocation(), Dummy);
    // NOTE(review): Error is only read by this assert, so it is an unused
    // variable in builds where assert() compiles away.
    assert(!Error && "Error setting up __builtin_va_list!");

    Declarator D(DS, Declarator::FileContext);
    D.SetIdentifier(PP.getIdentifierInfo("__builtin_va_list"),SourceLocation());
    Actions.ParseDeclarator(CurScope, D, 0, 0);
  }

  if (Tok.getKind() == tok::eof)  // Empty source file is an extension.
    Diag(Tok, diag::ext_empty_source_file);
}

/// ParseTopLevelDecl - Parse one top-level declaration, return whatever the
/// action tells us to.  This returns true if the EOF was encountered.
///
/// Result is always written: it is set to 0 at EOF, and otherwise to the
/// value returned by ParseExternalDeclaration (which may itself be 0).
bool Parser::ParseTopLevelDecl(DeclTy*& Result) {
  Result = 0;
  if (Tok.getKind() == tok::eof) return true;

  Result = ParseExternalDeclaration();
  return false;
}

/// Finalize - Shut down the parser.
///
/// Leaves the global scope entered by Initialize; no scope may remain active
/// afterwards.
void Parser::Finalize() {
  ExitScope();
  assert(CurScope == 0 && "Scope imbalance!");
}

/// ParseTranslationUnit:
///       translation-unit: [C99 6.9]
///         external-declaration
///         translation-unit external-declaration
///
/// Convenience driver: Initialize, parse top-level declarations until EOF
/// (discarding each result), then Finalize.
void Parser::ParseTranslationUnit() {
  Initialize();

  DeclTy *Res;
  while (!ParseTopLevelDecl(Res))
    /*parse them all*/;

  Finalize();
}

/// ParseExternalDeclaration:
///       external-declaration: [C99 6.9]
///         function-definition        [TODO]
///         declaration                [TODO]
/// [EXT]   ';'
/// [GNU]   asm-definition
/// [GNU]   __extension__ external-declaration     [TODO]
/// [OBJC]  objc-class-definition
/// [OBJC]  objc-class-declaration
/// [OBJC]  objc-alias-declaration
/// [OBJC]  objc-protocol-definition
/// [OBJC]  objc-method-definition
/// [OBJC]  @end
///
/// [GNU] asm-definition:
///         simple-asm-expr ';'
///
/// Returns 0 for a stray ';', a top-level asm, and all of the Objective-C
/// forms handled here; otherwise returns whatever ParseDeclaration or
/// ParseDeclarationOrFunctionDefinition returns.
Parser::DeclTy *Parser::ParseExternalDeclaration() {
  switch (Tok.getKind()) {
  case tok::semi:
    Diag(Tok, diag::ext_top_level_semi);
    ConsumeToken();
    // TODO: Invoke action for top-level semicolon.
    return 0;
  case tok::kw_asm:
    ParseSimpleAsm();
    ExpectAndConsume(tok::semi, diag::err_expected_semi_after,
                     "top-level asm block");
    // TODO: Invoke action for top-level asm.
    return 0;
  case tok::at:
    // @ is not a legal token unless objc is enabled, no need to check.
    ParseObjCAtDirectives();
    return 0;
  case tok::minus:
    // A leading '-' starts an Objective-C instance method; in any other
    // language mode it is diagnosed and skipped.
    if (getLang().ObjC1) {
      ParseObjCInstanceMethodDeclaration();
    } else {
      Diag(Tok, diag::err_expected_external_declaration);
      ConsumeToken();
    }
    return 0;
  case tok::plus:
    // A leading '+' starts an Objective-C class method; in any other
    // language mode it is diagnosed and skipped.
    if (getLang().ObjC1) {
      ParseObjCClassMethodDeclaration();
    } else {
      Diag(Tok, diag::err_expected_external_declaration);
      ConsumeToken();
    }
    return 0;
  case tok::kw_typedef:
    // A function definition cannot start with a 'typedef' keyword.
    return ParseDeclaration(Declarator::FileContext);
  default:
    // We can't tell whether this is a function-definition or declaration yet.
    return ParseDeclarationOrFunctionDefinition();
  }
}

/// ParseDeclarationOrFunctionDefinition - Parse either a function-definition or
/// a declaration.  We can't tell which we have until we read up to the
/// compound-statement in function-definition.
///
///       function-definition: [C99 6.9.1]
///         declaration-specifiers[opt] declarator declaration-list[opt]
///                 compound-statement                           [TODO]
///       declaration: [C99 6.7]
///         declaration-specifiers init-declarator-list[opt] ';' [TODO]
/// [!C99]  init-declarator-list ';'                             [TODO]
/// [OMP]   threadprivate-directive                              [TODO]
///
/// Returns 0 if the declarator could not be parsed or is followed by an
/// unexpected token; those cases are diagnosed and skipped over here.
Parser::DeclTy *Parser::ParseDeclarationOrFunctionDefinition() {
  // Parse the common declaration-specifiers piece.
  DeclSpec DS;
  ParseDeclarationSpecifiers(DS);

  // C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };"
  // declaration-specifiers init-declarator-list[opt] ';'
  if (Tok.getKind() == tok::semi) {
    ConsumeToken();
    return Actions.ParsedFreeStandingDeclSpec(CurScope, DS);
  }

  // Parse the first declarator.
  Declarator DeclaratorInfo(DS, Declarator::FileContext);
  ParseDeclarator(DeclaratorInfo);
  // Error parsing the declarator?
  // A declarator without an identifier is what signals the failure here.
  if (DeclaratorInfo.getIdentifier() == 0) {
    // If so, skip until the semi-colon or a }.
    SkipUntil(tok::r_brace, true);
    if (Tok.getKind() == tok::semi)
      ConsumeToken();
    return 0;
  }

  // If the declarator is the start of a function definition, handle it.
  // The first branch lists the tokens that can only continue an ordinary
  // declaration; it is deliberately empty so control reaches the
  // init-declarator-list parsing at the bottom of the function.
  if (Tok.getKind() == tok::equal ||  // int X()=  -> not a function def
      Tok.getKind() == tok::comma ||  // int X(),  -> not a function def
      Tok.getKind() == tok::semi  ||  // int X();  -> not a function def
      Tok.getKind() == tok::kw_asm || // int X() __asm__ -> not a fn def
      Tok.getKind() == tok::kw___attribute) {// int X() __attr__ -> not a fn def
    // FALL THROUGH.
  } else if (DeclaratorInfo.isFunctionDeclarator() &&
             (Tok.getKind() == tok::l_brace ||  // int X() {}
              isDeclarationSpecifier())) {      // int X(f) int f; {}
    return ParseFunctionDefinition(DeclaratorInfo);
  } else {
    // Neither a declaration continuation nor a function body: diagnose
    // according to the kind of declarator, then resynchronize on ';'.
    if (DeclaratorInfo.isFunctionDeclarator())
      Diag(Tok, diag::err_expected_fn_body);
    else
      Diag(Tok, diag::err_expected_after_declarator);
    SkipUntil(tok::semi);
    return 0;
  }

  // Parse the init-declarator-list for a normal declaration.
  return ParseInitDeclaratorListAfterFirstDeclarator(DeclaratorInfo);
}

/// ParseFunctionDefinition - We parsed and verified that the specified
/// Declarator is well formed.  If this is a K&R-style function, read the
/// parameters declaration-list, then start the compound-statement.
+/// +/// declaration-specifiers[opt] declarator declaration-list[opt] +/// compound-statement [TODO] +/// +Parser::DeclTy *Parser::ParseFunctionDefinition(Declarator &D) { + const DeclaratorChunk &FnTypeInfo = D.getTypeObject(0); + assert(FnTypeInfo.Kind == DeclaratorChunk::Function && + "This isn't a function declarator!"); + const DeclaratorChunk::FunctionTypeInfo &FTI = FnTypeInfo.Fun; + + // If this declaration was formed with a K&R-style identifier list for the + // arguments, parse declarations for all of the args next. + // int foo(a,b) int a; float b; {} + if (!FTI.hasPrototype && FTI.NumArgs != 0) + ParseKNRParamDeclarations(D); + + // Enter a scope for the function body. + EnterScope(Scope::FnScope); + + // Tell the actions module that we have entered a function definition with the + // specified Declarator for the function. + DeclTy *Res = Actions.ParseStartOfFunctionDef(CurScope, D); + + + // We should have an opening brace now. + if (Tok.getKind() != tok::l_brace) { + Diag(Tok, diag::err_expected_fn_body); + + // Skip over garbage, until we get to '{'. Don't eat the '{'. + SkipUntil(tok::l_brace, true, true); + + // If we didn't find the '{', bail out. + if (Tok.getKind() != tok::l_brace) { + ExitScope(); + return 0; + } + } + + // Do not enter a scope for the brace, as the arguments are in the same scope + // (the function body) as the body itself. Instead, just read the statement + // list and put it into a CompoundStmt for safe keeping. + StmtResult FnBody = ParseCompoundStatementBody(); + if (FnBody.isInvalid) { + ExitScope(); + return 0; + } + + // Leave the function body scope. + ExitScope(); + + // TODO: Pass argument information. + return Actions.ParseFunctionDefBody(Res, FnBody.Val); +} + +/// ParseKNRParamDeclarations - Parse 'declaration-list[opt]' which provides +/// types for a function with a K&R-style identifier list for arguments. 
+void Parser::ParseKNRParamDeclarations(Declarator &D) { + // We know that the top-level of this declarator is a function. + DeclaratorChunk::FunctionTypeInfo &FTI = D.getTypeObject(0).Fun; + + // Read all the argument declarations. + while (isDeclarationSpecifier()) { + SourceLocation DSStart = Tok.getLocation(); + + // Parse the common declaration-specifiers piece. + DeclSpec DS; + ParseDeclarationSpecifiers(DS); + + // C99 6.9.1p6: 'each declaration in the declaration list shall have at + // least one declarator'. + // NOTE: GCC just makes this an ext-warn. It's not clear what it does with + // the declarations though. It's trivial to ignore them, really hard to do + // anything else with them. + if (Tok.getKind() == tok::semi) { + Diag(DSStart, diag::err_declaration_does_not_declare_param); + ConsumeToken(); + continue; + } + + // C99 6.9.1p6: Declarations shall contain no storage-class specifiers other + // than register. + if (DS.getStorageClassSpec() != DeclSpec::SCS_unspecified && + DS.getStorageClassSpec() != DeclSpec::SCS_register) { + Diag(DS.getStorageClassSpecLoc(), + diag::err_invalid_storage_class_in_func_decl); + DS.ClearStorageClassSpecs(); + } + if (DS.isThreadSpecified()) { + Diag(DS.getThreadSpecLoc(), + diag::err_invalid_storage_class_in_func_decl); + DS.ClearStorageClassSpecs(); + } + + // Parse the first declarator attached to this declspec. + Declarator ParmDeclarator(DS, Declarator::KNRTypeListContext); + ParseDeclarator(ParmDeclarator); + + // Handle the full declarator list. + while (1) { + DeclTy *AttrList; + // If attributes are present, parse them. + if (Tok.getKind() == tok::kw___attribute) + // FIXME: attach attributes too. + AttrList = ParseAttributes(); + + // Ask the actions module to compute the type for this declarator. + Action::TypeResult TR = + Actions.ParseParamDeclaratorType(CurScope, ParmDeclarator); + if (!TR.isInvalid && + // A missing identifier has already been diagnosed. 
+ ParmDeclarator.getIdentifier()) { + + // Scan the argument list looking for the correct param to apply this + // type. + for (unsigned i = 0; ; ++i) { + // C99 6.9.1p6: those declarators shall declare only identifiers from + // the identifier list. + if (i == FTI.NumArgs) { + Diag(ParmDeclarator.getIdentifierLoc(), diag::err_no_matching_param, + ParmDeclarator.getIdentifier()->getName()); + break; + } + + if (FTI.ArgInfo[i].Ident == ParmDeclarator.getIdentifier()) { + // Reject redefinitions of parameters. + if (FTI.ArgInfo[i].TypeInfo) { + Diag(ParmDeclarator.getIdentifierLoc(), + diag::err_param_redefinition, + ParmDeclarator.getIdentifier()->getName()); + } else { + FTI.ArgInfo[i].TypeInfo = TR.Val; + } + break; + } + } + } + + // If we don't have a comma, it is either the end of the list (a ';') or + // an error, bail out. + if (Tok.getKind() != tok::comma) + break; + + // Consume the comma. + ConsumeToken(); + + // Parse the next declarator. + ParmDeclarator.clear(); + ParseDeclarator(ParmDeclarator); + } + + if (Tok.getKind() == tok::semi) { + ConsumeToken(); + } else { + Diag(Tok, diag::err_parse_error); + // Skip to end of block or statement + SkipUntil(tok::semi, true); + if (Tok.getKind() == tok::semi) + ConsumeToken(); + } + } + + // The actions module must verify that all arguments were declared. +} + + +/// ParseAsmStringLiteral - This is just a normal string-literal, but is not +/// allowed to be a wide string, and is not subject to character translation. 
+/// +/// [GNU] asm-string-literal: +/// string-literal +/// +void Parser::ParseAsmStringLiteral() { + if (!isTokenStringLiteral()) { + Diag(Tok, diag::err_expected_string_literal); + return; + } + + ExprResult Res = ParseStringLiteralExpression(); + if (Res.isInvalid) return; + + // TODO: Diagnose: wide string literal in 'asm' +} + +/// ParseSimpleAsm +/// +/// [GNU] simple-asm-expr: +/// 'asm' '(' asm-string-literal ')' +/// +void Parser::ParseSimpleAsm() { + assert(Tok.getKind() == tok::kw_asm && "Not an asm!"); + ConsumeToken(); + + if (Tok.getKind() != tok::l_paren) { + Diag(Tok, diag::err_expected_lparen_after, "asm"); + return; + } + + SourceLocation Loc = ConsumeParen(); + + ParseAsmStringLiteral(); + + MatchRHSPunctuation(tok::r_paren, Loc); +} + |