diff options
author | Chris Lattner <sabre@nondot.org> | 2009-06-21 19:21:25 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2009-06-21 19:21:25 +0000 |
commit | 4651bca31bdad27184fa0d36640bf5ef1d83cf5c (patch) | |
tree | b97455dfc60462b5a65c04d5b95393f838627af2 /tools/llvm-mc/AsmLexer.cpp | |
parent | 1c3329f7072356c8da84534ed0a7033b10f73062 (diff) |
implement enough of a lexer to get through Olden/health/Output/health.llc.s
without errors.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@73855 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'tools/llvm-mc/AsmLexer.cpp')
-rw-r--r-- | tools/llvm-mc/AsmLexer.cpp | 162 |
1 files changed, 157 insertions, 5 deletions
diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp
index da86465d7f..578eec1852 100644
--- a/tools/llvm-mc/AsmLexer.cpp
+++ b/tools/llvm-mc/AsmLexer.cpp
@@ -14,6 +14,7 @@
 #include "AsmLexer.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include <cerrno>
 using namespace llvm;
 
 AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
@@ -23,6 +24,10 @@ AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
   TokStart = 0;
 }
 
+SMLoc AsmLexer::getLoc() const {
+  return SMLoc::getFromPointer(TokStart);
+}
+
 void AsmLexer::PrintError(const char *Loc, const std::string &Msg) const {
   SrcMgr.PrintError(SMLoc::getFromPointer(Loc), Msg);
 }
@@ -31,6 +36,13 @@ void AsmLexer::PrintError(SMLoc Loc, const std::string &Msg) const {
   SrcMgr.PrintError(Loc, Msg);
 }
 
+/// ReturnError - Print the specified message at the specified location.
+/// This is defined to always return asmtok::Error.
+asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
+  PrintError(Loc, Msg);
+  return asmtok::Error;
+}
+
 int AsmLexer::getNextChar() {
   char CurChar = *CurPtr++;
   switch (CurChar) {
@@ -59,6 +71,129 @@ int AsmLexer::getNextChar() {
   }
 }
 
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+asmtok::TokKind AsmLexer::LexIdentifier() {
+  while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
+         *CurPtr == '.' || *CurPtr == '@')
+    ++CurPtr;
+  CurStrVal.assign(TokStart, CurPtr);   // Entire identifier text.
+  return asmtok::Identifier;
+}
+
+/// LexPercent: Register: %[a-zA-Z0-9]+
+asmtok::TokKind AsmLexer::LexPercent() {
+  if (!isalnum(*CurPtr))
+    return asmtok::Error;  // Must have at least one character.
+  while (isalnum(*CurPtr))
+    ++CurPtr;
+  CurStrVal.assign(TokStart, CurPtr);   // Keeps the leading '%'.
+  return asmtok::Register;
+}
+
+/// LexSlash: Slash: /
+///           C-Style Comment: /* ... */
+asmtok::TokKind AsmLexer::LexSlash() {
+  if (*CurPtr != '*')
+    return asmtok::Slash;
+
+  // C Style comment.
+  ++CurPtr;  // skip the star.
+  while (1) {
+    int CurChar = getNextChar();
+    switch (CurChar) {
+    case EOF:
+      PrintError(TokStart, "Unterminated comment!");
+      return asmtok::Error;
+    case '*':
+      // End of the comment?
+      if (CurPtr[0] != '/') break;
+
+      ++CurPtr;   // End the */.
+      return LexToken();
+    }
+  }
+}
+
+/// LexHash: Comment: #[^\n]*
+asmtok::TokKind AsmLexer::LexHash() {
+  int CurChar = getNextChar();
+  while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
+    CurChar = getNextChar();
+
+  if (CurChar == EOF)
+    return asmtok::Eof;
+  return asmtok::EndOfStatement;
+}
+
+
+/// LexDigit: First character is [0-9].
+///   Local Label: [0-9][:]
+///   Forward/Backward Label: [0-9]f, [1-9]b ("0b" begins a binary integer)
+///   Binary integer: 0b[01]+
+///   Octal integer: 0[0-7]+
+///   Hex integer: 0x[0-9a-fA-F]+
+///   Decimal integer: [1-9][0-9]*
+///   TODO: FP literal.
+asmtok::TokKind AsmLexer::LexDigit() {
+  if (*CurPtr == ':')
+    return asmtok::Error;  // FIXME LOCAL LABEL.
+  if (*CurPtr == 'f' || (*CurPtr == 'b' && CurPtr[-1] != '0'))
+    return asmtok::Error;  // FIXME FORWARD/BACKWARD LABEL.
+
+  // Decimal integer: [1-9][0-9]*
+  if (CurPtr[-1] != '0') {
+    while (isdigit(*CurPtr))
+      ++CurPtr;
+    CurIntVal = strtoll(TokStart, 0, 10);
+    return asmtok::IntVal;
+  }
+
+  if (*CurPtr == 'b') {
+    ++CurPtr;
+    const char *NumStart = CurPtr;
+    while (CurPtr[0] == '0' || CurPtr[0] == '1')
+      ++CurPtr;
+
+    // Requires at least one binary digit.
+    if (CurPtr == NumStart)
+      return ReturnError(CurPtr-2, "Invalid binary number");
+    CurIntVal = strtoll(NumStart, 0, 2);
+    return asmtok::IntVal;
+  }
+
+  if (*CurPtr == 'x') {
+    ++CurPtr;
+    const char *NumStart = CurPtr;
+    while (isxdigit(CurPtr[0]))
+      ++CurPtr;
+
+    // Requires at least one hex digit.
+    if (CurPtr == NumStart)
+      return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+
+    errno = 0;
+    CurIntVal = strtoll(NumStart, 0, 16);
+    if (errno == EINVAL)
+      return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+    if (errno == ERANGE) {
+      errno = 0;
+      CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
+      if (errno == EINVAL)
+        return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+      if (errno == ERANGE)
+        return ReturnError(CurPtr-2, "Hexadecimal number out of range");
+    }
+    return asmtok::IntVal;
+  }
+
+  // Must be an octal number, it starts with 0.
+  while (*CurPtr >= '0' && *CurPtr <= '7')
+    ++CurPtr;
+  CurIntVal = strtoll(TokStart, 0, 8);
+  return asmtok::IntVal;
+}
+
+
 asmtok::TokKind AsmLexer::LexToken() {
   TokStart = CurPtr;
   // This always consumes at least one character.
@@ -66,9 +201,9 @@ asmtok::TokKind AsmLexer::LexToken() {
 
   switch (CurChar) {
   default:
-    // Handle letters: [a-zA-Z_]
-//    if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
-//      return LexIdentifier();
+    // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+    if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
+      return LexIdentifier();
 
     // Unknown character, emit an error.
     return asmtok::Error;
@@ -76,12 +211,29 @@ asmtok::TokKind AsmLexer::LexToken() {
   case 0:
   case ' ':
   case '\t':
-  case '\n':
-  case '\r':
     // Ignore whitespace.
     return LexToken();
+  case '\n': // FALL THROUGH.
+  case '\r': // FALL THROUGH.
+  case ';': return asmtok::EndOfStatement;
   case ':': return asmtok::Colon;
   case '+': return asmtok::Plus;
   case '-': return asmtok::Minus;
+  case '(': return asmtok::LParen;
+  case ')': return asmtok::RParen;
+  case '*': return asmtok::Star;
+  case ',': return asmtok::Comma;
+  case '$': return asmtok::Dollar;
+  case '%': return LexPercent();
+  case '/': return LexSlash();
+  case '#': return LexHash();
+  case '0': case '1': case '2': case '3': case '4':
+  case '5': case '6': case '7': case '8': case '9':
+    return LexDigit();
+
+  // TODO: Quoted identifiers (objc methods etc)
+  // local labels: [0-9][:]
+  // Forward/backward labels: [0-9][fb]
+  // Integers, fp constants, character constants.
   }
 }
\ No newline at end of file |