diff options
author | Chris Lattner <sabre@nondot.org> | 2007-11-18 08:46:26 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2007-11-18 08:46:26 +0000 |
commit | 8e3a8e0452695643d04c21e15c94b802aef81bae (patch) | |
tree | 9884e80bdb01fa3acb2b692d471f42b06c510032 | |
parent | c1819188b6c971b91c680a9a3c077b84a110e5fd (diff) |
Replace the original flex lexer with a hand writen one. This
drops a dependency on flex and lets us make future progress more
easily. Yay for 2 fewer .cvs files to make silly conflicts with.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@44213 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/AsmParser/LLLexer.cpp | 826 | ||||
-rw-r--r-- | lib/AsmParser/LLLexer.h | 57 | ||||
-rw-r--r-- | lib/AsmParser/Lexer.cpp.cvs | 3310 | ||||
-rw-r--r-- | lib/AsmParser/Lexer.l | 512 | ||||
-rw-r--r-- | lib/AsmParser/Lexer.l.cvs | 512 | ||||
-rw-r--r-- | lib/AsmParser/Makefile | 5 | ||||
-rw-r--r-- | lib/AsmParser/Parser.cpp | 44 | ||||
-rw-r--r-- | lib/AsmParser/ParserInternals.h | 30 | ||||
-rw-r--r-- | lib/AsmParser/llvmAsmParser.y | 57 |
9 files changed, 929 insertions, 4424 deletions
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp new file mode 100644 index 0000000000..310ea5b052 --- /dev/null +++ b/lib/AsmParser/LLLexer.cpp @@ -0,0 +1,826 @@ +//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implement the Lexer for .ll files. +// +//===----------------------------------------------------------------------===// + +#include "LLLexer.h" +#include "ParserInternals.h" +#include "llvm/Support/MemoryBuffer.h" + +#include <list> +#include "llvmAsmParser.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Helper functions. +//===----------------------------------------------------------------------===// + +// atoull - Convert an ascii string of decimal digits into the unsigned long +// long representation... this does not have to do input error checking, +// because we know that the input will be matched by a suitable regex... +// +static uint64_t atoull(const char *Buffer, const char *End) { + uint64_t Result = 0; + for (; Buffer != End; Buffer++) { + uint64_t OldRes = Result; + Result *= 10; + Result += *Buffer-'0'; + if (Result < OldRes) { // Uh, oh, overflow detected!!! + GenerateError("constant bigger than 64 bits detected!"); + return 0; + } + } + return Result; +} + +static uint64_t HexIntToVal(const char *Buffer, const char *End) { + uint64_t Result = 0; + for (; Buffer != End; ++Buffer) { + uint64_t OldRes = Result; + Result *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Result += C-'0'; + else if (C >= 'A' && C <= 'F') + Result += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Result += C-'a'+10; + + if (Result < OldRes) { // Uh, oh, overflow detected!!! + GenerateError("constant bigger than 64 bits detected!"); + return 0; + } + } + return Result; +} + +// HexToFP - Convert the ascii string in hexadecimal format to the floating +// point representation of it. +// +static double HexToFP(const char *Buffer, const char *End) { + return BitsToDouble(HexIntToVal(Buffer, End)); // Cast Hex constant to double +} + +static void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]){ + Pair[0] = 0; + for (int i=0; i<16; i++, Buffer++) { + assert(Buffer != End); + Pair[0] *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Pair[0] += C-'0'; + else if (C >= 'A' && C <= 'F') + Pair[0] += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Pair[0] += C-'a'+10; + } + Pair[1] = 0; + for (int i=0; i<16 && Buffer != End; i++, Buffer++) { + Pair[1] *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Pair[1] += C-'0'; + else if (C >= 'A' && C <= 'F') + Pair[1] += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Pair[1] += C-'a'+10; + } + if (*Buffer) + GenerateError("constant bigger than 128 bits detected!"); +} + +// UnEscapeLexed - Run through the specified buffer and change \xx codes to the +// appropriate character. +static void UnEscapeLexed(std::string &Str) { + if (Str.empty()) return; + + char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size(); + char *BOut = Buffer; + for (char *BIn = Buffer; BIn != EndBuffer; ) { + if (BIn[0] == '\\') { + if (BIn < EndBuffer-1 && BIn[1] == '\\') { + *BOut++ = '\\'; // Two \ becomes one + BIn += 2; + } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) { + char Tmp = BIn[3]; BIn[3] = 0; // Terminate string + *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number + BIn[3] = Tmp; // Restore character + BIn += 3; // Skip over handled chars + ++BOut; + } else { + *BOut++ = *BIn++; + } + } else { + *BOut++ = *BIn++; + } + } + Str.resize(BOut-Buffer); +} + +/// isLabelChar - Return true for [-a-zA-Z$._0-9]. +static bool isLabelChar(char C) { + return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_'; +} + + +/// isLabelTail - Return true if this pointer points to a valid end of a label. +static const char *isLabelTail(const char *CurPtr) { + while (1) { + if (CurPtr[0] == ':') return CurPtr+1; + if (!isLabelChar(CurPtr[0])) return 0; + ++CurPtr; + } +} + + + +//===----------------------------------------------------------------------===// +// Lexer definition. +//===----------------------------------------------------------------------===// + +// FIXME: REMOVE THIS. +#define YYEOF 0 +#define YYERROR -2 + +LLLexer::LLLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) { + CurPtr = CurBuf->getBufferStart(); +} + +std::string LLLexer::getFilename() const { + return CurBuf->getBufferIdentifier(); +} + +int LLLexer::getNextChar() { + char CurChar = *CurPtr++; + switch (CurChar) { + default: return (unsigned char)CurChar; + case 0: + // A nul character in the stream is either the end of the current buffer or + // a random nul in the file. Disambiguate that here. + if (CurPtr-1 != CurBuf->getBufferEnd()) + return 0; // Just whitespace. + + // Otherwise, return end of file. + --CurPtr; // Another call to lex will return EOF again. + return EOF; + case '\n': + case '\r': + // Handle the newline character by ignoring it and incrementing the line + // count. However, be careful about 'dos style' files with \n\r in them. + // Only treat a \n\r or \r\n as a single line. + if ((*CurPtr == '\n' || (*CurPtr == '\r')) && + *CurPtr != CurChar) + ++CurPtr; // Eat the two char newline sequence. + + ++CurLineNo; + return '\n'; + } +} + + +int LLLexer::LexToken() { + TokStart = CurPtr; + + int CurChar = getNextChar(); + + switch (CurChar) { + default: + // Handle letters: [a-zA-Z_] + if (isalpha(CurChar) || CurChar == '_') + return LexIdentifier(); + + return CurChar; + case EOF: return YYEOF; + case 0: + case ' ': + case '\t': + case '\n': + case '\r': + // Ignore whitespace. + return LexToken(); + case '+': return LexPositive(); + case '@': return LexAt(); + case '%': return LexPercent(); + case '"': return LexQuote(); + case '.': + if (const char *Ptr = isLabelTail(CurPtr)) { + CurPtr = Ptr; + llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); + return LABELSTR; + } + if (CurPtr[0] == '.' && CurPtr[1] == '.') { + CurPtr += 2; + return DOTDOTDOT; + } + return '.'; + case '$': + if (const char *Ptr = isLabelTail(CurPtr)) { + CurPtr = Ptr; + llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); + return LABELSTR; + } + return '$'; + case ';': + SkipLineComment(); + return LexToken(); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '-': + return LexDigitOrNegative(); + } +} + +void LLLexer::SkipLineComment() { + while (1) { + if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF) + return; + } +} + +/// LexAt - Lex all tokens that start with an @ character: +/// AtStringConstant @\"[^\"]*\" +/// GlobalVarName @[-a-zA-Z$._][-a-zA-Z$._0-9]* +/// GlobalVarID @[0-9]+ +int LLLexer::LexAt() { + // Handle AtStringConstant: @\"[^\"]*\" + if (CurPtr[0] == '"') { + ++CurPtr; + + while (1) { + int CurChar = getNextChar(); + + if (CurChar == EOF) { + GenerateError("End of file in global variable name"); + return YYERROR; + } + if (CurChar == '"') { + llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); + UnEscapeLexed(*llvmAsmlval.StrVal); + return ATSTRINGCONSTANT; + } + } + } + + // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]* + if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' || CurPtr[0] == '_') { + ++CurPtr; + while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' || CurPtr[0] == '_') + ++CurPtr; + + llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip @ + return GLOBALVAR; + } + + // Handle GlobalVarID: @[0-9]+ + if (isdigit(CurPtr[0])) { + for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr); + + uint64_t Val = atoull(TokStart+1, CurPtr); + if ((unsigned)Val != Val) + GenerateError("Invalid value number (too large)!"); + llvmAsmlval.UIntVal = unsigned(Val); + return GLOBALVAL_ID; + } + + return '@'; +} + + +/// LexPercent - Lex all tokens that start with a % character: +/// PctStringConstant %\"[^\"]*\" +/// LocalVarName %[-a-zA-Z$._][-a-zA-Z$._0-9]* +/// LocalVarID %[0-9]+ +int LLLexer::LexPercent() { + // Handle PctStringConstant: %\"[^\"]*\" + if (CurPtr[0] == '"') { + ++CurPtr; + + while (1) { + int CurChar = getNextChar(); + + if (CurChar == EOF) { + GenerateError("End of file in local variable name"); + return YYERROR; + } + if (CurChar == '"') { + llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); + UnEscapeLexed(*llvmAsmlval.StrVal); + return PCTSTRINGCONSTANT; + } + } + } + + // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]* + if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' || CurPtr[0] == '_') { + ++CurPtr; + while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' || CurPtr[0] == '_') + ++CurPtr; + + llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip % + return LOCALVAR; + } + + // Handle LocalVarID: %[0-9]+ + if (isdigit(CurPtr[0])) { + for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr); + + uint64_t Val = atoull(TokStart+1, CurPtr); + if ((unsigned)Val != Val) + GenerateError("Invalid value number (too large)!"); + llvmAsmlval.UIntVal = unsigned(Val); + return LOCALVAL_ID; + } + + return '%'; +} + +/// LexQuote - Lex all tokens that start with a " character: +/// QuoteLabel "[^"]+": +/// StringConstant "[^"]*" +int LLLexer::LexQuote() { + while (1) { + int CurChar = getNextChar(); + + if (CurChar == EOF) { + GenerateError("End of file in quoted string"); + return YYERROR; + } + + if (CurChar != '"') continue; + + if (CurPtr[0] != ':') { + llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-1); + UnEscapeLexed(*llvmAsmlval.StrVal); + return STRINGCONSTANT; + } + + ++CurPtr; + llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-2); + UnEscapeLexed(*llvmAsmlval.StrVal); + return LABELSTR; + } +} + +static bool JustWhitespaceNewLine(const char *&Ptr) { + const char *ThisPtr = Ptr; + while (*ThisPtr == ' ' || *ThisPtr == '\t') + ++ThisPtr; + if (*ThisPtr == '\n' || *ThisPtr == '\r') { + Ptr = ThisPtr; + return true; + } + return false; +} + + +/// LexIdentifier: Handle several related productions: +/// Label [-a-zA-Z$._0-9]+: +/// IntegerType i[0-9]+ +/// Keyword sdiv, float, ... +/// HexIntConstant [us]0x[0-9A-Fa-f]+ +int LLLexer::LexIdentifier() { + const char *StartChar = CurPtr; + const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; + const char *KeywordEnd = 0; + + for (; isLabelChar(*CurPtr); ++CurPtr) { + // If we decide this is an integer, remember the end of the sequence. + if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr; + if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr; + } + + // If we stopped due to a colon, this really is a label. + if (*CurPtr == ':') { + llvmAsmlval.StrVal = new std::string(StartChar-1, CurPtr++); + return LABELSTR; + } + + // Otherwise, this wasn't a label. If this was valid as an integer type, + // return it. + if (IntEnd == 0) IntEnd = CurPtr; + if (IntEnd != StartChar) { + CurPtr = IntEnd; + uint64_t NumBits = atoull(StartChar, CurPtr); + if (NumBits < IntegerType::MIN_INT_BITS || + NumBits > IntegerType::MAX_INT_BITS) { + GenerateError("Bitwidth for integer type out of range!"); + return YYERROR; + } + const Type* Ty = IntegerType::get(NumBits); + llvmAsmlval.PrimType = Ty; + return INTTYPE; + } + + // Otherwise, this was a letter sequence. See which keyword this is. + if (KeywordEnd == 0) KeywordEnd = CurPtr; + CurPtr = KeywordEnd; + --StartChar; + unsigned Len = CurPtr-StartChar; +#define KEYWORD(STR, TOK) \ + if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) return TOK; + + KEYWORD("begin", BEGINTOK); + KEYWORD("end", ENDTOK); + KEYWORD("true", TRUETOK); + KEYWORD("false", FALSETOK); + KEYWORD("declare", DECLARE); + KEYWORD("define", DEFINE); + KEYWORD("global", GLOBAL); + KEYWORD("constant", CONSTANT); + + KEYWORD("internal", INTERNAL); + KEYWORD("linkonce", LINKONCE); + KEYWORD("weak", WEAK); + KEYWORD("appending", APPENDING); + KEYWORD("dllimport", DLLIMPORT); + KEYWORD("dllexport", DLLEXPORT); + KEYWORD("hidden", HIDDEN); + KEYWORD("protected", PROTECTED); + KEYWORD("extern_weak", EXTERN_WEAK); + KEYWORD("external", EXTERNAL); + KEYWORD("thread_local", THREAD_LOCAL); + KEYWORD("zeroinitializer", ZEROINITIALIZER); + KEYWORD("undef", UNDEF); + KEYWORD("null", NULL_TOK); + KEYWORD("to", TO); + KEYWORD("tail", TAIL); + KEYWORD("target", TARGET); + KEYWORD("triple", TRIPLE); + KEYWORD("deplibs", DEPLIBS); + KEYWORD("datalayout", DATALAYOUT); + KEYWORD("volatile", VOLATILE); + KEYWORD("align", ALIGN); + KEYWORD("section", SECTION); + KEYWORD("alias", ALIAS); + KEYWORD("module", MODULE); + KEYWORD("asm", ASM_TOK); + KEYWORD("sideeffect", SIDEEFFECT); + + KEYWORD("cc", CC_TOK); + KEYWORD("ccc", CCC_TOK); + KEYWORD("fastcc", FASTCC_TOK); + KEYWORD("coldcc", COLDCC_TOK); + KEYWORD("x86_stdcallcc", X86_STDCALLCC_TOK); + KEYWORD("x86_fastcallcc", X86_FASTCALLCC_TOK); + + KEYWORD("signext", SIGNEXT); + KEYWORD("zeroext", ZEROEXT); + KEYWORD("inreg", INREG); + KEYWORD("sret", SRET); + KEYWORD("nounwind", NOUNWIND); + KEYWORD("noreturn", NORETURN); + KEYWORD("noalias", NOALIAS); + KEYWORD("byval", BYVAL); + KEYWORD("nest", NEST); + KEYWORD("pure", PURE); + KEYWORD("const", CONST); + + KEYWORD("type", TYPE); + KEYWORD("opaque", OPAQUE); + + KEYWORD("eq" , EQ); + KEYWORD("ne" , NE); + KEYWORD("slt", SLT); + KEYWORD("sgt", SGT); + KEYWORD("sle", SLE); + KEYWORD("sge", SGE); + KEYWORD("ult", ULT); + KEYWORD("ugt", UGT); + KEYWORD("ule", ULE); + KEYWORD("uge", UGE); + KEYWORD("oeq", OEQ); + KEYWORD("one", ONE); + KEYWORD("olt", OLT); + KEYWORD("ogt", OGT); + KEYWORD("ole", OLE); + KEYWORD("oge", OGE); + KEYWORD("ord", ORD); + KEYWORD("uno", UNO); + KEYWORD("ueq", UEQ); + KEYWORD("une", UNE); +#undef KEYWORD + + // Keywords for types. +#define TYPEKEYWORD(STR, LLVMTY, TOK) \ + if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ + llvmAsmlval.PrimType = LLVMTY; return TOK; } + TYPEKEYWORD("void", Type::VoidTy, VOID); + TYPEKEYWORD("float", Type::FloatTy, FLOAT); + TYPEKEYWORD("double", Type::DoubleTy, DOUBLE); + TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty, X86_FP80); + TYPEKEYWORD("fp128", Type::FP128Ty, FP128); + TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty, PPC_FP128); + TYPEKEYWORD("label", Type::LabelTy, LABEL); +#undef TYPEKEYWORD + + // Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is + // to avoid conflicting with the sext/zext instructions, below. + if (Len == 4 && !memcmp(StartChar, "sext", 4)) { + // Scan CurPtr ahead, seeing if there is just whitespace before the newline. + if (JustWhitespaceNewLine(CurPtr)) + return SIGNEXT; + } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) { + // Scan CurPtr ahead, seeing if there is just whitespace before the newline. + if (JustWhitespaceNewLine(CurPtr)) + return ZEROEXT; + } + + // Keywords for instructions. +#define INSTKEYWORD(STR, type, Enum, TOK) \ + if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ + llvmAsmlval.type = Instruction::Enum; return TOK; } + + INSTKEYWORD("add", BinaryOpVal, Add, ADD); + INSTKEYWORD("sub", BinaryOpVal, Sub, SUB); + INSTKEYWORD("mul", BinaryOpVal, Mul, MUL); + INSTKEYWORD("udiv", BinaryOpVal, UDiv, UDIV); + INSTKEYWORD("sdiv", BinaryOpVal, SDiv, SDIV); + INSTKEYWORD("fdiv", BinaryOpVal, FDiv, FDIV); + INSTKEYWORD("urem", BinaryOpVal, URem, UREM); + INSTKEYWORD("srem", BinaryOpVal, SRem, SREM); + INSTKEYWORD("frem", BinaryOpVal, FRem, FREM); + INSTKEYWORD("shl", BinaryOpVal, Shl, SHL); + INSTKEYWORD("lshr", BinaryOpVal, LShr, LSHR); + INSTKEYWORD("ashr", BinaryOpVal, AShr, ASHR); + INSTKEYWORD("and", BinaryOpVal, And, AND); + INSTKEYWORD("or", BinaryOpVal, Or , OR ); + INSTKEYWORD("xor", BinaryOpVal, Xor, XOR); + INSTKEYWORD("icmp", OtherOpVal, ICmp, ICMP); + INSTKEYWORD("fcmp", OtherOpVal, FCmp, FCMP); + + INSTKEYWORD("phi", OtherOpVal, PHI, PHI_TOK); + INSTKEYWORD("call", OtherOpVal, Call, CALL); + INSTKEYWORD("trunc", CastOpVal, Trunc, TRUNC); + INSTKEYWORD("zext", CastOpVal, ZExt, ZEXT); + INSTKEYWORD("sext", CastOpVal, SExt, SEXT); + INSTKEYWORD("fptrunc", CastOpVal, FPTrunc, FPTRUNC); + INSTKEYWORD("fpext", CastOpVal, FPExt, FPEXT); + INSTKEYWORD("uitofp", CastOpVal, UIToFP, UITOFP); + INSTKEYWORD("sitofp", CastOpVal, SIToFP, SITOFP); + INSTKEYWORD("fptoui", CastOpVal, FPToUI, FPTOUI); + INSTKEYWORD("fptosi", CastOpVal, FPToSI, FPTOSI); + INSTKEYWORD("inttoptr", CastOpVal, IntToPtr, INTTOPTR); + INSTKEYWORD("ptrtoint", CastOpVal, PtrToInt, PTRTOINT); + INSTKEYWORD("bitcast", CastOpVal, BitCast, BITCAST); + INSTKEYWORD("select", OtherOpVal, Select, SELECT); + INSTKEYWORD("va_arg", OtherOpVal, VAArg , VAARG); + INSTKEYWORD("ret", TermOpVal, Ret, RET); + INSTKEYWORD("br", TermOpVal, Br, BR); + INSTKEYWORD("switch", TermOpVal, Switch, SWITCH); + INSTKEYWORD("invoke", TermOpVal, Invoke, INVOKE); + INSTKEYWORD("unwind", TermOpVal, Unwind, UNWIND); + INSTKEYWORD("unreachable", TermOpVal, Unreachable, UNREACHABLE); + + INSTKEYWORD("malloc", MemOpVal, Malloc, MALLOC); + INSTKEYWORD("alloca", MemOpVal, Alloca, ALLOCA); + INSTKEYWORD("free", MemOpVal, Free, FREE); + INSTKEYWORD("load", MemOpVal, Load, LOAD); + INSTKEYWORD("store", MemOpVal, Store, STORE); + INSTKEYWORD("getelementptr", MemOpVal, GetElementPtr, GETELEMENTPTR); + + INSTKEYWORD("extractelement", OtherOpVal, ExtractElement, EXTRACTELEMENT); + INSTKEYWORD("insertelement", OtherOpVal, InsertElement, INSERTELEMENT); + INSTKEYWORD("shufflevector", OtherOpVal, ShuffleVector, SHUFFLEVECTOR); +#undef INSTKEYWORD + + // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by + // the CFE to avoid forcing it to deal with 64-bit numbers. + if ((TokStart[0] == 'u' || TokStart[0] == 's') && + TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) { + int len = CurPtr-TokStart-3; + uint32_t bits = len * 4; + APInt Tmp(bits, TokStart+3, len, 16); + uint32_t activeBits = Tmp.getActiveBits(); + if (activeBits > 0 && activeBits < bits) + Tmp.trunc(activeBits); + if (Tmp.getBitWidth() > 64) { + llvmAsmlval.APIntVal = new APInt(Tmp); + return TokStart[0] == 's' ? ESAPINTVAL : EUAPINTVAL; + } else if (TokStart[0] == 's') { + llvmAsmlval.SInt64Val = Tmp.getSExtValue(); + return ESINT64VAL; + } else { + llvmAsmlval.UInt64Val = Tmp.getZExtValue(); + return EUINT64VAL; + } + } + + // Finally, if this is "cc1234", return this as just "cc". + if (TokStart[0] == 'c' && TokStart[1] == 'c') { + CurPtr = TokStart+2; + return CC_TOK; + } + + // Finally, if this isn't known, return just a single character. + CurPtr = TokStart+1; + return TokStart[0]; +} + + +/// Lex0x: Handle productions that start with 0x, knowing that it matches and +/// that this is not a label: +/// HexFPConstant 0x[0-9A-Fa-f]+ +/// HexFP80Constant 0xK[0-9A-Fa-f]+ +/// HexFP128Constant 0xL[0-9A-Fa-f]+ +/// HexPPC128Constant 0xM[0-9A-Fa-f]+ +int LLLexer::Lex0x() { + CurPtr = TokStart + 2; + + char Kind; + if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') { + Kind = *CurPtr++; + } else { + Kind = 'J'; + } + + if (!isxdigit(CurPtr[0])) { + // Bad token, return it as just zero. + CurPtr = TokStart+1; + return '0'; + } + + while (isxdigit(CurPtr[0])) + ++CurPtr; + + if (Kind == 'J') { + // HexFPConstant - Floating point constant represented in IEEE format as a + // hexadecimal number for when exponential notation is not precise enough. + // Float and double only. + llvmAsmlval.FPVal = new APFloat(HexToFP(TokStart+2, CurPtr)); + return FPVAL; + } + + uint64_t Pair[2]; + HexToIntPair(TokStart+3, CurPtr, Pair); + switch (Kind) { + default: assert(0 && "Unknown kind!"); + case 'K': + // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) + llvmAsmlval.FPVal = new APFloat(APInt(80, 2, Pair)); + return FPVAL; + case 'L': + // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) + llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair), true); + return FPVAL; + case 'M': + // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) + llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair)); + return FPVAL; + } +} + +/// LexIdentifier: Handle several related productions: +/// Label [-a-zA-Z$._0-9]+: +/// NInteger -[0-9]+ +/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? +/// PInteger [0-9]+ +/// HexFPConstant 0x[0-9A-Fa-f]+ +/// HexFP80Constant 0xK[0-9A-Fa-f]+ +/// HexFP128Constant 0xL[0-9A-Fa-f]+ +/// HexPPC128Constant 0xM[0-9A-Fa-f]+ +int LLLexer::LexDigitOrNegative() { + // If the letter after the negative is a number, this is probably a label. + if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) { + // Okay, this is not a number after the -, it's probably a label. + if (const char *End = isLabelTail(CurPtr)) { + llvmAsmlval.StrVal = new std::string(TokStart, End-1); + CurPtr = End; + return LABELSTR; + } + + return CurPtr[-1]; + } + + // At this point, it is either a label, int or fp constant. + + // Skip digits, we have at least one. + for (; isdigit(CurPtr[0]); ++CurPtr); + + // Check to see if this really is a label afterall, e.g. "-1:". + if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { + if (const char *End = isLabelTail(CurPtr)) { + llvmAsmlval.StrVal = new std::string(TokStart, End-1); + CurPtr = End; + return LABELSTR; + } + } + + // If the next character is a '.', then it is a fp value, otherwise its + // integer. + if (CurPtr[0] != '.') { + if (TokStart[0] == '0' && TokStart[1] == 'x') + return Lex0x(); + unsigned Len = CurPtr-TokStart; + uint32_t numBits = ((Len * 64) / 19) + 2; + APInt Tmp(numBits, TokStart, Len, 10); + if (TokStart[0] == '-') { + uint32_t minBits = Tmp.getMinSignedBits(); + if (minBits > 0 && minBits < numBits) + Tmp.trunc(minBits); + if (Tmp.getBitWidth() > 64) { + llvmAsmlval.APIntVal = new APInt(Tmp); + return ESAPINTVAL; + } else { + llvmAsmlval.SInt64Val = Tmp.getSExtValue(); + return ESINT64VAL; + } + } else { + uint32_t activeBits = Tmp.getActiveBits(); + if (activeBits > 0 && activeBits < numBits) + Tmp.trunc(activeBits); + if (Tmp.getBitWidth() > 64) { + llvmAsmlval.APIntVal = new APInt(Tmp); + return EUAPINTVAL; + } else { + llvmAsmlval.UInt64Val = Tmp.getZExtValue(); + return EUINT64VAL; + } + } + } + + ++CurPtr; + + // Skip over [0-9]*([eE][-+]?[0-9]+)? + while (isdigit(CurPtr[0])) ++CurPtr; + + if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { + if (isdigit(CurPtr[1]) || + ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { + CurPtr += 2; + while (isdigit(CurPtr[0])) ++CurPtr; + } + } + + llvmAsmlval.FPVal = new APFloat(atof(TokStart)); + return FPVAL; +} + +/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? +int LLLexer::LexPositive() { + // If the letter after the negative is a number, this is probably not a + // label. + if (!isdigit(CurPtr[0])) + return CurPtr[-1]; + + // Skip digits. + for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr); + + // At this point, we need a '.'. + if (CurPtr[0] != '.') { + CurPtr = TokStart+1; + return TokStart[0]; + } + + ++CurPtr; + + // Skip over [0-9]*([eE][-+]?[0-9]+)? + while (isdigit(CurPtr[0])) ++CurPtr; + + if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { + if (isdigit(CurPtr[1]) || + ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { + CurPtr += 2; + while (isdigit(CurPtr[0])) ++CurPtr; + } + } + + llvmAsmlval.FPVal = new APFloat(atof(TokStart)); + return FPVAL; +} + + +//===----------------------------------------------------------------------===// +// Define the interface to this file. +//===----------------------------------------------------------------------===// + +static LLLexer *TheLexer; + +void InitLLLexer(llvm::MemoryBuffer *MB) { + assert(TheLexer == 0 && "LL Lexer isn't reentrant yet"); + TheLexer = new LLLexer(MB); +} + +int llvmAsmlex() { + return TheLexer->LexToken(); +} +const char *LLLgetTokenStart() { return TheLexer->getTokStart(); } +unsigned LLLgetTokenLength() { return TheLexer->getTokLength(); } +std::string LLLgetFilename() { return TheLexer->getFilename(); } +unsigned LLLgetLineNo() { return TheLexer->getLineNo(); } + +void FreeLexer() { + delete TheLexer; + TheLexer = 0; +} diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h new file mode 100644 index 0000000000..eaf32203bb --- /dev/null +++ b/lib/AsmParser/LLLexer.h @@ -0,0 +1,57 @@ +//===- LLLexer.h - Lexer for LLVM Assembly Files ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class represents the Lexer for .ll files. +// +//===----------------------------------------------------------------------===// + +#ifndef LIB_ASMPARSER_LLLEXER_H +#define LIB_ASMPARSER_LLLEXER_H + +#include <vector> +#include <string> +#include <iosfwd> + +namespace llvm { + class MemoryBuffer; + + class LLLexer { + const char *CurPtr; + unsigned CurLineNo; + MemoryBuffer *CurBuf; + + const char *TokStart; + + std::string TheError; + public: + LLLexer(MemoryBuffer *StartBuf); + ~LLLexer() {} + + const char *getTokStart() const { return TokStart; } + unsigned getTokLength() const { return CurPtr-TokStart; } + unsigned getLineNo() const { return CurLineNo; } + std::string getFilename() const; + int LexToken(); + + const std::string getError() const { return TheError; } + + private: + int getNextChar(); + void SkipLineComment(); + int LexIdentifier(); + int LexDigitOrNegative(); + int LexPositive(); + int LexAt(); + int LexPercent(); + int LexQuote(); + int Lex0x(); + }; +} // end namespace llvm + +#endif diff --git a/lib/AsmParser/Lexer.cpp.cvs b/lib/AsmParser/Lexer.cpp.cvs deleted file mode 100644 index 4522aa84c3..0000000000 --- a/lib/AsmParser/Lexer.cpp.cvs +++ /dev/null @@ -1,3310 +0,0 @@ -#line 2 "Lexer.cpp" - -#line 4 "Lexer.cpp" - -#define YY_INT_ALIGNED short int - -/* A lexical scanner generated by flex */ - -#define FLEX_SCANNER -#define YY_FLEX_MAJOR_VERSION 2 -#define YY_FLEX_MINOR_VERSION 5 -#define YY_FLEX_SUBMINOR_VERSION 33 -#if YY_FLEX_SUBMINOR_VERSION > 0 -#define FLEX_BETA -#endif - -/* First, we deal with platform-specific or compiler-specific issues. */ - -/* begin standard C headers. */ -#include <stdio.h> -#include <string.h> -#include <errno.h> -#include <stdlib.h> - -/* end standard C headers. */ - -/* flex integer type definitions */ - -#ifndef FLEXINT_H -#define FLEXINT_H - -/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ - -#if __STDC_VERSION__ >= 199901L - -/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, - * if you want the limit (max/min) macros for int types. - */ -#ifndef __STDC_LIMIT_MACROS -#define __STDC_LIMIT_MACROS 1 -#endif - -#include <inttypes.h> -typedef int8_t flex_int8_t; -typedef uint8_t flex_uint8_t; -typedef int16_t flex_int16_t; -typedef uint16_t flex_uint16_t; -typedef int32_t flex_int32_t; -typedef uint32_t flex_uint32_t; -#else -typedef signed char flex_int8_t; -typedef short int flex_int16_t; -typedef int flex_int32_t; -typedef unsigned char flex_uint8_t; -typedef unsigned short int flex_uint16_t; -typedef unsigned int flex_uint32_t; -#endif /* ! C99 */ - -/* Limits of integral types. */ -#ifndef INT8_MIN -#define INT8_MIN (-128) -#endif -#ifndef INT16_MIN -#define INT16_MIN (-32767-1) -#endif -#ifndef INT32_MIN -#define INT32_MIN (-2147483647-1) -#endif -#ifndef INT8_MAX -#define INT8_MAX (127) -#endif -#ifndef INT16_MAX -#define INT16_MAX (32767) -#endif -#ifndef INT32_MAX -#define INT32_MAX (2147483647) -#endif -#ifndef UINT8_MAX -#define UINT8_MAX (255U) -#endif -#ifndef UINT16_MAX -#define UINT16_MAX (65535U) -#endif -#ifndef UINT32_MAX -#define UINT32_MAX (4294967295U) -#endif - -#endif /* ! FLEXINT_H */ - -#ifdef __cplusplus - -/* The "const" storage-class-modifier is valid. */ -#define YY_USE_CONST - -#else /* ! __cplusplus */ - -#if __STDC__ - -#define YY_USE_CONST - -#endif /* __STDC__ */ -#endif /* ! __cplusplus */ - -#ifdef YY_USE_CONST -#define yyconst const -#else -#define yyconst -#endif - -/* Returned upon end-of-file. */ -#define YY_NULL 0 - -/* Promotes a possibly negative, possibly signed char to an unsigned - * integer for use as an array index. If the signed char is negative, - * we want to instead treat it as an 8-bit unsigned char, hence the - * double cast. - */ -#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) - -/* Enter a start condition. This macro really ought to take a parameter, - * but we do it the disgusting crufty way forced on us by the ()-less - * definition of BEGIN. - */ -#define BEGIN (yy_start) = 1 + 2 * - -/* Translate the current start state into a value that can be later handed - * to BEGIN to return to the state. The YYSTATE alias is for lex - * compatibility. - */ -#define YY_START (((yy_start) - 1) / 2) -#define YYSTATE YY_START - -/* Action number for EOF rule of a given start state. */ -#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) - -/* Special action meaning "start processing a new file". */ -#define YY_NEW_FILE llvmAsmrestart(llvmAsmin ) - -#define YY_END_OF_BUFFER_CHAR 0 - -/* Size of default input buffer. */ -#ifndef YY_BUF_SIZE -#define YY_BUF_SIZE (16384*64) -#endif - -/* The state buf must be large enough to hold one state per character in the main buffer. - */ -#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) |