diff options
Diffstat (limited to 'examples/Kaleidoscope/Chapter3/toy.cpp')
-rw-r--r-- | examples/Kaleidoscope/Chapter3/toy.cpp | 563 |
1 files changed, 563 insertions, 0 deletions
diff --git a/examples/Kaleidoscope/Chapter3/toy.cpp b/examples/Kaleidoscope/Chapter3/toy.cpp new file mode 100644 index 0000000000..73520d8fa9 --- /dev/null +++ b/examples/Kaleidoscope/Chapter3/toy.cpp @@ -0,0 +1,563 @@ +#include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Support/IRBuilder.h" +#include <cstdio> +#include <string> +#include <map> +#include <vector> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; +public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; +public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args) + : Name(name), Args(args) {} + + Function *Codegen(); +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= primary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static Module *TheModule; +static IRBuilder<> Builder(getGlobalContext()); +static std::map<std::string, Value*> NamedValues; + +Value *ErrorV(const char *Str) { Error(Str); return 0; } + +Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); +} + +Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? V : ErrorV("Unknown variable name"); +} + +Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateAdd(L, R, "addtmp"); + case '-': return Builder.CreateSub(L, R, "subtmp"); + case '*': return Builder.CreateMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: return ErrorV("invalid binary operator"); + } +} + +Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector<Value*> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp"); +} + +Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector<const Type*> Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = AI; + } + + return F; +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read top-level expression:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Run the main "interpreter loop" now. + MainLoop(); + + // Print out all of the generated code. + TheModule->dump(); + + return 0; +} |