//== HTMLRewrite.cpp - Translate source code into prettified HTML --*- C++ -*-// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file defines the HTMLRewriter clas, which is used to translate the // text of a source file into prettified HTML. // //===----------------------------------------------------------------------===// #include "clang/Rewrite/Rewriter.h" #include "clang/Rewrite/HTMLRewrite.h" #include "clang/Lex/Preprocessor.h" #include "clang/Basic/SourceManager.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/MemoryBuffer.h" #include using namespace clang; void html::EscapeText(Rewriter& R, unsigned FileID, bool EscapeSpaces, bool ReplaceTabs) { const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FileID); const char* C = Buf->getBufferStart(); const char* FileEnd = Buf->getBufferEnd(); assert (C <= FileEnd); RewriteBuffer &RB = R.getEditBuffer(FileID); for (unsigned FilePos = 0; C != FileEnd ; ++C, ++FilePos) { switch (*C) { default: break; case ' ': if (EscapeSpaces) RB.ReplaceText(FilePos, 1, " ", 6); break; case '\t': if (!ReplaceTabs) break; if (EscapeSpaces) RB.ReplaceText(FilePos, 1, "    ", 6*4); else RB.ReplaceText(FilePos, 1, " ", 4); break; case '<': RB.ReplaceText(FilePos, 1, "<", 4); break; case '>': RB.ReplaceText(FilePos, 1, ">", 4); break; case '&': RB.ReplaceText(FilePos, 1, "&", 5); break; } } } std::string html::EscapeText(const std::string& s, bool EscapeSpaces, bool ReplaceTabs) { unsigned len = s.size(); std::ostringstream os; for (unsigned i = 0 ; i < len; ++i) { char c = s[i]; switch (c) { default: os << c; break; case ' ': if (EscapeSpaces) os << " "; else os << ' '; break; case '\t': if (ReplaceTabs) for (unsigned i = 0; i < 4; ++i) os << " "; else os << c; break; case '<': os << "<"; break; case '>': os << ">"; break; case '&': os << "&"; break; } } return os.str(); } static void AddLineNumber(RewriteBuffer &RB, unsigned LineNo, unsigned B, unsigned E) { llvm::SmallString<100> Str; Str += ""; Str.append_uint(LineNo); Str += ""; if (B == E) { // Handle empty lines. Str += " "; RB.InsertTextBefore(B, &Str[0], Str.size()); } else { RB.InsertTextBefore(B, &Str[0], Str.size()); RB.InsertTextBefore(E, "", strlen("")); } } void html::AddLineNumbers(Rewriter& R, unsigned FileID) { const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FileID); const char* FileBeg = Buf->getBufferStart(); const char* FileEnd = Buf->getBufferEnd(); const char* C = FileBeg; RewriteBuffer &RB = R.getEditBuffer(FileID); assert (C <= FileEnd); unsigned LineNo = 0; unsigned FilePos = 0; while (C != FileEnd) { ++LineNo; unsigned LineStartPos = FilePos; unsigned LineEndPos = FileEnd - FileBeg; assert (FilePos <= LineEndPos); assert (C < FileEnd); // Scan until the newline (or end-of-file). while (C != FileEnd) { char c = *C; ++C; if (c == '\n') { LineEndPos = FilePos++; break; } ++FilePos; } AddLineNumber(RB, LineNo, LineStartPos, LineEndPos); } // Add one big table tag that surrounds all of the code. RB.InsertTextBefore(0, "\n", strlen("
\n")); RB.InsertTextAfter(FileEnd - FileBeg, "
", strlen("")); } void html::AddHeaderFooterInternalBuiltinCSS(Rewriter& R, unsigned FileID) { const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FileID); const char* FileStart = Buf->getBufferStart(); const char* FileEnd = Buf->getBufferEnd(); SourceLocation StartLoc = SourceLocation::getFileLoc(FileID, 0); SourceLocation EndLoc = SourceLocation::getFileLoc(FileID, FileEnd-FileStart); // Generate header R.InsertCStrBefore(StartLoc, "\n\n" "\n\n"); // Generate footer R.InsertCStrAfter(EndLoc, "\n"); } /// SyntaxHighlight - Relex the specified FileID and annotate the HTML with /// information about keywords, macro expansions etc. This uses the macro /// table state from the end of the file, so it won't be perfectly perfect, /// but it will be reasonably close. void html::SyntaxHighlight(Rewriter &R, unsigned FileID, Preprocessor &PP) { RewriteBuffer &RB = R.getEditBuffer(FileID); const SourceManager &SourceMgr = PP.getSourceManager(); std::pair File = SourceMgr.getBufferData(FileID); const char *BufferStart = File.first; Lexer L(SourceLocation::getFileLoc(FileID, 0), PP.getLangOptions(), File.first, File.second); // Inform the preprocessor that we want to retain comments as tokens, so we // can highlight them. L.SetCommentRetentionState(true); // Lex all the tokens in raw mode, to avoid entering #includes or expanding // macros. Token Tok; L.LexRawToken(Tok); while (Tok.isNot(tok::eof)) { // Since we are lexing unexpanded tokens, all tokens are from the main // FileID. unsigned TokOffs = SourceMgr.getFullFilePos(Tok.getLocation()); unsigned TokLen = Tok.getLength(); switch (Tok.getKind()) { default: break; case tok::identifier: { // Fill in Result.IdentifierInfo, looking up the identifier in the // identifier table. IdentifierInfo *II = PP.LookUpIdentifierInfo(Tok, BufferStart+TokOffs); // If this is a pp-identifier, for a keyword, highlight it as such. if (II->getTokenID() != tok::identifier) { RB.InsertTextAfter(TokOffs, "", strlen("")); RB.InsertTextBefore(TokOffs+TokLen, "", strlen("")); } break; } case tok::comment: RB.InsertTextAfter(TokOffs, "", strlen("")); RB.InsertTextBefore(TokOffs+TokLen, "", strlen("")); break; case tok::hash: // FIXME: This isn't working because we're not in raw mode in the lexer. // Just cons up our own lexer here? // If this is a preprocessor directive, all tokens to end of line are too. if (Tok.isAtStartOfLine()) { RB.InsertTextAfter(TokOffs, "", strlen("")); // Find end of line. This is a hack. const char *LineEnd = SourceMgr.getCharacterData(Tok.getLocation()); unsigned TokEnd = TokOffs+strcspn(LineEnd, "\n\r"); RB.InsertTextBefore(TokEnd, "", strlen("")); } break; } L.LexRawToken(Tok); } } /// HighlightMacros - This uses the macro table state from the end of the /// file, to reexpand macros and insert (into the HTML) information about the /// macro expansions. This won't be perfectly perfect, but it will be /// reasonably close. void html::HighlightMacros(Rewriter &R, unsigned FileID, Preprocessor &PP) { RewriteBuffer &RB = R.getEditBuffer(FileID); // Inform the preprocessor that we don't want comments. PP.SetCommentRetentionState(false, false); // Start parsing the specified input file. PP.EnterMainSourceFile(); // Lex all the tokens. const SourceManager &SourceMgr = PP.getSourceManager(); Token Tok; PP.Lex(Tok); while (Tok.isNot(tok::eof)) { // Ignore non-macro tokens. if (!Tok.getLocation().isMacroID()) { PP.Lex(Tok); continue; } // Ignore tokens whose logical location was not the main file. SourceLocation LLoc = SourceMgr.getLogicalLoc(Tok.getLocation()); std::pair LLocInfo = SourceMgr.getDecomposedFileLoc(LLoc); if (LLocInfo.first != FileID) { PP.Lex(Tok); continue; } // Okay, we have the first token of a macro expansion: highlight the // instantiation. // Get the size of current macro call itself. // FIXME: This should highlight the args of a function-like // macro, using a heuristic. unsigned TokLen = Lexer::MeasureTokenLength(LLoc, SourceMgr); unsigned TokOffs = LLocInfo.second; RB.InsertTextAfter(TokOffs, "", strlen("")); RB.InsertTextBefore(TokOffs+TokLen, "", strlen("")); // Okay, eat this token, getting the next one. PP.Lex(Tok); // Skip all the rest of the tokens that are part of this macro // instantiation. It would be really nice to pop up a window with all the // spelling of the tokens or something. while (!Tok.is(tok::eof) && SourceMgr.getLogicalLoc(Tok.getLocation()) == LLoc) PP.Lex(Tok); } }