diff options
author | Dmitri Gribenko <gribozavr@gmail.com> | 2013-01-30 14:29:28 +0000 |
---|---|---|
committer | Dmitri Gribenko <gribozavr@gmail.com> | 2013-01-30 14:29:28 +0000 |
commit | 5bd1e5ba000023910ad986a16dd16d7ca914750a (patch) | |
tree | 78f232852c9d0ed2ad99c2ecdd8edea5c96fc3ad /utils | |
parent | b1c760ea2a7831100da5a9ed64291b34df0ddbe0 (diff) |
Comment parsing: resolve more named character references
This reimplements r173850 with a better approach:
(1) use a TableGen-generated matcher instead of doing a linear search;
(2) avoid allocations for new strings by converting code points to string
literals with TableGen.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@173931 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'utils')
-rw-r--r-- | utils/TableGen/CMakeLists.txt | 1 | ||||
-rw-r--r-- | utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp | 83 | ||||
-rw-r--r-- | utils/TableGen/TableGen.cpp | 8 | ||||
-rw-r--r-- | utils/TableGen/TableGenBackends.h | 1 |
4 files changed, 93 insertions, 0 deletions
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt index 534ac9af77..a858a214b0 100644 --- a/utils/TableGen/CMakeLists.txt +++ b/utils/TableGen/CMakeLists.txt @@ -4,6 +4,7 @@ add_tablegen(clang-tblgen CLANG ClangASTNodesEmitter.cpp ClangAttrEmitter.cpp ClangCommentCommandInfoEmitter.cpp + ClangCommentHTMLNamedCharacterReferenceEmitter.cpp ClangCommentHTMLTagsEmitter.cpp ClangDiagnosticsEmitter.cpp ClangSACheckersEmitter.cpp diff --git a/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp b/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp new file mode 100644 index 0000000000..3afe2b73f0 --- /dev/null +++ b/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp @@ -0,0 +1,83 @@ +//===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This tablegen backend emits an efficient function to translate HTML named +// character references to UTF-8 sequences. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/StringMatcher.h" +#include <vector> + +using namespace llvm; + +/// \brief Convert a code point to the corresponding UTF-8 sequence represented +/// as a C string literal. +/// +/// \returns true on success. 
+static bool translateCodePointToUTF8(unsigned CodePoint, + SmallVectorImpl<char> &CLiteral) { + char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT]; + char *TranslatedPtr = Translated; + if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr)) + return false; + + StringRef UTF8(Translated, TranslatedPtr - Translated); + + raw_svector_ostream OS(CLiteral); + OS << "\""; + for (size_t i = 0, e = UTF8.size(); i != e; ++i) { + OS << "\\x"; + OS.write_hex(static_cast<unsigned char>(UTF8[i])); + } + OS << "\""; + + return true; +} + +namespace clang { +void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records, + raw_ostream &OS) { + std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR"); + std::vector<StringMatcher::StringPair> NameToUTF8; + SmallString<32> CLiteral; + for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end(); + I != E; ++I) { + Record &Tag = **I; + std::string Spelling = Tag.getValueAsString("Spelling"); + uint64_t CodePoint = Tag.getValueAsInt("CodePoint"); + CLiteral.clear(); + CLiteral.append("return "); + if (!translateCodePointToUTF8(CodePoint, CLiteral)) { + SrcMgr.PrintMessage(Tag.getLoc().front(), + SourceMgr::DK_Error, + Twine("invalid code point")); + continue; + } + CLiteral.append(";"); + + StringMatcher::StringPair Match(Spelling, CLiteral.str()); + NameToUTF8.push_back(Match); + } + + OS << "// This file is generated by TableGen. 
Do not edit.\n\n"; + + OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n" + " StringRef Name) {\n"; + StringMatcher("Name", NameToUTF8, OS).Emit(); + OS << " return StringRef();\n" + << "}\n\n"; +} + +} // end namespace clang + diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp index 8af6598cd0..4097339b9a 100644 --- a/utils/TableGen/TableGen.cpp +++ b/utils/TableGen/TableGen.cpp @@ -44,6 +44,7 @@ enum ActionType { GenClangSACheckers, GenClangCommentHTMLTags, GenClangCommentHTMLTagsProperties, + GenClangCommentHTMLNamedCharacterReferences, GenClangCommentCommandInfo, GenOptParserDefs, GenOptParserImpl, GenArmNeon, @@ -111,6 +112,10 @@ namespace { "gen-clang-comment-html-tags-properties", "Generate efficient matchers for HTML tag " "properties"), + clEnumValN(GenClangCommentHTMLNamedCharacterReferences, + "gen-clang-comment-html-named-character-references", + "Generate function to translate named character " + "references to UTF-8 sequences"), clEnumValN(GenClangCommentCommandInfo, "gen-clang-comment-command-info", "Generate list of commands that are used in " @@ -194,6 +199,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenClangCommentHTMLTagsProperties: EmitClangCommentHTMLTagsProperties(Records, OS); break; + case GenClangCommentHTMLNamedCharacterReferences: + EmitClangCommentHTMLNamedCharacterReferences(Records, OS); + break; case GenClangCommentCommandInfo: EmitClangCommentCommandInfo(Records, OS); break; diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h index 637e54c01b..3bc4c906c0 100644 --- a/utils/TableGen/TableGenBackends.h +++ b/utils/TableGen/TableGenBackends.h @@ -51,6 +51,7 @@ void EmitClangSACheckers(RecordKeeper &Records, raw_ostream &OS); void EmitClangCommentHTMLTags(RecordKeeper &Records, raw_ostream &OS); void EmitClangCommentHTMLTagsProperties(RecordKeeper &Records, raw_ostream &OS); +void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper 
&Records, raw_ostream &OS); void EmitClangCommentCommandInfo(RecordKeeper &Records, raw_ostream &OS); |