aboutsummaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
authorDmitri Gribenko <gribozavr@gmail.com>2013-01-30 14:29:28 +0000
committerDmitri Gribenko <gribozavr@gmail.com>2013-01-30 14:29:28 +0000
commit5bd1e5ba000023910ad986a16dd16d7ca914750a (patch)
tree78f232852c9d0ed2ad99c2ecdd8edea5c96fc3ad /utils
parentb1c760ea2a7831100da5a9ed64291b34df0ddbe0 (diff)
Comment parsing: resolve more named character references
This reimplements r173850 with a better approach: (1) use a TableGen-generated matcher instead of doing a linear search; (2) avoid allocations for new strings by converting code points to string iterals with TableGen. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@173931 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'utils')
-rw-r--r--utils/TableGen/CMakeLists.txt1
-rw-r--r--utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp83
-rw-r--r--utils/TableGen/TableGen.cpp8
-rw-r--r--utils/TableGen/TableGenBackends.h1
4 files changed, 93 insertions, 0 deletions
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index 534ac9af77..a858a214b0 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -4,6 +4,7 @@ add_tablegen(clang-tblgen CLANG
ClangASTNodesEmitter.cpp
ClangAttrEmitter.cpp
ClangCommentCommandInfoEmitter.cpp
+ ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
ClangCommentHTMLTagsEmitter.cpp
ClangDiagnosticsEmitter.cpp
ClangSACheckersEmitter.cpp
diff --git a/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp b/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
new file mode 100644
index 0000000000..3afe2b73f0
--- /dev/null
+++ b/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
@@ -0,0 +1,83 @@
+//===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tablegen backend emits an fficient function to translate HTML named
+// character references to UTF-8 sequences.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/StringMatcher.h"
+#include <vector>
+
+using namespace llvm;
+
+/// \brief Convert a code point to the corresponding UTF-8 sequence represented
+/// as a C string literal.
+///
+/// \returns true on success.
+static bool translateCodePointToUTF8(unsigned CodePoint,
+ SmallVectorImpl<char> &CLiteral) {
+ char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
+ char *TranslatedPtr = Translated;
+ if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
+ return false;
+
+ StringRef UTF8(Translated, TranslatedPtr - Translated);
+
+ raw_svector_ostream OS(CLiteral);
+ OS << "\"";
+ for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
+ OS << "\\x";
+ OS.write_hex(static_cast<unsigned char>(UTF8[i]));
+ }
+ OS << "\"";
+
+ return true;
+}
+
+namespace clang {
+void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
+ raw_ostream &OS) {
+ std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
+ std::vector<StringMatcher::StringPair> NameToUTF8;
+ SmallString<32> CLiteral;
+ for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
+ I != E; ++I) {
+ Record &Tag = **I;
+ std::string Spelling = Tag.getValueAsString("Spelling");
+ uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
+ CLiteral.clear();
+ CLiteral.append("return ");
+ if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
+ SrcMgr.PrintMessage(Tag.getLoc().front(),
+ SourceMgr::DK_Error,
+ Twine("invalid code point"));
+ continue;
+ }
+ CLiteral.append(";");
+
+ StringMatcher::StringPair Match(Spelling, CLiteral.str());
+ NameToUTF8.push_back(Match);
+ }
+
+ OS << "// This file is generated by TableGen. Do not edit.\n\n";
+
+ OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
+ " StringRef Name) {\n";
+ StringMatcher("Name", NameToUTF8, OS).Emit();
+ OS << " return StringRef();\n"
+ << "}\n\n";
+}
+
+} // end namespace clang
+
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 8af6598cd0..4097339b9a 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -44,6 +44,7 @@ enum ActionType {
GenClangSACheckers,
GenClangCommentHTMLTags,
GenClangCommentHTMLTagsProperties,
+ GenClangCommentHTMLNamedCharacterReferences,
GenClangCommentCommandInfo,
GenOptParserDefs, GenOptParserImpl,
GenArmNeon,
@@ -111,6 +112,10 @@ namespace {
"gen-clang-comment-html-tags-properties",
"Generate efficient matchers for HTML tag "
"properties"),
+ clEnumValN(GenClangCommentHTMLNamedCharacterReferences,
+ "gen-clang-comment-html-named-character-references",
+ "Generate function to translate named character "
+ "references to UTF-8 sequences"),
clEnumValN(GenClangCommentCommandInfo,
"gen-clang-comment-command-info",
"Generate list of commands that are used in "
@@ -194,6 +199,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenClangCommentHTMLTagsProperties:
EmitClangCommentHTMLTagsProperties(Records, OS);
break;
+ case GenClangCommentHTMLNamedCharacterReferences:
+ EmitClangCommentHTMLNamedCharacterReferences(Records, OS);
+ break;
case GenClangCommentCommandInfo:
EmitClangCommentCommandInfo(Records, OS);
break;
diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h
index 637e54c01b..3bc4c906c0 100644
--- a/utils/TableGen/TableGenBackends.h
+++ b/utils/TableGen/TableGenBackends.h
@@ -51,6 +51,7 @@ void EmitClangSACheckers(RecordKeeper &Records, raw_ostream &OS);
void EmitClangCommentHTMLTags(RecordKeeper &Records, raw_ostream &OS);
void EmitClangCommentHTMLTagsProperties(RecordKeeper &Records, raw_ostream &OS);
+void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records, raw_ostream &OS);
void EmitClangCommentCommandInfo(RecordKeeper &Records, raw_ostream &OS);