diff options
author | Manuel Klimek <klimek@google.com> | 2011-12-16 13:09:10 +0000 |
---|---|---|
committer | Manuel Klimek <klimek@google.com> | 2011-12-16 13:09:10 +0000 |
commit | 76f13017fc67b35f10b61e05c13f3643b714fccf (patch) | |
tree | edbd357bc27be562fbd44b9114dae1bd99c1e40f | |
parent | db21f4c187816b03d7b30d0d238f71cbd8a0a9a7 (diff) |
Adds a JSON parser and a benchmark (json-bench) to catch performance regressions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@146735 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rw-r--r-- | include/llvm/Support/JSONParser.h | 444 | ||||
-rw-r--r-- | lib/Support/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Support/JSONParser.cpp | 221 | ||||
-rw-r--r-- | test/CMakeLists.txt | 2 | ||||
-rw-r--r-- | test/Other/json-bench-test.ll | 5 | ||||
-rw-r--r-- | unittests/CMakeLists.txt | 1 | ||||
-rw-r--r-- | unittests/Support/JSONParserTest.cpp | 218 | ||||
-rw-r--r-- | utils/Makefile | 2 | ||||
-rw-r--r-- | utils/json-bench/CMakeLists.txt | 5 | ||||
-rw-r--r-- | utils/json-bench/JSONBench.cpp | 77 | ||||
-rw-r--r-- | utils/json-bench/Makefile | 21 |
12 files changed, 996 insertions, 2 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index ec42120d10..dc66e3e517 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -353,6 +353,7 @@ add_subdirectory(utils/FileUpdate) add_subdirectory(utils/count) add_subdirectory(utils/not) add_subdirectory(utils/llvm-lit) +add_subdirectory(utils/json-bench) add_subdirectory(projects) diff --git a/include/llvm/Support/JSONParser.h b/include/llvm/Support/JSONParser.h new file mode 100644 index 0000000000..e959f590a2 --- /dev/null +++ b/include/llvm/Support/JSONParser.h @@ -0,0 +1,444 @@ +//===--- JsonParser.h - Simple JSON parser ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a JSON parser. +// +// See http://www.json.org/ for an overview. +// See http://www.ietf.org/rfc/rfc4627.txt for the full standard. +// +// FIXME: Currently this supports a subset of JSON. Specifically, support +// for numbers, booleans and null for values is missing. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_JSON_PARSER_H +#define LLVM_CLANG_TOOLING_JSON_PARSER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/ErrorHandling.h" + +#include <string> + +namespace llvm { + +class JSONString; +class JSONValue; +class JSONKeyValuePair; + +/// \brief Base class for a parsable JSON atom. +/// +/// This class has no semantics other than being a unit of JSON data which can +/// be parsed out of a JSON document. +class JSONAtom { +public: + /// \brief Possible types of JSON objects. + enum Kind { JK_KeyValuePair, JK_Array, JK_Object, JK_String }; + + /// \brief Returns the type of this value. 
+ Kind getKind() const { return MyKind; } + + static bool classof(const JSONAtom *Atom) { return true; } + +protected: + JSONAtom(Kind MyKind) : MyKind(MyKind) {} + +private: + /// \brief Parses to the end of the object and returns whether parsing + /// was successful. + bool skip() const; + + Kind MyKind; + + friend class JSONParser; + friend class JSONKeyValuePair; + template <typename, char, char, JSONAtom::Kind> friend class JSONContainer; +}; + +/// \brief A parser for JSON text. +/// +/// Use an object of JSONParser to iterate over the values of a JSON text. +/// All objects are parsed during the iteration, so you can only iterate once +/// over the JSON text, but the cost of partial iteration is minimized. +/// Create a new JSONParser if you want to iterate multiple times. +class JSONParser { +public: + /// \brief Create a JSONParser for the given input. + /// + /// Parsing is started via parseRoot(). Access to the object returned from + /// parseRoot() will parse the input lazily. + JSONParser(StringRef Input); + + /// \brief Returns the outermost JSON value (either an array or an object). + /// + /// Can return NULL if the input does not start with an array or an object. + /// The object is not parsed yet - the caller must either iterate over the + /// returned object or call 'skip' to trigger parsing. + /// + /// A JSONValue can be either a JSONString, JSONObject or JSONArray. + JSONValue *parseRoot(); + + /// \brief Parses the JSON text and returns whether it is valid JSON. + /// + /// In case validate() returns false, failed() will return true and + /// getErrorMessage() will return the parsing error. + bool validate(); + + /// \brief Returns true if an error occurs during parsing. + /// + /// If there was an error while parsing an object that was created by + /// iterating over the result of 'parseRoot', 'failed' will return true. + bool failed() const; + + /// \brief Returns an error message when 'failed' returns true. 
+ std::string getErrorMessage() const; + +private: + /// \brief These methods manage the implementation details of parsing new JSON + /// atoms. + /// @{ + JSONString *parseString(); + JSONValue *parseValue(); + JSONKeyValuePair *parseKeyValuePair(); + /// @} + + /// \brief Templated helpers to parse the elements out of both forms of JSON + /// containers. + /// @{ + template <typename AtomT> AtomT *parseElement(); + template <typename AtomT, char StartChar, char EndChar> + StringRef::iterator parseFirstElement(const AtomT *&Element); + template <typename AtomT, char EndChar> + StringRef::iterator parseNextElement(const AtomT *&Element); + /// @} + + /// \brief Whitespace parsing. + /// @{ + void nextNonWhitespace(); + bool isWhitespace(); + /// @} + + /// \brief These methods are used for error handling. + /// { + void setExpectedError(StringRef Expected, StringRef Found); + void setExpectedError(StringRef Expected, char Found); + bool errorIfAtEndOfFile(StringRef Message); + bool errorIfNotAt(char C, StringRef Message); + /// } + + /// All nodes are allocated by the parser and will be deallocated when the + /// parser is destroyed. + BumpPtrAllocator ValueAllocator; + + /// \brief The original input to the parser. + const StringRef Input; + + /// \brief The current position in the parse stream. + StringRef::iterator Position; + + /// \brief If non-empty, an error has occurred. + std::string ErrorMessage; + + template <typename AtomT, char StartChar, char EndChar, + JSONAtom::Kind ContainerKind> + friend class JSONContainer; +}; + + +/// \brief Base class for JSON value objects. +/// +/// This object represents an abstract JSON value. It is the root node behind +/// the group of JSON entities that can represent top-level values in a JSON +/// document. It has no API, and is just a placeholder in the type hierarchy of +/// nodes. 
+class JSONValue : public JSONAtom { +protected: + JSONValue(Kind MyKind) : JSONAtom(MyKind) {} + +public: + /// \brief dyn_cast helpers + ///@{ + static bool classof(const JSONAtom *Atom) { + switch (Atom->getKind()) { + case JK_Array: + case JK_Object: + case JK_String: + return true; + case JK_KeyValuePair: + return false; + }; + llvm_unreachable("Invalid JSONAtom kind"); + } + static bool classof(const JSONValue *Value) { return true; } + ///@} +}; + +/// \brief Gives access to the text of a JSON string. +/// +/// FIXME: Implement a method to return the unescaped text. +class JSONString : public JSONValue { +public: + /// \brief Returns the underlying parsed text of the string. + /// + /// This is the text between the quotes exactly as it appears in the input; + /// escape sequences are not decoded. + /// See http://www.ietf.org/rfc/rfc4627.txt for details. + StringRef getRawText() const { return RawText; }; + +private: + JSONString(StringRef RawText) : JSONValue(JK_String), RawText(RawText) {} + + /// \brief Skips to the next position in the parse stream. + bool skip() const { return true; }; + + StringRef RawText; + + friend class JSONAtom; + friend class JSONParser; + +public: + /// \brief dyn_cast helpers + ///@{ + static bool classof(const JSONAtom *Atom) { + return Atom->getKind() == JK_String; + } + static bool classof(const JSONString *String) { return true; } + ///@} +}; + +/// \brief A (key, value) tuple of type (JSONString *, JSONValue *). +/// +/// Note that JSONKeyValuePair is not a JSONValue, it is a bare JSONAtom. +/// JSONKeyValuePairs can be elements of a JSONObject, but not of a JSONArray. +/// They are not viable as top-level values either. +class JSONKeyValuePair : public JSONAtom { +public: + const JSONString * const Key; + const JSONValue * const Value; + +private: + JSONKeyValuePair(const JSONString *Key, const JSONValue *Value) + : JSONAtom(JK_KeyValuePair), Key(Key), Value(Value) {} + + /// \brief Skips to the next position in the parse stream. 
+ bool skip() const { return Value->skip(); }; + + friend class JSONAtom; + friend class JSONParser; + template <typename, char, char, JSONAtom::Kind> friend class JSONContainer; + +public: + /// \brief dyn_cast helpers + ///@{ + static bool classof(const JSONAtom *Atom) { + return Atom->getKind() == JK_KeyValuePair; + } + static bool classof(const JSONKeyValuePair *KeyValuePair) { return true; } + ///@} +}; + +/// \brief Implementation of JSON containers (arrays and objects). +/// +/// JSONContainers drive the lazy parsing of JSON arrays and objects via +/// forward iterators. Call 'skip' to validate parsing of all elements of the +/// container and to position the parse stream behind the container. +template <typename AtomT, char StartChar, char EndChar, + JSONAtom::Kind ContainerKind> +class JSONContainer : public JSONValue { +public: + /// \brief An iterator that parses the underlying container during iteration. + /// + /// Iterators on the same collection use shared state, so when multiple copies + /// of an iterator exist, only one is allowed to be used for iteration; + /// iterating multiple copies of an iterator of the same collection will lead + /// to undefined behavior. + class const_iterator : public std::iterator<std::forward_iterator_tag, + const AtomT*> { + public: + const_iterator(const const_iterator &I) : Container(I.Container) {} + + bool operator==(const const_iterator &I) const { + if (isEnd() || I.isEnd()) + return isEnd() == I.isEnd(); + return Container->Position == I.Container->Position; + } + bool operator!=(const const_iterator &I) const { return !(*this == I); } + + const_iterator &operator++() { + Container->parseNextElement(); + return *this; + } + + const AtomT *operator*() { return Container->Current; } + + private: + /// \brief Create an iterator for which 'isEnd' returns true. + const_iterator() : Container(0) {} + + /// \brief Create an iterator for the given container. 
+ const_iterator(const JSONContainer *Container) : Container(Container) {} + + bool isEnd() const { + return Container == 0 || Container->Position == StringRef::iterator(); + } + + const JSONContainer * const Container; + + friend class JSONContainer; + }; + + /// \brief Returns a lazy parsing iterator over the container. + /// + /// As the iterator drives the parse stream, begin() must only be called + /// once per container. + const_iterator begin() const { + if (Started) + report_fatal_error("Cannot parse container twice."); + Started = true; + // Set up the position and current element when we begin iterating over the + // container. + Position = Parser->parseFirstElement<AtomT, StartChar, EndChar>(Current); + return const_iterator(this); + } + + const_iterator end() const { + return const_iterator(); + } + +private: + JSONContainer(JSONParser *Parser) + : JSONValue(ContainerKind), Parser(Parser), + Position(), Current(0), Started(false) {} + + const_iterator current() const { + if (!Started) + return begin(); + + return const_iterator(this); + } + + /// \brief Skips to the next position in the parse stream. + bool skip() const { + for (const_iterator I = current(), E = end(); I != E; ++I) { + assert(*I != 0); + if (!(*I)->skip()) + return false; + } + return !Parser->failed(); + } + + /// \brief Parse the next element in the container into the Current element. + /// + /// This routine is called as an iterator into this container walks through + /// its elements. It mutates the container's internal current node to point to + /// the next atom of the container. + void parseNextElement() const { + Current->skip(); + Position = Parser->parseNextElement<AtomT, EndChar>(Current); + } + + // For parsing, JSONContainers call back into the JSONParser. 
+ JSONParser * const Parser; + + // 'Position', 'Current' and 'Started' store the state of the parse stream + // for iterators on the container, they don't change the container's elements + // and are thus marked as mutable. + mutable StringRef::iterator Position; + mutable const AtomT *Current; + mutable bool Started; + + friend class JSONAtom; + friend class JSONParser; + friend class const_iterator; + +public: + /// \brief dyn_cast helpers + ///@{ + static bool classof(const JSONAtom *Atom) { + return Atom->getKind() == ContainerKind; + } + static bool classof(const JSONContainer *Container) { return true; } + ///@} +}; + +/// \brief A simple JSON array. +typedef JSONContainer<JSONValue, '[', ']', JSONAtom::JK_Array> JSONArray; + +/// \brief A JSON object: an iterable list of JSON key-value pairs. +typedef JSONContainer<JSONKeyValuePair, '{', '}', JSONAtom::JK_Object> + JSONObject; + +/// \brief Template adaptor to dispatch element parsing for values. +template <> JSONValue *JSONParser::parseElement(); + +/// \brief Template adaptor to dispatch element parsing for key value pairs. +template <> JSONKeyValuePair *JSONParser::parseElement(); + +/// \brief Parses the first element of a JSON array or object, or closes the +/// array. +/// +/// The method assumes that the current position is before the first character +/// of the element, with possible white space in between. When successful, it +/// returns the new position after parsing the element. Otherwise, if there is +/// no next value, it returns a default constructed StringRef::iterator. 
+template <typename AtomT, char StartChar, char EndChar> +StringRef::iterator JSONParser::parseFirstElement(const AtomT *&Element) { + assert(*Position == StartChar); + Element = 0; + nextNonWhitespace(); + if (errorIfAtEndOfFile("value or end of container at start of container")) + return StringRef::iterator(); + + if (*Position == EndChar) + return StringRef::iterator(); + + Element = parseElement<AtomT>(); + if (Element == 0) + return StringRef::iterator(); + + return Position; +} + +/// \brief Parses the next element of a JSON array or object, or closes the +/// array. +/// +/// The method assumes that the current position is before the ',' which +/// separates the next element from the current element. When successful, it +/// returns the new position after parsing the element. Otherwise, if there is +/// no next value, it returns a default constructed StringRef::iterator. +template <typename AtomT, char EndChar> +StringRef::iterator JSONParser::parseNextElement(const AtomT *&Element) { + Element = 0; + nextNonWhitespace(); + if (errorIfAtEndOfFile("',' or end of container for next element")) + return 0; + + switch (*Position) { + case ',': + nextNonWhitespace(); + if (errorIfAtEndOfFile("element in container")) + return StringRef::iterator(); + + Element = parseElement<AtomT>(); + if (Element == 0) + return StringRef::iterator(); + + return Position; + + case EndChar: + return StringRef::iterator(); + + default: + setExpectedError("',' or end of container for next element", *Position); + return StringRef::iterator(); + } +} + +} // end namespace llvm + +#endif // LLVM_CLANG_TOOLING_JSON_PARSER_H diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 63a833c380..49abb1f246 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -29,6 +29,7 @@ add_llvm_library(LLVMSupport IntervalMap.cpp IsInf.cpp IsNAN.cpp + JSONParser.cpp ManagedStatic.cpp MemoryBuffer.cpp MemoryObject.cpp diff --git a/lib/Support/JSONParser.cpp 
b/lib/Support/JSONParser.cpp new file mode 100644 index 0000000000..30c2afcdfc --- /dev/null +++ b/lib/Support/JSONParser.cpp @@ -0,0 +1,221 @@ +//===--- JsonParser.cpp - Simple JSON parser ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a JSON parser. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/JSONParser.h" + +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Casting.h" + +namespace llvm { + +JSONParser::JSONParser(StringRef Input) + : Input(Input), Position(Input.begin()) {} + +JSONValue *JSONParser::parseRoot() { + if (Position != Input.begin()) + report_fatal_error("Cannot resuse JSONParser."); + if (isWhitespace()) + nextNonWhitespace(); + if (errorIfAtEndOfFile("'[' or '{' at start of JSON text")) + return 0; + switch (*Position) { + case '[': + return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this); + case '{': + return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this); + default: + setExpectedError("'[' or '{' at start of JSON text", *Position); + return 0; + } +} + +bool JSONParser::validate() { + return parseRoot()->skip(); +} + +// Sets the current error to: +// "Error while parsing JSON: expected <Expected>, but found <Found>". +void JSONParser::setExpectedError(StringRef Expected, StringRef Found) { + ErrorMessage = ("Error while parsing JSON: expected " + + Expected + ", but found " + Found + ".").str(); +} + +// Sets the current error to: +// "Error while parsing JSON: expected <Expected>, but found <Found>". 
+void JSONParser::setExpectedError(StringRef Expected, char Found) { + setExpectedError(Expected, StringRef(&Found, 1)); +} + +// If there is no character available, returns true and sets the current error +// to: "Error while parsing JSON: expected <Expected>, but found EOF.". +bool JSONParser::errorIfAtEndOfFile(StringRef Expected) { + if (Position == Input.end()) { + setExpectedError(Expected, "EOF"); + return true; + } + return false; +} + +// Returns true and sets the current error if the input is exhausted or the +// current character is not C. The error reads: +// "Error while parsing JSON: expected 'C' <Message>, but found <X>.", +// where <X> is the current character, or EOF at the end of the input. +bool JSONParser::errorIfNotAt(char C, StringRef Message) { + if (Position == Input.end() || *Position != C) { + std::string Expected = + ("'" + StringRef(&C, 1) + "' " + Message).str(); + if (Position == Input.end()) + setExpectedError(Expected, "EOF"); + else + setExpectedError(Expected, *Position); + return true; + } + return false; +} + +// Forbidding inlining improves performance by roughly 20%. +// FIXME: Remove once llvm optimizes this to the faster version without hints. +LLVM_ATTRIBUTE_NOINLINE static bool +wasEscaped(StringRef::iterator First, StringRef::iterator Position); + +// Returns whether a character at 'Position' was escaped with a leading '\'. +// 'First' specifies the position of the first character in the string. +static bool wasEscaped(StringRef::iterator First, + StringRef::iterator Position) { + assert(Position - 1 >= First); + StringRef::iterator I = Position - 1; + // We calculate the number of consecutive '\'s before the current position + // by iterating backwards through our string. + while (I >= First && *I == '\\') --I; + // (Position - 1 - I) now contains the number of '\'s before the current + // position. If it is odd, the character at 'Position' was escaped. + return (Position - 1 - I) % 2 == 1; +} + +// Parses a JSONString, assuming that the current position is on a quote. 
+JSONString *JSONParser::parseString() { + assert(Position != Input.end()); + assert(!isWhitespace()); + if (errorIfNotAt('"', "at start of string")) + return 0; + StringRef::iterator First = Position + 1; + + // Benchmarking shows that this loop is the hot path of the application with + // about 2/3rd of the runtime cycles. Since escaped quotes are not the common + // case, and multiple escaped backslashes before escaped quotes are very rare, + // we pessimize this case to achieve a smaller inner loop in the common case. + // We're doing that by having a quick inner loop that just scans for the next + // quote. Once we find the quote we check the last character to see whether + // the quote might have been escaped. If the last character is not a '\', we + // know the quote was not escaped and have thus found the end of the string. + // If the immediately preceding character was a '\', we have to scan backwards + // to see whether the previous character was actually an escaped backslash, or + // an escape character for the quote. If we find that the current quote was + // escaped, we continue parsing for the next quote and repeat. + // This optimization brings around 30% performance improvements. + do { + // Step over the current quote. + ++Position; + // Find the next quote. + while (Position != Input.end() && *Position != '"') + ++Position; + if (errorIfAtEndOfFile("\" at end of string")) + return 0; + // Repeat until the previous character was not a '\' or was an escaped + // backslash. + } while (*(Position - 1) == '\\' && wasEscaped(First, Position)); + + // The string refers to the input between the quotes; escape sequences are + // kept as written. 'Position' is left on the closing quote. + return new (ValueAllocator.Allocate<JSONString>()) + JSONString(StringRef(First, Position - First)); +} + + +// Advances the position to the next non-whitespace position. +// The current character is always stepped over first; the scan then stops at +// the first non-whitespace character or at the end of the input. +void JSONParser::nextNonWhitespace() { + do { + ++Position; + } while (isWhitespace()); +} + +// Checks if there is a whitespace character at the current position. 
+bool JSONParser::isWhitespace() { + return Position != Input.end() && (*Position == ' ' || *Position == '\t' || + *Position == '\n' || *Position == '\r'); +} + +// Returns true once an error message has been recorded. +bool JSONParser::failed() const { + return !ErrorMessage.empty(); +} + +// Returns the recorded error message; it is empty if no error was recorded. +std::string JSONParser::getErrorMessage() const { + return ErrorMessage; +} + +// Dispatches to the skip() of the concrete atom type selected by MyKind. +bool JSONAtom::skip() const { + switch (MyKind) { + case JK_Array: return cast<JSONArray>(this)->skip(); + case JK_Object: return cast<JSONObject>(this)->skip(); + case JK_String: return cast<JSONString>(this)->skip(); + case JK_KeyValuePair: return cast<JSONKeyValuePair>(this)->skip(); + } + llvm_unreachable("Impossible enum value."); +} + +// Parses a JSONValue, assuming that the current position is at the first +// character of the value. +// Arrays and objects are only allocated here, not parsed; strings are parsed +// immediately. Returns 0 and sets the error for any other start character. +JSONValue *JSONParser::parseValue() { + assert(Position != Input.end()); + assert(!isWhitespace()); + switch (*Position) { + case '[': + return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this); + case '{': + return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this); + case '"': + return parseString(); + default: + setExpectedError("'[', '{' or '\"' at start of value", *Position); + return 0; + } +} + +// Parses a JSONKeyValuePair, assuming that the current position is at the first +// character of the key, value pair. 
+JSONKeyValuePair *JSONParser::parseKeyValuePair() { + assert(Position != Input.end()); + assert(!isWhitespace()); + + JSONString *Key = parseString(); + if (Key == 0) + return 0; + + nextNonWhitespace(); + if (errorIfNotAt(':', "between key and value")) + return 0; + + nextNonWhitespace(); + const JSONValue *Value = parseValue(); + if (Value == 0) + return 0; + + return new (ValueAllocator.Allocate<JSONKeyValuePair>(1)) + JSONKeyValuePair(Key, Value); +} + +template <> JSONValue *JSONParser::parseElement() { + return parseValue(); +} + +template <> JSONKeyValuePair *JSONParser::parseElement() { + return parseKeyValuePair(); +} + +} // end namespace llvm diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index cbad83cae9..89e8103ab7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -77,5 +77,5 @@ add_dependencies(check.deps BugpointPasses LLVMHello llc lli llvm-ar llvm-as llvm-dis llvm-extract llvm-dwarfdump llvm-ld llvm-link llvm-mc llvm-nm llvm-objdump macho-dump opt - FileCheck count not) + FileCheck count not json-bench) set_target_properties(check.deps PROPERTIES FOLDER "Tests") diff --git a/test/Other/json-bench-test.ll b/test/Other/json-bench-test.ll new file mode 100644 index 0000000000..d4e689b2a8 --- /dev/null +++ b/test/Other/json-bench-test.ll @@ -0,0 +1,5 @@ +; RUN: json-bench --verify --info-output-file=- | FileCheck %s + +; CHECK: Fast: Parsing +; CHECK: Fast: Loop + diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index 0fabf71066..2eff1642c4 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -133,6 +133,7 @@ add_llvm_unittest(Support Support/CommandLineTest.cpp Support/ConstantRangeTest.cpp Support/EndianTest.cpp + Support/JSONParserTest.cpp Support/LeakDetectorTest.cpp Support/MathExtrasTest.cpp Support/Path.cpp diff --git a/unittests/Support/JSONParserTest.cpp b/unittests/Support/JSONParserTest.cpp new file mode 100644 index 0000000000..1cd987daf1 --- /dev/null +++ 
b/unittests/Support/JSONParserTest.cpp @@ -0,0 +1,218 @@ +//===- unittest/Tooling/JSONParserTest ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Casting.h" +#include "llvm/Support/JSONParser.h" +#include "llvm/ADT/Twine.h" +#include "gtest/gtest.h" + +namespace llvm { + +// Returns a buffer that contains the content of the given string without +// the trailing zero, in order to get valgrind to catch out-of-bound reads. +static std::vector<char> CutTrailingZero(StringRef String) { + std::vector<char> InputWithoutZero(String.size()); + memcpy(&InputWithoutZero[0], String.data(), String.size()); + return InputWithoutZero; +} + +// Checks that the given input gives a parse error. Makes sure that an error +// text is available and the parse fails. +static void ExpectParseError(StringRef Message, + const std::vector<char> &InputWithoutZero) { + StringRef Input = StringRef(&InputWithoutZero[0], InputWithoutZero.size()); + JSONParser Parser(Input); + EXPECT_FALSE(Parser.validate()) << Message << ": " << Input; + EXPECT_TRUE(Parser.failed()) << Message << ": " << Input; + EXPECT_FALSE(Parser.getErrorMessage().empty()) << Message << ": " << Input; +} + +// Overloads the above to allow using const char * as Input. +static void ExpectParseError(StringRef Message, StringRef Input) { + return ExpectParseError(Message, CutTrailingZero(Input)); +} + +// Checks that the given input can be parsed without error. 
+static void ExpectParseSuccess(StringRef Message, + const std::vector<char> &InputWithoutZero) { + StringRef Input = StringRef(&InputWithoutZero[0], InputWithoutZero.size()); + JSONParser Parser(Input); + EXPECT_TRUE(Parser.validate()) + << Message << ": " << Input << " - " << Parser.getErrorMessage(); +} + +// Overloads the above to allow using const char * as Input. +static void ExpectParseSuccess(StringRef Message, StringRef Input) { + return ExpectParseSuccess(Message, CutTrailingZero(Input)); +} + +TEST(JSONParser, FailsOnEmptyString) { + JSONParser Parser(""); + EXPECT_EQ(NULL, Parser.parseRoot()); +} + +TEST(JSONParser, DoesNotReadAfterInput) { + JSONParser Parser(llvm::StringRef(NULL, 0)); + EXPECT_EQ(NULL, Parser.parseRoot()); +} + +TEST(JSONParser, FailsIfStartsWithString) { + JSONParser Character("\"x\""); + EXPECT_EQ(NULL, Character.parseRoot()); +} + +TEST(JSONParser, ParsesEmptyArray) { + ExpectParseSuccess("Empty array", "[]"); +} + +TEST(JSONParser, FailsIfNotClosingArray) { + ExpectParseError("Not closing array", "["); + ExpectParseError("Not closing array", " [ "); + ExpectParseError("Not closing array", " [x"); +} + +TEST(JSONParser, ParsesEmptyArrayWithWhitespace) { + ExpectParseSuccess("Array with spaces", " [ ] "); + ExpectParseSuccess("All whitespaces", "\t\r\n[\t\n \t\r ]\t\r \n\n"); +} + +TEST(JSONParser, ParsesEmptyObject) { + ExpectParseSuccess("Empty object", "[{}]"); +} + +TEST(JSONParser, ParsesObject) { + ExpectParseSuccess("Object with an entry", "[{\"a\":\"/b\"}]"); +} + +TEST(JSONParser, ParsesMultipleKeyValuePairsInObject) { + ExpectParseSuccess("Multiple key, value pairs", + "[{\"a\":\"/b\",\"c\":\"d\",\"e\":\"f\"}]"); +} + +TEST(JSONParser, FailsIfNotClosingObject) { + ExpectParseError("Missing close on empty", "[{]"); + ExpectParseError("Missing close after pair", "[{\"a\":\"b\"]"); +} + +TEST(JSONParser, FailsIfMissingColon) { + ExpectParseError("Missing colon between key and value", "[{\"a\"\"/b\"}]"); + 
ExpectParseError("Missing colon between key and value", "[{\"a\" \"b\"}]"); +} + +TEST(JSONParser, FailsOnMissingQuote) { + ExpectParseError("Missing open quote", "[{a\":\"b\"}]"); + ExpectParseError("Missing closing quote", "[{\"a\":\"b}]"); +} + +TEST(JSONParser, ParsesEscapedQuotes) { + ExpectParseSuccess("Parses escaped string in key and value", + "[{\"a\":\"\\\"b\\\" \\\" \\\"\"}]"); +} + +TEST(JSONParser, ParsesEmptyString) { + ExpectParseSuccess("Parses empty string in value", "[{\"a\":\"\"}]"); +} + +TEST(JSONParser, FailsOnMissingString) { + ExpectParseError("Missing value", "[{\"a\":}]"); + ExpectParseError("Missing key", "[{:\"b\"}]"); +} + +TEST(JSONParser, ParsesMultipleObjects) { + ExpectParseSuccess( + "Multiple objects in array", + "[" + " { \"a\" : \"b\" }," + " { \"a\" : \"b\" }," + " { \"a\" : \"b\" }" + "]"); +} + +TEST(JSONParser, FailsOnMissingComma) { + ExpectParseError( + "Missing comma", + "[" + " { \"a\" : \"b\" }" + " { \"a\" : \"b\" }" + "]"); +} + +TEST(JSONParser, FailsOnSuperfluousComma) { + ExpectParseError("Superfluous comma in array", "[ { \"a\" : \"b\" }, ]"); + ExpectParseError("Superfluous comma in object", "{ \"a\" : \"b\", }"); +} + +TEST(JSONParser, ParsesSpacesInBetweenTokens) { + ExpectParseSuccess( + "Various whitespace between tokens", + " \t \n\n \r [ \t \n\n \r" + " \t \n\n \r { \t \n\n \r\"a\"\t \n\n \r :" + " \t \n\n \r \"b\"\t \n\n \r } \t \n\n \r,\t \n\n \r" + " \t \n\n \r { \t \n\n \r\"a\"\t \n\n \r :" + " \t \n\n \r \"b\"\t \n\n \r } \t \n\n \r]\t \n\n \r"); +} + +TEST(JSONParser, ParsesArrayOfArrays) { + ExpectParseSuccess("Array of arrays", "[[]]"); +} + +TEST(JSONParser, HandlesEndOfFileGracefully) { + ExpectParseError("In string starting with EOF", "[\""); + ExpectParseError("In string hitting EOF", "[\" "); + ExpectParseError("In string escaping EOF", "[\" \\"); + ExpectParseError("In array starting with EOF", "["); + ExpectParseError("In array element starting with EOF", "[[], "); + ExpectParseError("In 
array hitting EOF", "[[] "); + ExpectParseError("In array hitting EOF", "[[]"); + ExpectParseError("In object hitting EOF", "{\"\""); +} + +// Checks that the given string can be parsed into an identical string inside +// of an array. +static void ExpectCanParseString(StringRef String) { + std::string StringInArray = (llvm::Twine("[\"") + String + "\"]").str(); + JSONParser Parser(StringInArray); + const JSONArray *ParsedArray = dyn_cast<JSONArray>(Parser.parseRoot()); + StringRef ParsedString = + dyn_cast<JSONString>(*ParsedArray->begin())->getRawText(); + EXPECT_EQ(String, ParsedString.str()) << Parser.getErrorMessage(); +} + +// Checks that parsing the given string inside an array fails. +static void ExpectCannotParseString(StringRef String) { + std::string StringInArray = (llvm::Twine("[\"") + String + "\"]").str(); + ExpectParseError((Twine("When parsing string \"") + String + "\"").str(), + StringInArray); +} + +TEST(JSONParser, ParsesStrings) { + ExpectCanParseString(""); + ExpectCannotParseString("\\"); + ExpectCannotParseString("\""); + ExpectCanParseString(" "); + ExpectCanParseString("\\ "); + ExpectCanParseString("\\\ |