aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStuart Sierra <mail@stuartsierra.com>2008-04-21 14:04:30 +0000
committerStuart Sierra <mail@stuartsierra.com>2008-04-21 14:04:30 +0000
commit72d5e41479b462b5ce0115abbfbc50787780dafc (patch)
tree90e0bb38fe1015e370dbec55d11bcfcbfdd072d1
parentda17800b12d2c769adc24cc11f027257f7bd4345 (diff)
Added ALPHA xml-stream-reader.clj and xml-stream-writer.clj
-rw-r--r--xml-stream-reader.clj282
-rw-r--r--xml-stream-writer.clj225
2 files changed, 507 insertions, 0 deletions
diff --git a/xml-stream-reader.clj b/xml-stream-reader.clj
new file mode 100644
index 00000000..51abb204
--- /dev/null
+++ b/xml-stream-reader.clj
@@ -0,0 +1,282 @@
+;;; xml-stream-reader.clj -- StAX XML reading for Clojure
+
+;; by Stuart Sierra
+;; Version 1; April 21, 2008
+
+;; THIS IS AN 'ALPHA' RELEASE AND THE API IS SUBJECT TO CHANGE.
+
+
+;; Copyright (c) Stuart Sierra. All rights reserved. The use and
+;; distribution terms for this software are covered by the Common
+;; Public License 1.0 (http://opensource.org/licenses/cpl1.0.php)
+;; which can be found in the file CPL.TXT at the root of the Clojure
+;; distribution. By using this software in any fashion, you are
+;; agreeing to be bound by the terms of this license. You must not
+;; remove this notice, or any other, from this software.
+
+
+;; This file defines a stream-based parser for XML, based on the StAX
+;; API, included with the JDK since version 6.
+;;
+;; StAX documentation is available at
+;; https://jaxp-sources.dev.java.net/nonav/docs/api/javax/xml/stream/XMLStreamReader.html
+;;
+;; The operation of the parser is similar to SAX event-based parsers,
+;; but the API is simpler and (supposedly) faster than SAX. It also
+;; provides XML namespace support.
+;;
+;; This is a lower-level interface than Clojure's xml.clj. It does
+;; not build up a data structure representing the XML. It merely
+;; calls a handler function, which you must provide, for each event in
+;; the XML stream, and provides some convenient macros to query the
+;; current event.
+;;
+;; See the "PUBLIC API", below, for more instructions.
+;;
+;; This parser will use the Apache XML Commons resolver, if it is
+;; available, to avoid downloading DTDs from the web. See
+;; http://xml.apache.org/commons/components/resolver/index.html
+;;
+;; If the XML Commons resolver jar is on your classpath, and your XML
+;; catalogs are properly configured, this library will use local
+;; versions of the XML DTDs.
+
+
+(clojure/in-ns 'xml-stream-reader)
+(clojure/refer 'clojure)
+
+
+;;; PRIVATE
+
+(import '(javax.xml.stream XMLInputFactory XMLStreamConstants
+ XMLStreamReader))
+
+(def #^XMLStreamReader *xml-input-stream*) ; dynamically bound during parsing
+
+;; Use Apache's DTD Resolver if it is available.
+(try
+ (def
+ #^{:private true
+ :doc "Instance of ResolvingXMLReader from the Apach XML Commons,
+ or nil if that class cannot be found."}
+ *dtd-resolver-impl*
+ (.. Class (forName "org.apache.xml.resolver.tools.ResolvingXMLReader")
+ (newInstance)))
+ ;; XMLStreamReader uses a non-SAX interface, so we have to wrap
+ ;; the entity resolver in a different interface.
+ (def
+ #^{:private true
+ :doc "Proxy class to implement the StAX XMLResolver interface."}
+ *dtd-resolver*
+ (proxy [javax.xml.stream.XMLResolver] []
+ ;; Uncomment (prn ... ) lines below to debug the resolver.
+ (resolveEntity [publicID systemID baseURI namespace]
+ ;;(prn "Resolving" publicID systemID baseURI namespace)
+ (let [entity (. *dtd-resolver-impl* (resolveEntity publicID systemID))]
+ (when entity
+ (let [#^String systemid (. entity (getSystemId))]
+ (when (and systemid (. systemid (startsWith "file:")))
+ ;;(prn "Found local system ID" systemid)
+ (new java.io.FileInputStream (subs systemid 5)))))))))
+ (catch Exception e ;; could not find ResolvingXMLReader
+ (def *dtd-resolver-impl* nil)
+ (def *dtd-resolver* nil)))
+
+(def
+ #^{:private true
+ :doc "Global XMLInputFactory. Note: the API documentation does
+ not specify if XMLInputFactory is thread-safe."}
+ *xml-input-factory*
+ (. XMLInputFactory (newInstance)))
+
+;; Use the Apache DTD Resolver if it's available.
+(when *dtd-resolver* (. *xml-input-factory* (setXMLResolver *dtd-resolver*)))
+
+(def
+ #^{:private true
+ :doc "Pre-allocated Throwable used to quit XML stream parsing."}
+ +stop-parsing+ (new Throwable "Finished parsing XML."))
+
+
+;;; PUBLIC API
+
+(defn xml-stream-reader
+ "Creates an XMLStreamReader from the source (a Reader or
+ InputStream). You should call close() on the XMLStreamReader when
+ you are finished with it. Normally this is called automatically by
+ 'parse-xml-stream'."
+ [input]
+ (. *xml-input-factory* (createXMLStreamReader input)))
+
+
+(defn parse-xml-stream
+ "Parses 'source' (a Reader or InputStream) with XMLStreamReader.
+ Calls 'handler-function' once for each event in the XML stream, with
+ the event type (one of the XMLStreamConstants) as the argument.
+
+ Use the accessor functions like (lname), (text), and (attr-value...)
+ to get information about the current XML event.
+
+ Use (stop-xml-parse) to quit parsing before the entire document has
+ been read.
+
+ This function ensures that the XMLStreamReader is closed when
+ parsing is completed or stopped, but it does NOT close the provided
+ input source."
+ [handler-function source]
+ (let [xml-stream (xml-stream-reader source)]
+ (try
+ (binding [*xml-input-stream* xml-stream]
+ (loop [] ; while
+ (when (. *xml-input-stream* (hasNext))
+ (handler-function (. *xml-input-stream* (next)))
+ (recur))))
+ (catch Throwable t
+ (when-not (identical? t +stop-parsing+)
+ (throw t)))
+ (finally (. xml-stream (close))))))
+
+
+;;; XML EVENT TYPE CONSTANTS
+;; Use these to dispatch based on event type in your handler function.
+
+(def ATTRIBUTE (. XMLStreamConstants ATTRIBUTE))
+(def CDATA (. XMLStreamConstants CDATA))
+(def CHARACTERS (. XMLStreamConstants CHARACTERS))
+(def COMMENT (. XMLStreamConstants COMMENT))
+(def DTD (. XMLStreamConstants DTD))
+(def END_DOCUMENT (. XMLStreamConstants END_DOCUMENT))
+(def END_ELEMENT (. XMLStreamConstants END_ELEMENT))
+(def ENTITY_DECLARATION (. XMLStreamConstants ENTITY_DECLARATION))
+(def ENTITY_REFERENCE (. XMLStreamConstants ENTITY_REFERENCE))
+(def NAMESPACE (. XMLStreamConstants NAMESPACE))
+(def NOTATION_DECLARATION (. XMLStreamConstants NOTATION_DECLARATION))
+(def PROCESSING_INSTRUCTION (. XMLStreamConstants PROCESSING_INSTRUCTION))
+(def SPACE (. XMLStreamConstants SPACE))
+(def START_DOCUMENT (. XMLStreamConstants START_DOCUMENT))
+(def START_ELEMENT (. XMLStreamConstants START_ELEMENT))
+
+
+;;; PUBLIC XML EVENT ACCESSORS
+
+(defn stop-xml-parse
+ "Stop the XML stream parser and return from 'parse-xml-stream'."
+ [] (throw +stop-parsing+))
+
+(defmacro event-type
+ "Returns the event type (a static in XMLStreamConstants) of the
+ current XML event. Normally not needed, because your handler
+ function will receive the event type as its argument."
+ []
+ '(. *xml-input-stream* (getEventType)))
+
+(defmacro pi-target
+ "Returns the target of an XML processing instruction."
+ []
+ '(. *xml-input-stream* (getPITarget)))
+
+(defmacro pi-data
+ "Returns the data of an XML processing instruction."
+ []
+ '(. *xml-input-stream* (getPIData)))
+
+(defmacro prefix
+ "Returns the prefix of a namespace-qualified XML element."
+ []
+ '(let [p (. *xml-input-stream* (getPrefix))]
+ (if (. p (isEmpty)) nil (keyword p))))
+
+(defmacro lname
+ "Returns the local name of an XML element."
+ []
+ '(. *xml-input-stream* (getLocalName)))
+
+(defmacro xmlns
+ "Returns the namespace URI of a namespace-qualified XML element."
+ []
+ '(. *xml-input-stream* (getNamespaceURI)))
+
+(defmacro whitespace?
+ "Returns true if the current XML character node contains only
+ whitespace. Implementation-dependent."
+ []
+ '(. *xml-input-stream* (isWhiteSpace)))
+
+(defmacro text
+ "Returns the text of the current XML character node."
+ []
+ '(. *xml-input-stream* (getText)))
+
+(defmacro attr-count
+ "Returns the number of attributes on the current XML element."
+ []
+ '(. *xml-input-stream* (getAttributeCount)))
+
+(defn attr-value
+ "Returns the value of the attribute on the current XML element."
+ ([local-name] (. *xml-input-stream* (getAttributeValue nil local-name)))
+ ([local-name xmlns] (. *xml-input-stream* (getAttributeValue xmlns local-name))))
+
+(defstruct attribute :lname :xmlns :prefix :value)
+
+(defn nth-attr
+ "Returns a struct representing the nth attribute of the current XML
+ element. The struct has 4 parts, :lname (local name), :xmlns,
+ :prefix, and :value."
+ [n]
+ (struct attribute
+ (. *xml-input-stream* (getAttributeLocalName n))
+ (. *xml-input-stream* (getAttributeNamespace n))
+ (. *xml-input-stream* (getAttributePrefix n))
+ (. *xml-input-stream* (getAttributeValue n))))
+
+(defn attrs
+ "Returns a seq of attribute structures for all attributes on the
+ current XML element."
+ []
+ (doall ; have to get all attributes before next XML event
+ (for [index (range (attr-count))]
+ (nth-attr index))))
+
+
+
+;; Valid methods for each state:
+;; from https://jaxp-sources.dev.java.net/nonav/docs/api/javax/xml/stream/XMLStreamReader.html
+;;
+;; All States getProperty(), hasNext(), require(), close(),
+;; getNamespaceURI(), isStartElement(), isEndElement(),
+;; isCharacters(), isWhiteSpace(), getNamespaceContext(),
+;; getEventType(),getLocation(), hasText()
+;;
+;; START_ELEMENT next(), getName(), getLocalName(), hasName(),
+;; getPrefix(), getAttributeXXX(),
+;; isAttributeSpecified(), getNamespaceXXX(),
+;; getElementText(), nextTag()
+;;
+;; ATTRIBUTE next(), nextTag() getAttributeXXX(),
+;; isAttributeSpecified(),
+;;
+;; NAMESPACE next(), nextTag() getNamespaceXXX()
+;;
+;; END_ELEMENT next(), getName(), getLocalName(), hasName(),
+;; getPrefix(), getNamespaceXXX(), nextTag()
+;;
+;; CHARACTERS next(), getTextXXX(), nextTag()
+;;
+;; CDATA next(), getTextXXX(), nextTag()
+;;
+;; COMMENT next(), getTextXXX(), nextTag()
+;;
+;; SPACE next(), getTextXXX(), nextTag()
+;;
+;; START_DOCUMENT next(), getEncoding(), next(), getPrefix(),
+;; getVersion(), isStandalone(), standaloneSet(),
+;; getCharacterEncodingScheme(), nextTag()
+;;
+;; END_DOCUMENT close()
+;;
+;; PROCESSING_INSTRUCTION next(), getPITarget(), getPIData(), nextTag()
+;;
+;; ENTITY_REFERENCE next(), getLocalName(), getText(), nextTag()
+;;
+;; DTD next(), getText(), nextTag()
diff --git a/xml-stream-writer.clj b/xml-stream-writer.clj
new file mode 100644
index 00000000..7566ac46
--- /dev/null
+++ b/xml-stream-writer.clj
@@ -0,0 +1,225 @@
+;;; xml-stream-writer.clj -- Clojure interface to XMLStreamWriter
+
+;; by Stuart Sierra <mail@stuartsierra.com>
+;; Version 1; April 21, 2008
+
+;; THIS IS AN 'ALPHA' RELEASE AND THE API IS SUBJECT TO CHANGE.
+
+
+;; Copyright (c) 2008 Stuart Sierra. All rights reserved. The use and
+;; distribution terms for this software are covered by the Common
+;; Public License 1.0 (http://www.opensource.org/licenses/cpl1.0.php)
+;; which can be found in the file CPL.TXT at the root of this
+;; distribution. By using this software in any fashion, you are
+;; agreeing to be bound by the terms of this license. You must not
+;; remove this notice, or any other, from this software.
+
+
+;; This file defines a Clojure interface to the StAX XML writer API,
+;; defined in javax.xml.stream.XMLOutputStream, included with the JDK
+;; since version 6.
+;;
+;; The main entry point is the 'with-xml-output' macro, which takes
+;; one argument, an open java.io.Writer, and a body. This macro opens
+;; a new XMLStreamWriter on the given output stream and ensures that
+;; all document tags are closed. It does NOT close the underlying
+;; Writer.
+;;
+;; Within the body of 'with-xml-output', certain forms will be
+;; translated into XML:
+;;
+;; "This is a string." => This is a string.
+;; [:foo "this & that"] => <foo>this &amp; that</foo>
+;; [:foo] => <foo/>
+;;
+;; Attributes are specified as maps:
+;;
+;; [:foo {:bar "this & that"}] => <foo bar="this &amp; that"/>
+;;
+;; Declare XML namespace prefixes inside 'emit-xml' with
+;; (xmlns prefix uri), where 'prefix' is a keyword.
+;;
+;; Once a namespace has been declared, create elements in that
+;; namespace by using a 2-element vector [prefix localname] as the tag
+;; name:
+;;
+;; (with-xml-output *out*
+;; (xmlns :me "http://xmlns.com/mine")
+;; [[:me :foo] "content"])
+;;
+;; => <me:foo xmlns:me="http://xmlns.com/mine">content</me:foo>
+;;
+;; Same with attributes:
+;;
+;; (with-xml-output *out*
+;; (xmlns :me "http://xmlns.com/mine")
+;; [:foo {[:me bar] "value"}])
+;;
+;; => <foo me:bar="value" xmlns:me="http://xmlns.com/mine"/>
+;;
+;; Undeclared namespaces will get an auto-generated prefix:
+;;
+;; (with-xml-output *out* [["http://myns.com/" :foo]])
+;;
+;; => <zdef18:foo xmlns:zdef18="http://myns.com/"/>
+;;
+;; Normal function calls and symbols may be placed in the body of
+;; 'with-xml-output'; they will be evaluated normally and their return
+;; values will be included as text in the XML output. To avoid adding
+;; text to the output, functions should return nil.
+;;
+;; To emit XML tags from a function without creating a new
+;; XMLStreamWriter, use the 'emit-xml' macro:
+;;
+;; (defn foo [name] (emit-xml [:mytag {:id name}]))
+;;
+;; (with-xml-output *out*
+;; [:root (foo 24) (foo 24)])
+;;
+;; => <root><mytag id="24"/><mytag id="24"/></root>
+;;
+;; To create the default XML declaration (version 1.0 and UTF-8
+;; encoding), use the (xmldecl) macro:
+;;
+;; (with-xml-output *out* (xmldecl))
+;;
+;; => <?xml version="1.0" encoding="UTF-8"?>
+;;
+
+
+
+(clojure/in-ns 'xml-stream-writer)
+(clojure/refer 'clojure)
+
+(import '(javax.xml.stream XMLOutputFactory))
+
+(defn- to-name [x]
+ (cond
+ (keyword? x) (name x)
+ (string? x) x
+ (symbol? x) (name x)
+ true (str x)))
+
+(def
+ *xml-output-factory*
+ (let [factory (XMLOutputFactory.newInstance)]
+ (. factory setProperty
+ "javax.xml.stream.isRepairingNamespaces" true)
+ factory))
+
+(def *xml-stream-writer*)
+
+(def *xmlns-prefixes*)
+
+(def *xml-indent* nil)
+
+(def *xml-tag-depth*)
+
+(defn apply-indent []
+ (when *xml-indent*
+ (. *xml-stream-writer* writeCharacters "\n")
+ (dotimes i (* *xml-indent* *xml-tag-depth*)
+ (. *xml-stream-writer* writeCharacters " "))))
+
+(defn start-tag
+ ([name]
+ (apply-indent)
+ (set! *xml-tag-depth* (inc *xml-tag-depth*))
+ (. *xml-stream-writer*
+ writeStartElement (to-name name)))
+ ([namespace name]
+ (apply-indent)
+ (set! *xml-tag-depth* (inc *xml-tag-depth*))
+ (. *xml-stream-writer*
+ writeStartElement namespace (to-name name))))
+
+(defn empty-tag
+ ([name]
+ (. *xml-stream-writer*
+ writeEmptyElement (to-name name)))
+ ([namespace name]
+ (. *xml-stream-writer*
+ writeEmptyElement namespace (to-name name))))
+
+(defn xml-attr
+ ([name value]
+ (. *xml-stream-writer* writeAttribute (to-name name) value))
+ ([namespace name value]
+ (. *xml-stream-writer* writeAttribute namespace (to-name name) value)))
+
+(defn xml-text [t]
+ (when t
+ (apply-indent)
+ (. *xml-stream-writer* writeCharacters (str t))))
+
+(defn end-tag []
+ (set! *xml-tag-depth* (dec *xml-tag-depth*))
+ (apply-indent)
+ (. *xml-stream-writer* writeEndElement))
+
+(defn entity [name]
+ (. *xml-stream-writer* writeEntityRef (to-name name)))
+
+(def emit-form)
+
+(defn resolve-xmlns [x]
+ (if (keyword? x)
+ (*xmlns-prefixes* x)
+ x))
+
+(defn- emit-tag [form]
+ (if (or (= 1 (count form))
+ (and (= 2 (count form))
+ (map? (second form)))) ; is the element empty?
+ (apply list 'do
+ (let [tag (first form)]
+ (if (vector? tag) ; is the element namespaced?
+ (list 'empty-tag (list 'resolve-xmlns (first tag)) (second tag))
+ (list 'empty-tag tag)))
+ (map emit-form (rest form))) ; still have to handle attributes
+ (apply list 'do
+ (let [tag (first form)] ; non-empty element
+ (if (vector? tag) ; is the element namespaced?
+ (list 'start-tag (list 'resolve-xmlns (first tag)) (second tag))
+ (list 'start-tag tag)))
+ (concat (map emit-form (rest form))
+ (list '(end-tag))))))
+
+(defn- emit-xml-attr [name-value]
+ (let [[name value] name-value]
+ (if (vector? name)
+ (list 'xml-attr (first name) (second name) value)
+ (list 'xml-attr name value))))
+
+(defn- emit-xml-attrs [xml-attr-map]
+ (apply list 'do
+ (map emit-xml-attr xml-attr-map)))
+
+(defn- emit-form [form]
+ (cond
+ (vector? form) (emit-tag form)
+ (map? form) (emit-xml-attrs form)
+ true `(xml-text ~form)))
+
+(defmacro xmldecl []
+ '(. *xml-stream-writer* writeStartDocument "UTF-8" "1.0"))
+
+(defmacro xmlns [prefix namespace]
+ `(sync nil
+ (set! *xmlns-prefixes* (assoc *xmlns-prefixes* ~prefix ~namespace))
+ (. *xml-stream-writer* setPrefix ~(to-name prefix) ~namespace)))
+
+(defmacro emit-xml [& forms]
+ `(do ~@(map emit-form forms)))
+
+(defmacro with-xml-output [writer & body]
+ `(binding [*xml-stream-writer*
+ (. *xml-output-factory* createXMLStreamWriter ~writer)
+ *xmlns-prefixes* {}
+ *xml-tag-depth* 0]
+ (try
+ (emit-xml ~@body)
+ (finally
+ (. *xml-stream-writer* writeEndDocument)
+ (. *xml-stream-writer* close)
+ nil))))