summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/xml.clj74
1 files changed, 74 insertions, 0 deletions
diff --git a/src/xml.clj b/src/xml.clj
new file mode 100644
index 00000000..a0dfce02
--- /dev/null
+++ b/src/xml.clj
@@ -0,0 +1,74 @@
+; Copyright (c) Rich Hickey. All rights reserved.
+; The use and distribution terms for this software are covered by the
+; Common Public License 1.0 (http://opensource.org/licenses/cpl.php)
+; which can be found in the file CPL.TXT at the root of this distribution.
+; By using this software in any fashion, you are agreeing to be bound by
+; the terms of this license.
+; You must not remove this notice, or any other, from this software.
+
+(in-namespace 'xml)
+
+(import '(org.xml.sax ContentHandler Attributes SAXException)
+ '(javax.xml.parsers SAXParser SAXParserFactory))
+
+(def *stack*)
+(def *current*)
+(def *state*) ; :element :chars :between
+(def *sb*)
+
+(defstruct element :tag :attrs :content)
+
+(def tag (accessor element :tag))
+(def attrs (accessor element :attrs))
+(def content (accessor element :content))
+
+(def content-handler
+ (new clojure.lang.XMLHandler
+ (implement [ContentHandler]
+ (startElement [uri local-name q-name #^Attributes atts]
+ (let [attrs (fn [ret i]
+ (if (neg? i)
+ ret
+ (recur (assoc ret
+ (. clojure.lang.Keyword (intern (sym (. atts (getQName i)))))
+ (. atts (getValue i)))
+ (dec i))))
+ e (struct element
+ :tag (. clojure.lang.Keyword (intern (sym q-name)))
+ :attrs (when (pos? (. atts (getLength)))
+ (attrs {} (dec (. atts (getLength))))))]
+ (set! *stack* (conj *stack* *current*))
+ (set! *current* e)
+ (set! *state* :element))
+ nil)
+ (endElement [uri local-name q-name]
+ (let [push-content (fn [e c]
+ (assoc e :content (conj (or (:content e) []) c)))]
+ (when (eql? *state* :chars)
+ (set! *current* (push-content *current* (str *sb*))))
+ (set! *current* (push-content (peek *stack*) *current*))
+ (set! *stack* (pop *stack*))
+ (set! *state* :between))
+ nil)
+ (characters [ch start length]
+ (when-not (eql? *state* :between)
+ (when (eql? *state* :element)
+ (set! *sb* (new StringBuilder)))
+ (let [#^StringBuilder sb *sb*]
+ (. sb (append ch start length))
+ (set! *state* :chars)))
+ nil))))
+
+(defn parse [s]
+ (let [p (.. SAXParserFactory (newInstance) (newSAXParser))]
+ (binding [*stack* nil
+ *current* (struct element)
+ *state* :between
+ *sb* nil]
+ (. p (parse s content-handler))
+ ((:content *current*) 0))))
+
+(def *export* '(xml tag attrs content parse))
+
+;(load-file "/Users/rich/dev/clojure/src/xml.clj")
+;(def x (xml/parse "http://arstechnica.com/journals.rssx"))