; Copyright (c) Rich Hickey. All rights reserved. ; The use and distribution terms for this software are covered by the ; Common Public License 1.0 (http://opensource.org/licenses/cpl.php) ; which can be found in the file CPL.TXT at the root of this distribution. ; By using this software in any fashion, you are agreeing to be bound by ; the terms of this license. ; You must not remove this notice, or any other, from this software. (in-ns 'xml) (clojure/refer 'clojure) (import '(org.xml.sax ContentHandler Attributes SAXException) '(javax.xml.parsers SAXParser SAXParserFactory)) (def *stack*) (def *current*) (def *state*) ; :element :chars :between (def *sb*) (defstruct element :tag :attrs :content) (def tag (accessor element :tag)) (def attrs (accessor element :attrs)) (def content (accessor element :content)) (def content-handler (let [push-content (fn [e c] (assoc e :content (conj (or (:content e) []) c))) push-chars (fn [] (when (and (= *state* :chars) (some (complement #(. Character (isWhitespace %))) (str *sb*))) (set! *current* (push-content *current* (str *sb*)))))] (new clojure.lang.XMLHandler (proxy [ContentHandler] [] (startElement [uri local-name q-name #^Attributes atts] (let [attrs (fn [ret i] (if (neg? i) ret (recur (assoc ret (. clojure.lang.Keyword (intern (symbol (. atts (getQName i))))) (. atts (getValue i))) (dec i)))) e (struct element (. clojure.lang.Keyword (intern (symbol q-name))) (when (pos? (. atts (getLength))) (attrs {} (dec (. atts (getLength))))))] (push-chars) (set! *stack* (conj *stack* *current*)) (set! *current* e) (set! *state* :element)) nil) (endElement [uri local-name q-name] (push-chars) (set! *current* (push-content (peek *stack*) *current*)) (set! *stack* (pop *stack*)) (set! *state* :between) nil) (characters [ch start length] (when-not (= *state* :chars) (set! *sb* (new StringBuilder))) (let [#^StringBuilder sb *sb*] (. sb (append ch start length)) (set! *state* :chars)) nil) (setDocumentLocator [locator]) (startDocument []) (endDocument []) (startPrefixMapping [prefix uri]) (endPrefixMapping [prefix]) (ignorableWhitespace [ch start length]) (processingInstruction [target data]) (skippedEntity [name]) )))) (defn startparse-sax [s ch] (.. SAXParserFactory (newInstance) (newSAXParser) (parse s ch))) (defn parse "Parses and loads the source s, which can be a File, InputStream or String naming a URI. Returns a tree of the xml/element struct-map, which has the keys :tag, :attrs, and :content. and accessor fns tag, attrs, and content. Other parsers can be supplied by passing startparse, a fn taking a source and a ContentHandler and returning a parser" ([s] (parse s startparse-sax)) ([s startparse] (binding [*stack* nil *current* (struct element) *state* :between *sb* nil] (startparse s content-handler) ((:content *current*) 0)))) (defn emit-element [e] (if (instance? String e) (println e) (do (print (str "<" (name (:tag e)))) (when (:attrs e) (doseq attr (:attrs e) (print (str " " (name (key attr)) "='" (val attr)"'")))) (if (:content e) (do (println ">") (doseq c (:content e) (emit-element c)) (println (str ""))) (println "/>"))))) (defn emit [x] (println "") (emit-element x)) ;(export '(tag attrs content parse element emit emit-element)) ;(load-file "/Users/rich/dev/clojure/src/xml.clj") ;(def x (xml/parse "http://arstechnica.com/journals.rssx"))