diff options
Diffstat (limited to 'src/clj/clojure/xml.clj')
-rw-r--r-- | src/clj/clojure/xml.clj | 115 |
1 files changed, 115 insertions, 0 deletions
diff --git a/src/clj/clojure/xml.clj b/src/clj/clojure/xml.clj new file mode 100644 index 00000000..117016af --- /dev/null +++ b/src/clj/clojure/xml.clj @@ -0,0 +1,115 @@ +; Copyright (c) Rich Hickey. All rights reserved. +; The use and distribution terms for this software are covered by the +; Common Public License 1.0 (http://opensource.org/licenses/cpl.php) +; which can be found in the file CPL.TXT at the root of this distribution. +; By using this software in any fashion, you are agreeing to be bound by +; the terms of this license. +; You must not remove this notice, or any other, from this software. + +(ns clojure.xml + (:import (org.xml.sax ContentHandler Attributes SAXException) + (javax.xml.parsers SAXParser SAXParserFactory))) + +(def *stack*) +(def *current*) +(def *state*) ; :element :chars :between +(def *sb*) + +(defstruct element :tag :attrs :content) + +(def tag (accessor element :tag)) +(def attrs (accessor element :attrs)) +(def content (accessor element :content)) + +(def content-handler + (let [push-content (fn [e c] + (assoc e :content (conj (or (:content e) []) c))) + push-chars (fn [] + (when (and (= *state* :chars) + (some (complement #(. Character (isWhitespace %))) (str *sb*))) + (set! *current* (push-content *current* (str *sb*)))))] + (new clojure.lang.XMLHandler + (proxy [ContentHandler] [] + (startElement [uri local-name q-name #^Attributes atts] + (let [attrs (fn [ret i] + (if (neg? i) + ret + (recur (assoc ret + (. clojure.lang.Keyword (intern (symbol (. atts (getQName i))))) + (. atts (getValue i))) + (dec i)))) + e (struct element + (. clojure.lang.Keyword (intern (symbol q-name))) + (when (pos? (. atts (getLength))) + (attrs {} (dec (. atts (getLength))))))] + (push-chars) + (set! *stack* (conj *stack* *current*)) + (set! *current* e) + (set! *state* :element)) + nil) + (endElement [uri local-name q-name] + (push-chars) + (set! *current* (push-content (peek *stack*) *current*)) + (set! *stack* (pop *stack*)) + (set! *state* :between) + nil) + (characters [ch start length] + (when-not (= *state* :chars) + (set! *sb* (new StringBuilder))) + (let [#^StringBuilder sb *sb*] + (. sb (append ch start length)) + (set! *state* :chars)) + nil) + (setDocumentLocator [locator]) + (startDocument []) + (endDocument []) + (startPrefixMapping [prefix uri]) + (endPrefixMapping [prefix]) + (ignorableWhitespace [ch start length]) + (processingInstruction [target data]) + (skippedEntity [name]) + )))) + +(defn startparse-sax [s ch] + (.. SAXParserFactory (newInstance) (newSAXParser) (parse s ch))) + +(defn parse + "Parses and loads the source s, which can be a File, InputStream or + String naming a URI. Returns a tree of the xml/element struct-map, + which has the keys :tag, :attrs, and :content. and accessor fns tag, + attrs, and content. Other parsers can be supplied by passing + startparse, a fn taking a source and a ContentHandler and returning + a parser" + ([s] (parse s startparse-sax)) + ([s startparse] + (binding [*stack* nil + *current* (struct element) + *state* :between + *sb* nil] + (startparse s content-handler) + ((:content *current*) 0)))) + +(defn emit-element [e] + (if (instance? String e) + (println e) + (do + (print (str "<" (name (:tag e)))) + (when (:attrs e) + (doseq [attr (:attrs e)] + (print (str " " (name (key attr)) "='" (val attr)"'")))) + (if (:content e) + (do + (println ">") + (doseq [c (:content e)] + (emit-element c)) + (println (str "</" (name (:tag e)) ">"))) + (println "/>"))))) + +(defn emit [x] + (println "<?xml version='1.0' encoding='UTF-8'?>") + (emit-element x)) + +;(export '(tag attrs content parse element emit emit-element)) + +;(load-file "/Users/rich/dev/clojure/src/xml.clj") +;(def x (xml/parse "http://arstechnica.com/journals.rssx")) |