summaryrefslogtreecommitdiff
path: root/src/xml.clj
blob: abc3844c6320ea35dfff5e2e9f5d108e5a71bf0f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
;   Copyright (c) Rich Hickey. All rights reserved.
;   The use and distribution terms for this software are covered by the
;   Common Public License 1.0 (http://opensource.org/licenses/cpl.php)
;   which can be found in the file CPL.TXT at the root of this distribution.
;   By using this software in any fashion, you are agreeing to be bound by
;   the terms of this license.
;   You must not remove this notice, or any other, from this software.

(in-ns 'xml)
(clojure/refer 'clojure)

(import '(org.xml.sax ContentHandler Attributes SAXException)
	'(javax.xml.parsers SAXParser SAXParserFactory))

(def *stack*)
(def *current*)
(def *state*) ; :element :chars :between
(def *sb*)

(defstruct element :tag :attrs :content)

(def tag (accessor element :tag))
(def attrs (accessor element :attrs))
(def content (accessor element :content))

(def content-handler
  (let [push-content (fn [e c]
                       (assoc e :content (conj (or (:content e) []) c)))
        push-chars (fn []
                     (when (and (= *state* :chars)
                                (some (complement #(. Character (isWhitespace %))) (str *sb*)))
                       (set! *current* (push-content *current* (str *sb*)))))]
    (new clojure.lang.XMLHandler
         (implement [ContentHandler]
           (startElement [uri local-name q-name #^Attributes atts]
             (let [attrs (fn [ret i]
                           (if (neg? i)
                             ret
                             (recur (assoc ret 
                                           (. clojure.lang.Keyword (intern (symbol (. atts (getQName i)))))
                                           (. atts (getValue i)))
                                    (dec i))))
                   e (struct element 
                             (. clojure.lang.Keyword (intern (symbol q-name)))
                             (when (pos? (. atts (getLength)))
                               (attrs {} (dec (. atts (getLength))))))]
               (push-chars)
               (set! *stack* (conj *stack* *current*))
               (set! *current* e)
               (set! *state* :element))
             nil)
           (endElement [uri local-name q-name]
             (push-chars)
             (set! *current* (push-content (peek *stack*) *current*))
             (set! *stack* (pop *stack*))
             (set! *state* :between)
             nil)
           (characters [ch start length]
             (when-not (= *state* :chars)
               (set! *sb* (new StringBuilder)))
             (let [#^StringBuilder sb *sb*]
               (. sb (append ch start length))
               (set! *state* :chars))
             nil)))))

(defn startparse-sax [s ch]
  (.. SAXParserFactory (newInstance) (newSAXParser) (parse s ch)))

(defn parse
  ([s] (parse s startparse-sax))
  ([s startparse]
    (binding [*stack* nil
              *current* (struct element)
              *state* :between
              *sb* nil]
      (startparse s content-handler)
      ((:content *current*) 0)))) 

(defn emit-element [e]
  (if (instance? String e)
    (println e)
    (do
      (print (str "<" (name (:tag e))))
      (when (:attrs e)
	(doseq attr (:attrs e)
	  (print (str " " (name (key attr)) "='" (val attr)"'"))))
      (if (:content e)
	(do
	  (println ">")
	  (doseq c (:content e)
	    (emit-element c))
	  (println (str "</" (name (:tag e)) ">")))
	(println "/>")))))

(defn emit [x]
  (println "<?xml version='1.0' encoding='UTF-8'?>")
  (emit-element x))

;(export '(tag attrs content parse element emit emit-element))

;(load-file "/Users/rich/dev/clojure/src/xml.clj")
;(def x (xml/parse "http://arstechnica.com/journals.rssx"))