aboutsummaryrefslogtreecommitdiff
path: root/zip_filter/xml/xml.clj
diff options
context:
space:
mode:
Diffstat (limited to 'zip_filter/xml/xml.clj')
-rw-r--r--zip_filter/xml/xml.clj177
1 files changed, 177 insertions, 0 deletions
diff --git a/zip_filter/xml/xml.clj b/zip_filter/xml/xml.clj
new file mode 100644
index 00000000..16f67e8e
--- /dev/null
+++ b/zip_filter/xml/xml.clj
@@ -0,0 +1,177 @@
+; Copyright (c) Chris Houser, April 2008. All rights reserved.
+; The use and distribution terms for this software are covered by the
+; Common Public License 1.0 (http://opensource.org/licenses/cpl.php)
+; which can be found in the file CPL.TXT at the root of this distribution.
+; By using this software in any fashion, you are agreeing to be bound by
+; the terms of this license.
+; You must not remove this notice, or any other, from this software.
+
+; Specialization of zip-filter for xml trees.
+
+(clojure/in-ns 'clojure.contrib.zip-filter.xml)
+(clojure/refer 'clojure)
+
+; load zip-filter if it isn't already -- when will lib.clj be guaranteed?
+(when-not (find-ns 'clojure.contrib.zip-filter)
+ (clojure.lang.RT/loadResourceScript
+ "clojure/contrib/zip_filter/zip_filter.clj"))
+
+(alias 'zf 'clojure.contrib.zip-filter)
+(alias 'zip 'clojure.zip)
+(alias 'xml 'clojure.xml)
+
+(def xml->)
+
+(defn attr
+ "Returns the xml attribute named attrname, of the xml node at location loc."
+ ([attrname] (fn [loc] (attr loc attrname)))
+ ([loc attrname] (when (zip/branch? loc) (-> loc zip/node :attrs attrname))))
+
+(defn attr=
+ "Returns a query predicate that matches a node when it has an
+ attribute named attrname whose value is attrval."
+ [attrname attrval] (fn [loc] (= attrval (attr loc attrname))))
+
+(defn tag=
+ "Returns a query predicate that matches a node when its is a tag
+ named tagname."
+ [tagname]
+ (fn [loc]
+ (filter #(and (zip/branch? %) (= tagname ((zip/node %) :tag)))
+ (if (zf/auto? loc)
+ (zf/children-auto loc)
+ (list (zf/auto true loc))))))
+
+(defn text
+ "Returns the textual contents of the given location, similar to
+ xpaths's value-of"
+ [loc]
+ (.replaceAll
+ #^String (apply str (xml-> loc zf/descendants zip/node string?))
+ (str "[\\s" (char 160) "]+") " "))
+
+(defn text=
+ "Returns a query predicate that matches a node when its textual
+ content equals s."
+ [s] (fn [loc] (= (text loc) s)))
+
+(defn seq-test
+ "Returns a query predicate that matches a node when its xml content
+ matches the query expresions given."
+ #^{:private true}
+ [preds] (fn [loc] (and (apply xml-> loc preds) (list loc))))
+
+(defn xml->
+ "The loc is passed to the first predicate. If the predicate returns
+ a collection, each value of the collection is passed to the next
+ predicate. If it returns a location, the location is passed to the
+ next predicate. If it returns true, the input location is passed to
+ the next predicate. If it returns false or nil, the next predicate
+ is not called.
+
+ This process is repeated, passing the processed results of each
+ predicate to the next predicate. xml-> returns the final sequence.
+ The entire chain is evaluated lazily.
+
+ There are also special predicates: keywords are converted to tag=,
+ strings to text=, and vectors to sub-queries that return true if
+ they match.
+
+ See the footer of zip-query.clj for examples."
+ [loc & preds]
+ (zf/mapcat-chain loc preds
+ #(cond (keyword? %) (tag= %)
+ (string? %) (text= %)
+ (vector? %) (seq-test %))))
+
+(defn xml1->
+ "Returns the first item from loc based on the query predicates
+ given. See xml->"
+ [loc & preds] (first (apply xml-> loc preds)))
+
+
+; === examples ===
+
+(comment
+
+(defn parse-str [s]
+ (zip/xml-zip (xml/parse (new org.xml.sax.InputSource
+ (new java.io.StringReader s)))))
+
+(def atom1 (parse-str "<?xml version='1.0' encoding='UTF-8'?>
+<feed xmlns='http://www.w3.org/2005/Atom'>
+ <id>tag:blogger.com,1999:blog-28403206</id>
+ <updated>2008-02-14T08:00:58.567-08:00</updated>
+ <title type='text'>n01senet</title>
+ <link rel='alternate' type='text/html' href='http://n01senet.blogspot.com/'/>
+ <entry>
+ <id>1</id>
+ <published>2008-02-13</published>
+ <title type='text'>clojure is the best lisp yet</title>
+ <author><name>Chouser</name></author>
+ </entry>
+ <entry>
+ <id>2</id>
+ <published>2008-02-07</published>
+ <title type='text'>experimenting with vnc</title>
+ <author><name>agriffis</name></author>
+ </entry>
+</feed>
+"))
+
+; simple single-function filter
+(assert (= (xml-> atom1 #((zip/node %) :tag))
+ '(:feed)))
+
+; two-stage filter using helpful query prediates
+(assert (= (xml-> atom1 (tag= :title) text)
+ '("n01senet")))
+
+; same filter as above, this time using keyword shortcut
+(assert (= (xml-> atom1 :title text)
+ '("n01senet")))
+
+; multi-stage filter
+(assert (= (xml-> atom1 :entry :author :name text)
+ '("Chouser" "agriffis")))
+
+; test xml1->
+(assert (= (xml1-> atom1 :entry :author :name text)
+ "Chouser"))
+
+; multi-stage filter with subquery specified using a vector
+(assert (= (xml-> atom1 :entry [:author :name (text= "agriffis")]
+ :id text)
+ '("2")))
+
+; same filter as above, this time using a string shortcut
+(assert (= (xml-> atom1 :entry [:author :name "agriffis"] :id text)
+ '("2")))
+
+; attribute access
+(assert (= (xml-> atom1 :title (attr :type))
+ '("text")))
+
+; attribute filtering
+(assert (= (xml-> atom1 :link [(attr= :rel "alternate")] (attr :type))
+ '("text/html")))
+
+; ancestors
+(assert (= (xml-> atom1 zf/descendants :id "2" zf/ancestors zip/node #(:tag %))
+ '(:id :entry :feed)))
+
+; ancestors with non-auto tag= (:entry), followed by auto tag= (:id)
+(assert (= (xml-> atom1 zf/descendants :name "Chouser" zf/ancestors
+ :entry :id text)
+ '("1")))
+
+; left-locs and detection of returning a single loc (zip/up)
+(assert (= (xml-> atom1 zf/descendants :name "Chouser" zip/up
+ zf/left-locs :id text)
+ '("1")))
+
+; right-locs
+(assert (= (xml-> atom1 zf/descendants :id zf/right-locs :author text)
+ '("Chouser" "agriffis")))
+
+)