aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChouser <chouser@n01se.net>2008-04-02 14:34:01 +0000
committerChouser <chouser@n01se.net>2008-04-02 14:34:01 +0000
commit093d86d3ddc956a0077396040cccd1981b6991a2 (patch)
tree30a41825395f1726710f0858795a4c3b5ddfc301
Clean up namespaces and function names.
-rw-r--r--zip-filter.clj212
1 files changed, 212 insertions, 0 deletions
diff --git a/zip-filter.clj b/zip-filter.clj
new file mode 100644
index 00000000..361aa474
--- /dev/null
+++ b/zip-filter.clj
@@ -0,0 +1,212 @@
+; Copyright (c) Chris Houser, April 2008. All rights reserved.
+; The use and distribution terms for this software are covered by the
+; Common Public License 1.0 (http://opensource.org/licenses/cpl.php)
+; which can be found in the file CPL.TXT at the root of this distribution.
+; By using this software in any fashion, you are agreeing to be bound by
+; the terms of this license.
+; You must not remove this notice, or any other, from this software.
+
+; System for filtering trees and nodes generated by zip.clj in
+; general, and xml trees in particular.
+
+(in-ns 'zip-filter)
+(clojure/refer 'clojure)
+
+(defn coll?
+ "Returns true if x implements IPersistentCollection."
+ [x] (instance? clojure.lang.IPersistentCollection x))
+
+(defn right-locs
+ "Returns a lazy sequence of locations to the right of loc."
+ [loc] (when loc (lazy-cons loc (right-locs (zip/right loc)))))
+
+(defn leftmost?
+ "Returns true if there are no more nodes to the left of location loc."
+ [loc] (nil? (zip/left loc)))
+
+(defn flatten
+ "Returns a lazy sequence of all descencents of location loc, in
+ depth-first order, left-to-right."
+ [loc]
+ (if (zip/branch? loc)
+ (lazy-cons loc (mapcat flatten (right-locs (zip/down loc))))
+ (list loc)))
+
+(defn fixup-apply
+ "Calls (func loc), and then converts the result to the 'appropriate'
+ sequence."
+ #^{:private true}
+ [func loc]
+ (try
+ ;(prn :PRE (zip/node loc))
+ (let [rtn (func loc)]
+ (cond (= rtn true) (list loc)
+ (contains? (meta rtn) :zip-filter/is-node?) (list rtn)
+ (= rtn false) nil
+ (= rtn nil) nil
+ (coll? rtn) rtn
+ :else (list rtn)))
+ (catch java.lang.NullPointerException e (prn :CAUGHT e))))
+
+(defn seq-filter-expr
+ #^{:private true}
+ [func s] (mapcat (fn [loc] (fixup-apply func loc)) s))
+
+(defn mapcat-chain
+ "Used in building query macros. See xml->"
+ #^{:private true}
+ [loc exprs func]
+ (let [prevseq (gensym 'prevseq_)]
+ `(let [initloc# ~loc
+ ~prevseq
+ (list (with-meta initloc#
+ (assoc ^initloc# :zip-filter/is-node? true)))
+ ~@(mapcat #(list prevseq
+ (list 'zip-filter/seq-filter-expr
+ (let [usercode (func %)]
+ (cond usercode usercode
+ :else %))
+ prevseq))
+ exprs)]
+ ~prevseq)))
+
+
+; === xml-zipper query specialization ===
+
+(in-ns 'zip-filter-xml)
+(clojure/refer 'clojure)
+(refer 'zip-filter)
+
+(defn attr
+ "Returns the xml attribute named attrname, of the xml node at location loc."
+ [loc attrname]
+ (let [n (zip/node loc) a (n :attrs)]
+ (and a (a attrname))))
+
+(defn attr=
+ "Returns a query predicate that matches a node when it has an
+ attribute named attrname whose value is attrval."
+ [attrname attrval]
+ (fn [loc] (= (attr loc attrname) attrval)))
+
+(defn tag=
+ "Returns a query predicate that matches a node when its is a tag
+ named tagname."
+ [tagname]
+ (fn [loc]
+ (if (zip/branch? loc)
+ (filter #(and (zip/branch? %) (= tagname ((zip/node %) :tag)))
+ (right-locs (zip/down loc)))
+ nil)))
+
+(defn content
+ "Returns the contents of the xml node at location loc."
+ [loc] ((zip/node loc) :content))
+
+(defn content=
+ "Returns a query predicate that matches a node when its xml content equals s."
+ [s] (fn [loc] (= ((zip/node loc) :content) [s])))
+
+(defmacro seq-test
+ "Returns a query predicate that matches a node when its xml content
+ matches the query expresions given."
+ #^{:private true}
+ [& preds] `(fn [loc#] (and (xml-> loc# ~@preds) (list loc#))))
+
+(defmacro xml->
+ "The loc is passed to the first predicate. If the predicate returns
+ a collection, each value of the collection is passed to the next
+ predicate. If it returns a location, the location is passed to the
+ next predicate. If it returns true, the input location is passed to
+ the next predicate. If it returns false or nil, the next predicate
+ is not called.
+
+ This process is repeated, passing the processed results of each
+ predicate to the next predicate. xml-> returns the final sequence.
+ The entire chain is evaluated lazily.
+
+ There are also special predicates: keywords are converted to
+ xml-tag=, strings to xml-content=, and vectors to sub-queries that
+ return true if they match.
+
+ See the footer of zip-query.clj for examples."
+ [loc & preds]
+ (mapcat-chain loc preds
+ #(cond (keyword? %) (list 'tag= %)
+ (string? %) (list 'content= %)
+ (vector? %) (list* 'seq-test %))))
+
+(defmacro xml1->
+ "Returns the first item from loc based on the query predicates
+ given. See xml->"
+ [loc & preds] `(first (xml-> ~loc ~@preds)))
+
+(defn clean-str
+ "Returns the textual contents of the given sequence of xml
+ locations, similar to xpaths's value-of"
+ [locseq]
+ (. #^String (apply str (mapcat #(xml-> % flatten zip/node string?) locseq))
+ (replaceAll (str "[\\s" (char 160) "]+") " ")))
+
+
+; === examples ===
+
+(comment
+
+(defn parse-str [s]
+ (zip/xml-zip (xml/parse (new org.xml.sax.InputSource
+ (new java.io.StringReader s)))))
+
+(def atom1 (parse-str "<?xml version='1.0' encoding='UTF-8'?>
+<feed xmlns='http://www.w3.org/2005/Atom'>
+ <id>tag:blogger.com,1999:blog-28403206</id>
+ <updated>2008-02-14T08:00:58.567-08:00</updated>
+ <title type='text'>n01senet</title>
+ <link rel='alternate' type='text/html' href='http://n01senet.blogspot.com/'/>
+ <entry>
+ <id>1</id>
+ <published>2008-02-13</published>
+ <title type='text'>clojure is the best lisp yet</title>
+ <author><name>Chouser</name></author>
+ </entry>
+ <entry>
+ <id>2</id>
+ <published>2008-02-07</published>
+ <title type='text'>experimenting with vnc</title>
+ <author><name>agriffis</name></author>
+ </entry>
+</feed>
+"))
+
+; simple single-function filter
+(assert (= (xml-> atom1 #((zip/node %) :tag))
+ '(:feed)))
+
+; two-stage filter using helpful query prediates
+(assert (= (xml-> atom1 (tag= :title) content)
+ '("n01senet")))
+
+; same filter as above, this time using keyword shortcut
+(assert (= (xml-> atom1 :title content)
+ '("n01senet")))
+
+; multi-stage filter
+(assert (= (xml-> atom1 :entry :author :name content)
+ '("Chouser" "agriffis")))
+
+; multi-stage filter with subquery specified using a vector
+(assert (= (xml-> atom1 :entry [:author :name (content= "agriffis")]
+ :id content)
+ '("2")))
+
+; same filter as above, this time using a string shortcut
+(assert (= (xml-> atom1 :entry [:author :name "agriffis"] :id content)
+ '("2")))
+
+; attribute access
+(assert (= (xml-> atom1 :title #(attr % :type))
+ '("text")))
+
+; attribute filtering
+(assert (= (xml-> atom1 :link [(attr= :rel "alternate")] #(attr % :type))
+ '("text/html")))