diff options
author | Chouser <chouser@n01se.net> | 2008-04-06 01:07:32 +0000 |
---|---|---|
committer | Chouser <chouser@n01se.net> | 2008-04-06 01:07:32 +0000 |
commit | 26995ac361e460144102c83bb438d2d3b1413822 (patch) | |
tree | 3ad204552e6d4ded64638de86ec0f98e464213fa | |
parent | f90980822af417bb17a975a5aa1116d5a2694fb0 (diff) |
Add more axis, remove all macro definitions, and simplify implementation.
-rw-r--r-- | zip-filter.clj | 212 |
1 files changed, 123 insertions, 89 deletions
diff --git a/zip-filter.clj b/zip-filter.clj index 361aa474..976f4d06 100644 --- a/zip-filter.clj +++ b/zip-filter.clj @@ -9,111 +9,130 @@ ; System for filtering trees and nodes generated by zip.clj in ; general, and xml trees in particular. -(in-ns 'zip-filter) +(clojure/in-ns 'zip-filter) (clojure/refer 'clojure) -(defn coll? - "Returns true if x implements IPersistentCollection." - [x] (instance? clojure.lang.IPersistentCollection x)) +(defn sequential? + "Returns true if x implements Sequential." + [x] (instance? clojure.lang.Sequential x)) + +; This uses the negative form (no-auto) so that the result from any +; naive function, including user functions, defaults to "auto". +(defn auto + [v x] (with-meta x ((if v dissoc assoc) ^x :zip-filter/no-auto? true))) + +(defn auto? + [x] (not (:zip-filter/no-auto? ^x))) (defn right-locs - "Returns a lazy sequence of locations to the right of loc." - [loc] (when loc (lazy-cons loc (right-locs (zip/right loc))))) + "Returns a lazy sequence of locations to the right of loc, starting with loc." + [loc] (when loc (lazy-cons (auto false loc) (right-locs (zip/right loc))))) + +(defn left-locs + "Returns a lazy sequence of locations to the left of loc, starting with loc." + [loc] (when loc (lazy-cons (auto false loc) (left-locs (zip/left loc))))) (defn leftmost? "Returns true if there are no more nodes to the left of location loc." [loc] (nil? (zip/left loc))) -(defn flatten - "Returns a lazy sequence of all descencents of location loc, in - depth-first order, left-to-right." +(defn rightmost? + "Returns true if there are no more nodes to the right of location loc." + [loc] (nil? (zip/right loc))) + +(defn children + "Returns a lazy sequence of all immediate children of location loc, + left-to-right." + [loc] + (when (zip/branch? loc) + (map #(auto false %) (right-locs (zip/down loc))))) + +(defn children-auto + "Returns a lazy sequence of all immediate children of location loc, + left-to-right, marked so that a following tag= predicate will auto-descend." + #^{:private true} [loc] - (if (zip/branch? loc) - (lazy-cons loc (mapcat flatten (right-locs (zip/down loc)))) - (list loc))) + (when (zip/branch? loc) + (map #(auto true %) (right-locs (zip/down loc))))) + +(defn descendants + "Returns a lazy sequence of all descendants of location loc, in + depth-first order, left-to-right, starting with loc." + [loc] (lazy-cons (auto false loc) (mapcat descendants (children loc)))) + +(defn ancestors + "Returns a lazy sequence of all ancestors of location loc, starting + with loc and proceeding to loc's parent node and on through to the + root of the tree." + [loc] (when loc (lazy-cons (auto false loc) (ancestors (zip/up loc))))) (defn fixup-apply - "Calls (func loc), and then converts the result to the 'appropriate' + "Calls (pred loc), and then converts the result to the 'appropriate' sequence." #^{:private true} - [func loc] - (try - ;(prn :PRE (zip/node loc)) - (let [rtn (func loc)] - (cond (= rtn true) (list loc) - (contains? (meta rtn) :zip-filter/is-node?) (list rtn) - (= rtn false) nil - (= rtn nil) nil - (coll? rtn) rtn - :else (list rtn))) - (catch java.lang.NullPointerException e (prn :CAUGHT e)))) - -(defn seq-filter-expr - #^{:private true} - [func s] (mapcat (fn [loc] (fixup-apply func loc)) s)) + [pred loc] + (let [rtn (pred loc)] + (cond (:zip-filter/is-node? ^rtn) (list rtn) + (= rtn true) (list loc) + (= rtn false) nil + (nil? rtn) nil + (sequential? rtn) rtn + :else (list rtn)))) (defn mapcat-chain - "Used in building query macros. See xml->" #^{:private true} - [loc exprs func] - (let [prevseq (gensym 'prevseq_)] - `(let [initloc# ~loc - ~prevseq - (list (with-meta initloc# - (assoc ^initloc# :zip-filter/is-node? true))) - ~@(mapcat #(list prevseq - (list 'zip-filter/seq-filter-expr - (let [usercode (func %)] - (cond usercode usercode - :else %)) - prevseq)) - exprs)] - ~prevseq))) - - -; === xml-zipper query specialization === - -(in-ns 'zip-filter-xml) + [loc preds mkpred] + (reduce (fn [prevseq expr] + (mapcat (partial fixup-apply (or (mkpred expr) expr)) prevseq)) + (list (with-meta loc (assoc ^loc :zip-filter/is-node? true))) + preds)) + + +; === specialization for xml filtering === + +(clojure/in-ns 'zip-filter-xml) (clojure/refer 'clojure) (refer 'zip-filter) +(def xml->) + (defn attr "Returns the xml attribute named attrname, of the xml node at location loc." - [loc attrname] - (let [n (zip/node loc) a (n :attrs)] - (and a (a attrname)))) + ([attrname] (fn [loc] (attr loc attrname))) + ([loc attrname] (-> loc zip/node :attrs attrname))) (defn attr= "Returns a query predicate that matches a node when it has an attribute named attrname whose value is attrval." - [attrname attrval] - (fn [loc] (= (attr loc attrname) attrval))) + [attrname attrval] (fn [loc] (= attrval (attr loc attrname)))) (defn tag= "Returns a query predicate that matches a node when its is a tag named tagname." [tagname] (fn [loc] - (if (zip/branch? loc) - (filter #(and (zip/branch? %) (= tagname ((zip/node %) :tag))) - (right-locs (zip/down loc))) - nil))) + (filter #(and (zip/branch? %) (= tagname ((zip/node %) :tag))) + (if (auto? loc) (children-auto loc) (list (auto true loc)))))) -(defn content - "Returns the contents of the xml node at location loc." - [loc] ((zip/node loc) :content)) +(defn text + "Returns the textual contents of the given location, similar to + xpaths's value-of" + [loc] + (.replaceAll #^String (apply str (xml-> loc descendants zip/node string?)) + (str "[\\s" (char 160) "]+") " ")) -(defn content= - "Returns a query predicate that matches a node when its xml content equals s." - [s] (fn [loc] (= ((zip/node loc) :content) [s]))) +(defn text= + "Returns a query predicate that matches a node when its textual + content equals s." + [s] (fn [loc] (= (text loc) s))) -(defmacro seq-test +(defn seq-test "Returns a query predicate that matches a node when its xml content matches the query expresions given." #^{:private true} - [& preds] `(fn [loc#] (and (xml-> loc# ~@preds) (list loc#)))) + [preds] (fn [loc] (and (apply xml-> loc preds) (list loc)))) -(defmacro xml-> +(defn xml-> "The loc is passed to the first predicate. If the predicate returns a collection, each value of the collection is passed to the next predicate. If it returns a location, the location is passed to the @@ -125,28 +144,21 @@ predicate to the next predicate. xml-> returns the final sequence. The entire chain is evaluated lazily. - There are also special predicates: keywords are converted to - xml-tag=, strings to xml-content=, and vectors to sub-queries that - return true if they match. + There are also special predicates: keywords are converted to tag=, + strings to text=, and vectors to sub-queries that return true if + they match. See the footer of zip-query.clj for examples." [loc & preds] (mapcat-chain loc preds - #(cond (keyword? %) (list 'tag= %) - (string? %) (list 'content= %) - (vector? %) (list* 'seq-test %)))) + #(cond (keyword? %) (tag= %) + (string? %) (text= %) + (vector? %) (seq-test %)))) -(defmacro xml1-> +(defn xml1-> "Returns the first item from loc based on the query predicates given. See xml->" - [loc & preds] `(first (xml-> ~loc ~@preds))) - -(defn clean-str - "Returns the textual contents of the given sequence of xml - locations, similar to xpaths's value-of" - [locseq] - (. #^String (apply str (mapcat #(xml-> % flatten zip/node string?) locseq)) - (replaceAll (str "[\\s" (char 160) "]+") " "))) + [loc & preds] (first (apply xml-> loc preds))) ; === examples === @@ -183,30 +195,52 @@ '(:feed))) ; two-stage filter using helpful query prediates -(assert (= (xml-> atom1 (tag= :title) content) +(assert (= (xml-> atom1 (tag= :title) text) '("n01senet"))) ; same filter as above, this time using keyword shortcut -(assert (= (xml-> atom1 :title content) +(assert (= (xml-> atom1 :title text) '("n01senet"))) ; multi-stage filter -(assert (= (xml-> atom1 :entry :author :name content) +(assert (= (xml-> atom1 :entry :author :name text) '("Chouser" "agriffis"))) +; test xml1-> +(assert (= (xml1-> atom1 :entry :author :name text) + "Chouser")) + ; multi-stage filter with subquery specified using a vector -(assert (= (xml-> atom1 :entry [:author :name (content= "agriffis")] - :id content) +(assert (= (xml-> atom1 :entry [:author :name (text= "agriffis")] + :id text) '("2"))) ; same filter as above, this time using a string shortcut -(assert (= (xml-> atom1 :entry [:author :name "agriffis"] :id content) +(assert (= (xml-> atom1 :entry [:author :name "agriffis"] :id text) '("2"))) ; attribute access -(assert (= (xml-> atom1 :title #(attr % :type)) +(assert (= (xml-> atom1 :title (attr :type)) '("text"))) ; attribute filtering -(assert (= (xml-> atom1 :link [(attr= :rel "alternate")] #(attr % :type)) +(assert (= (xml-> atom1 :link [(attr= :rel "alternate")] (attr :type)) '("text/html"))) + +; ancestors +(assert (= (xml-> atom1 descendants :id "2" ancestors zip/node #(:tag %)) + '(:id :entry :feed))) + +; ancestors with non-auto tag= (:entry), followed by auto tag= (:id) +(assert (= (xml-> atom1 descendants :name "Chouser" ancestors :entry :id text) + '("1"))) + +; left-locs and detection of returning a single loc (zip/up) +(assert (= (xml-> atom1 descendants :name "Chouser" zip/up left-locs :id text) + '("1"))) + +; right-locs +(assert (= (xml-> atom1 descendants :id right-locs :author text) + '("Chouser" "agriffis"))) + +) |