summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStuart Halloway <stu@thinkrelevance.com>2010-10-12 13:12:40 -0400
committerStuart Halloway <stu@thinkrelevance.com>2010-10-12 19:49:42 -0400
commit3c148e2ee7f244fccd246365010055dcc946abe0 (patch)
tree0b24718b4fc6c1ff83b2a9ca991c5b588c72b756
parent5ca0c1feb7f7260aad257e52f2ddb0d426e2db77 (diff)
#448 structural diff
Signed-off-by: Stuart Halloway <stu@thinkrelevance.com>
-rw-r--r--build.xml1
-rw-r--r--src/clj/clojure/data.clj108
-rw-r--r--test/clojure/test_clojure.clj1
-rw-r--r--test/clojure/test_clojure/data.clj27
4 files changed, 137 insertions, 0 deletions
diff --git a/build.xml b/build.xml
index 1fc9cc94..2cb38650 100644
--- a/build.xml
+++ b/build.xml
@@ -119,6 +119,7 @@
<arg value="clojure.java.shell"/>
<arg value="clojure.java.browse-ui"/>
<arg value="clojure.string"/>
+ <arg value="clojure.data"/>
</java>
</target>
diff --git a/src/clj/clojure/data.clj b/src/clj/clojure/data.clj
new file mode 100644
index 00000000..fc962049
--- /dev/null
+++ b/src/clj/clojure/data.clj
@@ -0,0 +1,108 @@
+; Copyright (c) Rich Hickey. All rights reserved.
+; The use and distribution terms for this software are covered by the
+; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
+; which can be found in the file epl-v10.html at the root of this distribution.
+; By using this software in any fashion, you are agreeing to be bound by
+; the terms of this license.
+; You must not remove this notice, or any other, from this software.
+
+(ns
+ ^{:author "Stuart Halloway",
+ :doc "Non-core data functions."}
+ clojure.data
+ (:require [clojure.set :as set]))
+
+(defn- atom-diff
+ "Internal helper for diff."
+ [a b]
+ (if (= a b) [nil nil a] [a b nil]))
+
+;; for big things a sparse vector class would be better
+(defn- vectorize
+ "Convert an associative-by-numeric-index collection into
+ an equivalent vector, with nil for any missing keys"
+ [m]
+ (when (seq m)
+ (reduce
+ (fn [result [k v]] (assoc result k v))
+ (vec (repeat (apply max (keys m)) nil))
+ m)))
+
+(declare diff)
+
+(defprotocol EqualityPartition
+ "Implementation detail. Subject to change."
+ (equality-partition [x] "Implementation detail. Subject to change."))
+
+(defprotocol Diff
+ "Implementation detail. Subject to change."
+ (diff-similar [a b] "Implementation detail. Subject to change."))
+
+(extend nil
+ Diff
+ {:diff-similar atom-diff})
+
+(extend Object
+ Diff
+ {:diff-similar atom-diff}
+ EqualityPartition
+ {:equality-partition (fn [x] (if (.. x getClass isArray) :sequential :atom))})
+
+(defn- diff-associative
+ "Diff associative things a and b, comparing only keys in ks."
+ [a b ks]
+ (reduce
+ (fn [diff1 diff2]
+ (map merge diff1 diff2))
+ [nil nil nil]
+ (map
+ (fn [k] (map #(when % {k %}) (diff (get a k) (get b k))))
+ ks)))
+
+(extend-protocol EqualityPartition
+ nil
+ (equality-partition [x] :atom)
+
+ java.util.Set
+ (equality-partition [x] :set)
+
+ java.util.List
+ (equality-partition [x] :sequential)
+
+ java.util.Map
+ (equality-partition [x] :map))
+
+(extend-protocol Diff
+ java.util.Set
+ (diff-similar [a b]
+ [(not-empty (set/difference a b))
+ (not-empty (set/difference b a))
+ (not-empty (set/intersection a b))])
+
+ java.util.List
+ (diff-similar [a b]
+ (vec (map vectorize (diff-associative
+ (if (vector? a) a (vec a))
+ (if (vector? b) b (vec b))
+ (range (max (count a) (count b)))))))
+
+ java.util.Map
+ (diff-similar [a b]
+ (diff-associative a b (set/union (keys a) (keys b)))))
+
+(defn diff
+ "Recursively compares a and b, returning a tuple of
+ [things-only-in-a things-only-in-b things-in-both].
+ Comparison rules:
+
+ * Maps are subdiffed where keys match and values differ.
+ * Sets are never subdiffed.
+ * All sequential things are treated as associative collections
+ by their indexes, with results returned as vectors.
+ * Everything else (including strings!) is treated as
+ an atom and compared for equality."
+ [a b]
+ (if (= (equality-partition a) (equality-partition b))
+ (diff-similar a b)
+ (atom-diff a b)))
+
diff --git a/test/clojure/test_clojure.clj b/test/clojure/test_clojure.clj
index fa482d88..a82ed0ae 100644
--- a/test/clojure/test_clojure.clj
+++ b/test/clojure/test_clojure.clj
@@ -64,6 +64,7 @@
:transients
:def
:keywords
+ :data
])
(def test-namespaces
diff --git a/test/clojure/test_clojure/data.clj b/test/clojure/test_clojure/data.clj
new file mode 100644
index 00000000..5c96c86d
--- /dev/null
+++ b/test/clojure/test_clojure/data.clj
@@ -0,0 +1,27 @@
+; Copyright (c) Rich Hickey. All rights reserved.
+; The use and distribution terms for this software are covered by the
+; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
+; which can be found in the file epl-v10.html at the root of this distribution.
+; By using this software in any fashion, you are agreeing to be bound by
+; the terms of this license.
+; You must not remove this notice, or any other, from this software.
+
+(ns clojure.test-clojure.data
+ (:use clojure.data clojure.test))
+
+(deftest diff-test
+ (are [d x y] (= d (diff x y))
+ [nil nil nil] nil nil
+ [1 2 nil] 1 2
+ [nil nil [1 2 3]] [1 2 3] '(1 2 3)
+ [1 [:a :b] nil] 1 [:a :b]
+ [{:a 1} :b nil] {:a 1} :b
+ [:team #{:p1 :p2} nil] :team #{:p1 :p2}
+ [{0 :a} [:a] nil] {0 :a} [:a]
+ [nil [nil 2] [1]] [1] [1 2]
+ [nil nil [1 2]] [1 2] (into-array [1 2])
+ [#{:a} #{:b} #{:c :d}] #{:a :c :d} #{:b :c :d}
+ [nil nil {:a 1}] {:a 1} {:a 1}
+ [{:a #{2}} {:a #{4}} {:a #{3}}] {:a #{2 3}} {:a #{3 4}}
+ [{:a {:c [1]}} {:a {:c [0]}} {:a {:c [nil 2] :b 1}}] {:a {:b 1 :c [1 2]}} {:a {:b 1 :c [0 2]}}))
+