diff options
Diffstat (limited to 'src/main/clojure/clojure/contrib/datalog/database.clj')
-rw-r--r-- | src/main/clojure/clojure/contrib/datalog/database.clj | 287 |
1 files changed, 287 insertions, 0 deletions
diff --git a/src/main/clojure/clojure/contrib/datalog/database.clj b/src/main/clojure/clojure/contrib/datalog/database.clj new file mode 100644 index 00000000..aba41df9 --- /dev/null +++ b/src/main/clojure/clojure/contrib/datalog/database.clj @@ -0,0 +1,287 @@ +;; Copyright (c) Jeffrey Straszheim. All rights reserved. The use and +;; distribution terms for this software are covered by the Eclipse Public +;; License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) which can +;; be found in the file epl-v10.html at the root of this distribution. By +;; using this software in any fashion, you are agreeing to be bound by the +;; terms of this license. You must not remove this notice, or any other, +;; from this software. +;; +;; database.clj +;; +;; A Clojure implementation of Datalog -- Support for in-memory database +;; +;; straszheimjeffrey (gmail) +;; Created 21 Feburary 2009 + + +(ns clojure.contrib.datalog.database + (:use clojure.contrib.datalog.util) + (:use clojure.contrib.def) + (:use [clojure.set :only (union intersection difference)]) + (:use [clojure.contrib.except :only (throwf)])) + + +(defstruct relation + :schema ; A set of key names + :data ; A set of tuples + :indexes) ; A map key names to indexes (in turn a map of value to tuples) + + +;;; DDL + +(defmethod print-method ::datalog-database + [db #^Writer writer] + (binding [*out* writer] + (do + (println "(datalog-database") + (println "{") + (doseq [key (keys db)] + (println) + (println key) + (print-method (db key) writer)) + (println "})")))) + +(defn datalog-database + [rels] + (with-meta rels {:type ::datalog-database})) + +(def empty-database (datalog-database {})) + +(defmethod print-method ::datalog-relation + [rel #^Writer writer] + (binding [*out* writer] + (do + (println "(datalog-relation") + (println " ;; Schema") + (println " " (:schema rel)) + (println) + (println " ;; Data") + (println " #{") + (doseq [tuple (:data rel)] + (println " " tuple)) + (println " }") + (println) + (println " ;; Indexes") + (println " {") + (doseq [key (-> rel :indexes keys)] + (println " " key) + (println " {") + (doseq [val (keys ((:indexes rel) key))] + (println " " val) + (println " " (get-in rel [:indexes key val]))) + (println " }")) + (println " })")))) + +(defn datalog-relation + "Creates a relation" + [schema data indexes] + (with-meta (struct relation schema data indexes) {:type ::datalog-relation})) + +(defn add-relation + "Adds a relation to the database" + [db name keys] + (assoc db name (datalog-relation (set keys) #{} {}))) + +(defn add-index + "Adds an index to an empty relation named name" + [db name key] + (assert (empty? (:data (db name)))) + (let [rel (db name) + inx (assoc (:indexes rel) key {})] + (assoc db name (datalog-relation (:schema rel) + (:data rel) + inx)))) + +(defn ensure-relation + "If the database lacks the named relation, add it" + [db name keys indexes] + (if-let [rel (db name)] + (do + (assert (= (:schema rel) (set keys))) + db) + (let [db1 (add-relation db name keys)] + (reduce (fn [db key] (add-index db name key)) + db1 + indexes)))) + + +(defmacro make-database + "Makes a database, like this + (make-database + (relation :fred [:mary :sue]) + (index :fred :mary) + (relation :sally [:jen :becky]) + (index :sally :jen) + (index :sally :becky))" + [& commands] + (let [wrapper (fn [cur new] + (let [cmd (first new) + body (next new)] + (assert (= 2 (count body))) + (cond + (= cmd 'relation) + `(add-relation ~cur ~(first body) ~(fnext body)) + (= cmd 'index) + `(add-index ~cur ~(first body) ~(fnext body)) + :otherwise (throwf "%s not recognized" new))))] + (reduce wrapper `empty-database commands))) + +(defn get-relation + "Get a relation object by name" + [db rel-name] + (db rel-name)) + +(defn replace-relation + "Add or replace a fully constructed relation object to the database." + [db rel-name rel] + (assoc db rel-name rel)) + + +;;; DML + + +(defn database-counts + "Returns a map with the count of elements in each relation." + [db] + (map-values #(-> % :data count) db)) + +(defn- modify-indexes + "Perform f on the indexed tuple-set. f should take a set and tuple, + and return the new set." + [idxs tuple f] + (into {} (for [ik (keys idxs)] + (let [im (idxs ik) + iv (tuple ik) + os (get im iv #{}) + ns (f os tuple)] + [ik (if (empty? ns) + (dissoc im iv) + (assoc im iv (f os tuple)))])))) + +(defn- add-to-indexes + "Adds the tuple to the appropriate keys in the index map" + [idxs tuple] + (modify-indexes idxs tuple conj)) + +(defn- remove-from-indexes + "Removes the tuple from the appropriate keys in the index map" + [idxs tuple] + (modify-indexes idxs tuple disj)) + +(defn add-tuple + "Two forms: + + [db relation-name tuple] adds tuple to the named relation. Returns + the new database. + + [rel tuple] adds to the relation object. Returns the new relation." + ([db rel-name tuple] + (assert (= (-> tuple keys set) (-> rel-name db :schema))) + (assoc db rel-name (add-tuple (db rel-name) tuple))) + ([rel tuple] + (let [data (:data rel) + new-data (conj data tuple)] + (if (identical? data new-data) ; optimization hack! + rel + (let [idxs (add-to-indexes (:indexes rel) tuple)] + (assoc rel :data new-data :indexes idxs)))))) + +(defn remove-tuple + "Two forms: + + [db relation-name tuple] removes the tuple from the named relation, + returns a new database. + + [rel tuple] removes the tuple from the relation. Returns the new + relation." + ([db rel-name tuple] (assoc db rel-name (remove-tuple (db rel-name) tuple))) + ([rel tuple] + (let [data (:data rel) + new-data (disj data tuple)] + (if (identical? data new-data) + rel + (let [idxs (remove-from-indexes (:indexes rel) tuple)] + (assoc rel :data new-data :indexes idxs)))))) + +(defn add-tuples + "Adds a collection of tuples to the db, as + (add-tuples db + [:rel-name :key-1 1 :key-2 2] + [:rel-name :key-1 2 :key-2 3])" + [db & tupls] + (reduce #(add-tuple %1 (first %2) (apply hash-map (next %2))) db tupls)) + +(defn- find-indexes + "Given a map of indexes and a partial tuple, return the sets of full tuples" + [idxs pt] + (if (empty? idxs) + nil + (filter identity (for [key (keys pt)] + (if-let [idx-map (idxs key)] + (get idx-map (pt key) #{}) + nil))))) + +(defn- match? + "Is m2 contained in m1?" + [m1 m2] + (let [compare (fn [key] + (and (contains? m1 key) + (= (m1 key) (m2 key))))] + (every? compare (keys m2)))) + +(defn- scan-space + "Computes a stream of tuples from relation rn matching partial tuple (pt) + and applies fun to each" + [fun db rn pt] + (let [rel (db rn) + idxs (find-indexes (:indexes rel) pt) + space (if (empty? idxs) + (:data rel) ; table scan :( + (reduce intersection idxs))] + (trace-datalog (when (empty? idxs) + (println (format "Table scan of %s: %s rows!!!!!" + rn + (count space))))) + (fun #(match? % pt) space))) + +(defn select + "finds all matching tuples to the partial tuple (pt) in the relation named (rn)" + [db rn pt] + (scan-space filter db rn pt)) + +(defn any-match? + "Finds if there are any matching records for the partial tuple" + [db rn pt] + (if (= (-> pt keys set) (:schema (db rn))) + (contains? (:data (db rn)) pt) + (scan-space some db rn pt))) + + +;;; Merge + +(defn merge-indexes + [idx1 idx2] + (merge-with (fn [h1 h2] (merge-with union h1 h2)) idx1 idx2)) + +(defn merge-relations + "Merges two relations" + [r1 r2] + (assert (= (:schema r1) (:schema r2))) + (let [merged-indexes (merge-indexes (:indexes r1) + (:indexes r2)) + merged-data (union (:data r1) + (:data r2))] + (assoc r1 :data merged-data :indexes merged-indexes))) + +(defn database-merge + "Merges databases together" + [dbs] + (apply merge-with merge-relations dbs)) + +(defn database-merge-parallel + "Merges databases together in parallel" + [dbs] + (preduce merge-relations dbs)) + + +;; End of file |