New module clojure.contrib.probabilities.dist (with examples and entry in build.xml)

author: Konrad Hinsen <konrad.hinsen@laposte.net> 2009-01-08 09:38:27 +0000
committer: Konrad Hinsen <konrad.hinsen@laposte.net> 2009-01-08 09:38:27 +0000
commit: b87dc4232981ee3e577aa041da2bf7bb80e508b4 (patch)
tree: 467d695db601138ffc8af0442195b9954dc4385d /src/clojure
parent: b78dd0739319e226a793a8986682dbe8db844ccd (diff)
2 files changed, 317 insertions, 0 deletions
diff --git a/src/clojure/contrib/probabilities/dist.clj b/src/clojure/contrib/probabilities/dist.clj
new file mode 100644
index 00000000..5b860505
--- /dev/null
+++ b/src/clojure/contrib/probabilities/dist.clj
@@ -0,0 +1,136 @@
+;; Finite probability distributions
+
+;; by Konrad Hinsen
+;; last updated January 8, 2009
+
+;; Copyright (c) Konrad Hinsen, 2009. All rights reserved.  The use
+;; and distribution terms for this software are covered by the Eclipse
+;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
+;; which can be found in the file epl-v10.html at the root of this
+;; distribution.  By using this software in any fashion, you are
+;; agreeing to be bound by the terms of this license.  You must not
+;; remove this notice, or any other, from this software.
+
+(ns clojure.contrib.probabilities.dist
+  (:use clojure.contrib.monads clojure.contrib.macros
+	clojure.contrib.monads clojure.contrib.def))
+
+; The probability distribution monad. It is limited to finite probability
+; distributions (e.g. there is a finite number of possible value), which
+; are represented as maps from values to probabilities.
+
+(defmonad dist
+  "Monad describing computations on fuzzy quantities, represented by a finite
+   probability distribution for the possible values. A distribution is
+   represented by a map from values to probabilities."
+  [m-result (fn m-result-dist [v]
+       {v 1})
+   m-bind   (fn m-bind-dist [mv f]
+	      (letfn [add-prob [dist [x p]]
+		        (assoc dist x (+ (get dist x 0) p))]
+	        (reduce add-prob {}
+		        (for [[x p] mv  [y q] (f x)]
+			  [y (* q p)]))))
+   ])
+
+; Applying the monad transformer maybe-t to the basic dist monad results
+; in the cond-dist monad that can handle invalid values. The total probability
+; for invalid values ends up as the probability of m-zero (which is nil).
+; The function normalize takes this probability out of the distribution and
+; re-distributes its weight over the valid values.
+
+(defvar cond-dist
+  (maybe-t dist)
+  "Variant of the dist monad that can handle undefined values.")
+
+(defn normalize [cdist]
+  "Normalize a probability distribution resulting from a computation in
+   the cond-dist monad by re-distributing the weight of the invalid values
+   over the valid ones."
+  (let [missing (get cdist nil 0)
+	dist    (dissoc cdist nil)]
+    (cond (= 1 missing)   {}
+	  (zero? missing) dist
+	  :else (let [scale  (/ 1 (- 1 missing))]
+		  (into {} (for [[val p] dist :when (> p 0)]
+			     [val (* p scale)]))))))
+
+; Functions that construct distributions
+
+(defn uniform
+  "Return a distribution in which each of the elements of coll
+   has the same probability."
+  [coll]
+  (let [n (count coll)
+	p (/ 1 n)]
+    (into {} (for [x (seq coll)] [x p]))))
+
+(defn choose
+  "Construct a distribution from an explicit list of probabilities
+   and values. They are given in the form of a vector of probability-value
+   pairs. In the last pair, the probability can be given by the keyword
+   :else, which stands for 1 minus the total of the other probabilities."
+  [& choices]
+  (letfn [add-choice [dist [p v]]
+	    (cond (nil? p) dist
+		  (= p :else)
+		       (let [total-p (reduce + (vals dist))]
+		         (assoc dist v (- 1 total-p)))
+		  :else (assoc dist v p))]
+    (reduce add-choice {} (partition 2 choices))))
+
+(with-monad dist
+
+  (defn certainly
+    "Returns a distribution in which the single value v has probability 1."
+    [v]
+    (m-result v))
+
+  (defn join-with
+    "Returns the distribution of (f x y) with x from dist1 and y from dist2."
+    [f dist1 dist2]
+    ((m-lift 2 f) dist1 dist2))
+
+)
+
+(defn cond-prob
+  "Returns the conditional probability for the values in dist that satisfy
+   the predicate pred."
+  [pred dist]
+  (normalize
+    (with-monad cond-dist
+      (m-bind dist (fn [v] (m-result (when (pred v) v)))))))
+
+; Select (with equal probability) N items from a sequence
+
+(defn- nth-and-rest [n xs]
+  "Return a list containing the n-th value of xs and the sequence
+   obtained by removing the n-th value from xs."
+  (let [[h t] (split-at n xs)]
+    (list (first t) (concat h (rest t)))))
+
+(with-monad dist
+
+  (defn- select-n [n xs]
+    (letfn [select-1 [[s xs]]
+	      (uniform (for [i (range (count xs))]
+			 (let [[nth rest] (nth-and-rest i xs)]
+			   (list (cons nth s) rest))))]
+      ((m-chain (replicate n select-1)) (list '() xs))))
+
+  (defn select [n xs]
+    "Return the distribution for all possible ordered selections of n elements
+     out of xs."
+    ((m-lift 1 first) (select-n n xs)))
+
+)
+
+; Find the probability that a given predicate is satisfied
+
+(defn prob
+  "Return the probability that the predicate pred is satisfied in the
+   distribution dist, i.e. the sum of the probabilities of the values
+   that satisfy pred."
+  [pred dist]
+  (apply + (for [[x p] dist :when (pred x)] p)))
+
diff --git a/src/clojure/contrib/probabilities/dist/examples.clj b/src/clojure/contrib/probabilities/dist/examples.clj
new file mode 100644
index 00000000..36c39ada
--- /dev/null
+++ b/src/clojure/contrib/probabilities/dist/examples.clj
@@ -0,0 +1,181 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Probability distribution application examples
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(use 'clojure.contrib.probabilities.dist
+     'clojure.contrib.monads)
+
+;; Simple examples using dice
+
+; A single die is represented by a uniform distribution over the
+; six possible outcomes.
+(def die (uniform #{1 2 3 4 5 6}))
+
+; The probability that the result is odd...
+(prob odd? die)
+; ... or greater than four.
+(prob #(> % 4) die)
+
+; The sum of two dice
+(def two-dice (join-with + die die))
+(prob #(> % 6) two-dice)
+
+; The sum of two dice using a monad comprehension
+(assert (= two-dice
+	   (domonad dist
+		    [d1 die
+		     d2 die]
+		    (+ d1 d2))))
+
+; The two values separately, but as an ordered pair
+(domonad dist
+  [d1 die
+   d2 die]
+  (if (< d1 d2) (list d1 d2) (list d2 d1)))
+
+; The conditional probability for two dice yielding X if X is odd:
+(cond-prob odd? two-dice)
+
+; A two-step experiment: throw a die, and then add 1 with probability 1/2
+(domonad dist
+  [d die
+   x (choose (/ 1 2)  d
+	     :else    (inc d))]
+  x)
+
+; The sum of n dice
+(defn dice [n]
+   (domonad dist
+      [ds (m-seq (replicate n die))]
+      (apply + ds)))
+
+(assert (= two-dice (dice 2)))
+
+(dice 3)
+
+
+;; The Monty Hall game
+;; (see http://en.wikipedia.org/wiki/Monty_Hall_problem for a description)
+
+; The set of doors. In the classical variant, there are three doors,
+; but the code can also work with more than three doors.
+(def doors #{:A :B :C})
+
+; A simulation of the game, step by step:
+(domonad dist
+  [; The prize is hidden behind one of the doors.
+   prize  (uniform doors)
+   ; The player make his initial choice.
+   choice (uniform doors)
+   ; The host opens a door which is neither the prize door nor the
+   ; one chosen by the player.
+   opened (uniform (disj doors prize choice))
+   ; If the player stays with his initial choice, the game ends and the
+   ; following line should be commented out. It describes the switch from
+   ; the initial choice to a door that is neither the opened one nor
+   ; his original choice.
+   choice (uniform (disj doors opened choice))
+   ]
+  ; If the chosen door has the prize behind it, the player wins.
+  (if (= choice prize) :win :loose))
+
+
+;; Tree growth simulation
+;; Adapted from the code in:
+;; Martin Erwig and Steve Kollmansberger,
+;; "Probabilistic Functional Programming in Haskell",
+;; Journal of Functional Programming, Vol. 16, No. 1, 21-34, 2006
+;; http://web.engr.oregonstate.edu/~erwig/papers/abstracts.html#JFP06a
+
+; A tree is represented by two attributes: its state (alive, hit, fallen),
+; and its height (an integer). A new tree starts out alive and with zero height.
+(def new-tree {:state :alive, :height 0})
+
+; An evolution step in the simulation modifies alive trees only. They can
+; either grow by one (90% probability), be hit by lightning and then stop
+; growing (4% probability), or fall down (6% probability).
+(defn evolve-1 [tree]
+  (let [{s :state h :height} tree]
+    (if (= s :alive)
+      (choose 0.9   (assoc tree :height (inc (:height tree)))
+	      0.04  (assoc tree :state :hit) 
+	      :else {:state :fallen, :height 0})
+      (certainly tree))))
+
+; Multiple evolution steps can be chained together with m-chain,
+; since each step's input is the output of the previous step.
+(with-monad dist
+  (defn evolve [n tree]
+    ((m-chain (replicate n evolve-1)) tree)))
+
+; Try it for zero, one, or two steps.
+(evolve 0 new-tree)
+(evolve 1 new-tree)
+(evolve 2 new-tree)
+
+; We can also get a distribution of the height only:
+(with-monad dist
+  ((m-lift 1 :height) (evolve 2 new-tree)))
+
+
+
+;; Bayesian inference
+;;
+;; Suppose someone has three dice, one with six faces, one with eight, and
+;; one with twelve. This person throws one die and gives us the number,
+;; but doesn't tell us which die it was. What are the Bayesian probabilities
+;; for each of the three dice, given the observation we have?
+
+; A function that returns the distribution of a dice with n faces.
+(defn die-n [n] (uniform (range 1 (inc n))))
+
+; The three dice in the game with their distributions. With this map, we
+; can easily calculate the probability for an observation under the
+; condition that a particular die was used.
+(def dice {:six     (die-n 6)
+	   :eight   (die-n 8)
+	   :twelve  (die-n 12)})
+
+; The only prior knowledge is that one of the three dice is used, so we
+; have no better than a uniform distribution to start with.
+(def prior (uniform (keys dice)))
+
+; Add a single observation to the information contained in the
+; distribution. Adding an observation consists of
+; 1) Draw a die from the prior distribution.
+; 2) Draw an observation from the distribution of that die.
+; 3) Eliminate (replace by nil) the trials that do not match the observation.
+; 4) Normalize the distribution for the non-nil values.
+(defn add-observation [prior observation]
+  (normalize
+    (domonad cond-dist
+      [die    prior
+       number (get dice die)]
+      (when (= number observation) die))))
+
+; Add one observation.
+(add-observation prior 1)
+
+; Add three consecutive observations.
+(-> prior (add-observation 1)
+          (add-observation 3)
+	  (add-observation 7))
+
+; We can also add multiple observations in a single trial, but this
+; is slower because more combinations have to be taken into account.
+; With Bayesian inference, it is most efficient to eliminate choices
+; as early as possible.
+(defn add-observations [prior observations]
+  (with-monad cond-dist
+    (let [n-nums #(m-seq (replicate (count observations) (get dice %)))]
+      (normalize
+        (domonad
+          [die    prior
+           nums   (n-nums die)]
+	  (when (= nums observations) die))))))
+
+(add-observations prior [1 3 7])
author	Konrad Hinsen <konrad.hinsen@laposte.net>	2009-01-08 09:38:27 +0000
committer	Konrad Hinsen <konrad.hinsen@laposte.net>	2009-01-08 09:38:27 +0000
commit	b87dc4232981ee3e577aa041da2bf7bb80e508b4 (patch)
tree	467d695db601138ffc8af0442195b9954dc4385d /src/clojure
parent	b78dd0739319e226a793a8986682dbe8db844ccd (diff)