aboutsummaryrefslogtreecommitdiff
path: root/src/clojure
diff options
context:
space:
mode:
authorKonrad Hinsen <konrad.hinsen@laposte.net>2009-01-08 09:38:27 +0000
committerKonrad Hinsen <konrad.hinsen@laposte.net>2009-01-08 09:38:27 +0000
commitb87dc4232981ee3e577aa041da2bf7bb80e508b4 (patch)
tree467d695db601138ffc8af0442195b9954dc4385d /src/clojure
parentb78dd0739319e226a793a8986682dbe8db844ccd (diff)
New module clojure.contrib.probabilities.dist (with examples and entry in build.xml)
Diffstat (limited to 'src/clojure')
-rw-r--r--src/clojure/contrib/probabilities/dist.clj136
-rw-r--r--src/clojure/contrib/probabilities/dist/examples.clj181
2 files changed, 317 insertions, 0 deletions
diff --git a/src/clojure/contrib/probabilities/dist.clj b/src/clojure/contrib/probabilities/dist.clj
new file mode 100644
index 00000000..5b860505
--- /dev/null
+++ b/src/clojure/contrib/probabilities/dist.clj
@@ -0,0 +1,136 @@
+;; Finite probability distributions
+
+;; by Konrad Hinsen
+;; last updated January 8, 2009
+
+;; Copyright (c) Konrad Hinsen, 2009. All rights reserved. The use
+;; and distribution terms for this software are covered by the Eclipse
+;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
+;; which can be found in the file epl-v10.html at the root of this
+;; distribution. By using this software in any fashion, you are
+;; agreeing to be bound by the terms of this license. You must not
+;; remove this notice, or any other, from this software.
+
+(ns clojure.contrib.probabilities.dist
+ (:use clojure.contrib.monads clojure.contrib.macros
+ clojure.contrib.monads clojure.contrib.def))
+
+; The probability distribution monad. It is limited to finite probability
+; distributions (e.g. there is a finite number of possible value), which
+; are represented as maps from values to probabilities.
+
+(defmonad dist
+ "Monad describing computations on fuzzy quantities, represented by a finite
+ probability distribution for the possible values. A distribution is
+ represented by a map from values to probabilities."
+ [m-result (fn m-result-dist [v]
+ {v 1})
+ m-bind (fn m-bind-dist [mv f]
+ (letfn [add-prob [dist [x p]]
+ (assoc dist x (+ (get dist x 0) p))]
+ (reduce add-prob {}
+ (for [[x p] mv [y q] (f x)]
+ [y (* q p)]))))
+ ])
+
+; Applying the monad transformer maybe-t to the basic dist monad results
+; in the cond-dist monad that can handle invalid values. The total probability
+; for invalid values ends up as the probability of m-zero (which is nil).
+; The function normalize takes this probability out of the distribution and
+; re-distributes its weight over the valid values.
+
+(defvar cond-dist
+ (maybe-t dist)
+ "Variant of the dist monad that can handle undefined values.")
+
+(defn normalize [cdist]
+ "Normalize a probability distribution resulting from a computation in
+ the cond-dist monad by re-distributing the weight of the invalid values
+ over the valid ones."
+ (let [missing (get cdist nil 0)
+ dist (dissoc cdist nil)]
+ (cond (= 1 missing) {}
+ (zero? missing) dist
+ :else (let [scale (/ 1 (- 1 missing))]
+ (into {} (for [[val p] dist :when (> p 0)]
+ [val (* p scale)]))))))
+
+; Functions that construct distributions
+
+(defn uniform
+ "Return a distribution in which each of the elements of coll
+ has the same probability."
+ [coll]
+ (let [n (count coll)
+ p (/ 1 n)]
+ (into {} (for [x (seq coll)] [x p]))))
+
+(defn choose
+ "Construct a distribution from an explicit list of probabilities
+ and values. They are given in the form of a vector of probability-value
+ pairs. In the last pair, the probability can be given by the keyword
+ :else, which stands for 1 minus the total of the other probabilities."
+ [& choices]
+ (letfn [add-choice [dist [p v]]
+ (cond (nil? p) dist
+ (= p :else)
+ (let [total-p (reduce + (vals dist))]
+ (assoc dist v (- 1 total-p)))
+ :else (assoc dist v p))]
+ (reduce add-choice {} (partition 2 choices))))
+
+(with-monad dist
+
+ (defn certainly
+ "Returns a distribution in which the single value v has probability 1."
+ [v]
+ (m-result v))
+
+ (defn join-with
+ "Returns the distribution of (f x y) with x from dist1 and y from dist2."
+ [f dist1 dist2]
+ ((m-lift 2 f) dist1 dist2))
+
+)
+
+(defn cond-prob
+ "Returns the conditional probability for the values in dist that satisfy
+ the predicate pred."
+ [pred dist]
+ (normalize
+ (with-monad cond-dist
+ (m-bind dist (fn [v] (m-result (when (pred v) v)))))))
+
+; Select (with equal probability) N items from a sequence
+
+(defn- nth-and-rest [n xs]
+ "Return a list containing the n-th value of xs and the sequence
+ obtained by removing the n-th value from xs."
+ (let [[h t] (split-at n xs)]
+ (list (first t) (concat h (rest t)))))
+
+(with-monad dist
+
+ (defn- select-n [n xs]
+ (letfn [select-1 [[s xs]]
+ (uniform (for [i (range (count xs))]
+ (let [[nth rest] (nth-and-rest i xs)]
+ (list (cons nth s) rest))))]
+ ((m-chain (replicate n select-1)) (list '() xs))))
+
+ (defn select [n xs]
+ "Return the distribution for all possible ordered selections of n elements
+ out of xs."
+ ((m-lift 1 first) (select-n n xs)))
+
+)
+
+; Find the probability that a given predicate is satisfied
+
+(defn prob
+ "Return the probability that the predicate pred is satisfied in the
+ distribution dist, i.e. the sum of the probabilities of the values
+ that satisfy pred."
+ [pred dist]
+ (apply + (for [[x p] dist :when (pred x)] p)))
+
diff --git a/src/clojure/contrib/probabilities/dist/examples.clj b/src/clojure/contrib/probabilities/dist/examples.clj
new file mode 100644
index 00000000..36c39ada
--- /dev/null
+++ b/src/clojure/contrib/probabilities/dist/examples.clj
@@ -0,0 +1,181 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Probability distribution application examples
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(use 'clojure.contrib.probabilities.dist
+ 'clojure.contrib.monads)
+
+;; Simple examples using dice
+
+; A single die is represented by a uniform distribution over the
+; six possible outcomes.
+(def die (uniform #{1 2 3 4 5 6}))
+
+; The probability that the result is odd...
+(prob odd? die)
+; ... or greater than four.
+(prob #(> % 4) die)
+
+; The sum of two dice
+(def two-dice (join-with + die die))
+(prob #(> % 6) two-dice)
+
+; The sum of two dice using a monad comprehension
+(assert (= two-dice
+ (domonad dist
+ [d1 die
+ d2 die]
+ (+ d1 d2))))
+
+; The two values separately, but as an ordered pair
+(domonad dist
+ [d1 die
+ d2 die]
+ (if (< d1 d2) (list d1 d2) (list d2 d1)))
+
+; The conditional probability for two dice yielding X if X is odd:
+(cond-prob odd? two-dice)
+
+; A two-step experiment: throw a die, and then add 1 with probability 1/2
+(domonad dist
+ [d die
+ x (choose (/ 1 2) d
+ :else (inc d))]
+ x)
+
+; The sum of n dice
+(defn dice [n]
+ (domonad dist
+ [ds (m-seq (replicate n die))]
+ (apply + ds)))
+
+(assert (= two-dice (dice 2)))
+
+(dice 3)
+
+
+;; The Monty Hall game
+;; (see http://en.wikipedia.org/wiki/Monty_Hall_problem for a description)
+
+; The set of doors. In the classical variant, there are three doors,
+; but the code can also work with more than three doors.
+(def doors #{:A :B :C})
+
+; A simulation of the game, step by step:
+(domonad dist
+ [; The prize is hidden behind one of the doors.
+ prize (uniform doors)
+ ; The player make his initial choice.
+ choice (uniform doors)
+ ; The host opens a door which is neither the prize door nor the
+ ; one chosen by the player.
+ opened (uniform (disj doors prize choice))
+ ; If the player stays with his initial choice, the game ends and the
+ ; following line should be commented out. It describes the switch from
+ ; the initial choice to a door that is neither the opened one nor
+ ; his original choice.
+ choice (uniform (disj doors opened choice))
+ ]
+ ; If the chosen door has the prize behind it, the player wins.
+ (if (= choice prize) :win :loose))
+
+
+;; Tree growth simulation
+;; Adapted from the code in:
+;; Martin Erwig and Steve Kollmansberger,
+;; "Probabilistic Functional Programming in Haskell",
+;; Journal of Functional Programming, Vol. 16, No. 1, 21-34, 2006
+;; http://web.engr.oregonstate.edu/~erwig/papers/abstracts.html#JFP06a
+
+; A tree is represented by two attributes: its state (alive, hit, fallen),
+; and its height (an integer). A new tree starts out alive and with zero height.
+(def new-tree {:state :alive, :height 0})
+
+; An evolution step in the simulation modifies alive trees only. They can
+; either grow by one (90% probability), be hit by lightning and then stop
+; growing (4% probability), or fall down (6% probability).
+(defn evolve-1 [tree]
+ (let [{s :state h :height} tree]
+ (if (= s :alive)
+ (choose 0.9 (assoc tree :height (inc (:height tree)))
+ 0.04 (assoc tree :state :hit)
+ :else {:state :fallen, :height 0})
+ (certainly tree))))
+
+; Multiple evolution steps can be chained together with m-chain,
+; since each step's input is the output of the previous step.
+(with-monad dist
+ (defn evolve [n tree]
+ ((m-chain (replicate n evolve-1)) tree)))
+
+; Try it for zero, one, or two steps.
+(evolve 0 new-tree)
+(evolve 1 new-tree)
+(evolve 2 new-tree)
+
+; We can also get a distribution of the height only:
+(with-monad dist
+ ((m-lift 1 :height) (evolve 2 new-tree)))
+
+
+
+;; Bayesian inference
+;;
+;; Suppose someone has three dice, one with six faces, one with eight, and
+;; one with twelve. This person throws one die and gives us the number,
+;; but doesn't tell us which die it was. What are the Bayesian probabilities
+;; for each of the three dice, given the observation we have?
+
+; A function that returns the distribution of a dice with n faces.
+(defn die-n [n] (uniform (range 1 (inc n))))
+
+; The three dice in the game with their distributions. With this map, we
+; can easily calculate the probability for an observation under the
+; condition that a particular die was used.
+(def dice {:six (die-n 6)
+ :eight (die-n 8)
+ :twelve (die-n 12)})
+
+; The only prior knowledge is that one of the three dice is used, so we
+; have no better than a uniform distribution to start with.
+(def prior (uniform (keys dice)))
+
+; Add a single observation to the information contained in the
+; distribution. Adding an observation consists of
+; 1) Draw a die from the prior distribution.
+; 2) Draw an observation from the distribution of that die.
+; 3) Eliminate (replace by nil) the trials that do not match the observation.
+; 4) Normalize the distribution for the non-nil values.
+(defn add-observation [prior observation]
+ (normalize
+ (domonad cond-dist
+ [die prior
+ number (get dice die)]
+ (when (= number observation) die))))
+
+; Add one observation.
+(add-observation prior 1)
+
+; Add three consecutive observations.
+(-> prior (add-observation 1)
+ (add-observation 3)
+ (add-observation 7))
+
+; We can also add multiple observations in a single trial, but this
+; is slower because more combinations have to be taken into account.
+; With Bayesian inference, it is most efficient to eliminate choices
+; as early as possible.
+(defn add-observations [prior observations]
+ (with-monad cond-dist
+ (let [n-nums #(m-seq (replicate (count observations) (get dice %)))]
+ (normalize
+ (domonad
+ [die prior
+ nums (n-nums die)]
+ (when (= nums observations) die))))))
+
+(add-observations prior [1 3 7])