diff options
author | Konrad Hinsen <konrad.hinsen@laposte.net> | 2009-01-08 09:38:27 +0000 |
---|---|---|
committer | Konrad Hinsen <konrad.hinsen@laposte.net> | 2009-01-08 09:38:27 +0000 |
commit | b87dc4232981ee3e577aa041da2bf7bb80e508b4 (patch) | |
tree | 467d695db601138ffc8af0442195b9954dc4385d /src/clojure | |
parent | b78dd0739319e226a793a8986682dbe8db844ccd (diff) |
New module clojure.contrib.probabilities.dist (with examples and entry in build.xml)
Diffstat (limited to 'src/clojure')
-rw-r--r-- | src/clojure/contrib/probabilities/dist.clj | 136 | ||||
-rw-r--r-- | src/clojure/contrib/probabilities/dist/examples.clj | 181 |
2 files changed, 317 insertions, 0 deletions
diff --git a/src/clojure/contrib/probabilities/dist.clj b/src/clojure/contrib/probabilities/dist.clj new file mode 100644 index 00000000..5b860505 --- /dev/null +++ b/src/clojure/contrib/probabilities/dist.clj @@ -0,0 +1,136 @@ +;; Finite probability distributions + +;; by Konrad Hinsen +;; last updated January 8, 2009 + +;; Copyright (c) Konrad Hinsen, 2009. All rights reserved. The use +;; and distribution terms for this software are covered by the Eclipse +;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) +;; which can be found in the file epl-v10.html at the root of this +;; distribution. By using this software in any fashion, you are +;; agreeing to be bound by the terms of this license. You must not +;; remove this notice, or any other, from this software. + +(ns clojure.contrib.probabilities.dist + (:use clojure.contrib.monads clojure.contrib.macros + clojure.contrib.monads clojure.contrib.def)) + +; The probability distribution monad. It is limited to finite probability +; distributions (e.g. there is a finite number of possible value), which +; are represented as maps from values to probabilities. + +(defmonad dist + "Monad describing computations on fuzzy quantities, represented by a finite + probability distribution for the possible values. A distribution is + represented by a map from values to probabilities." + [m-result (fn m-result-dist [v] + {v 1}) + m-bind (fn m-bind-dist [mv f] + (letfn [add-prob [dist [x p]] + (assoc dist x (+ (get dist x 0) p))] + (reduce add-prob {} + (for [[x p] mv [y q] (f x)] + [y (* q p)])))) + ]) + +; Applying the monad transformer maybe-t to the basic dist monad results +; in the cond-dist monad that can handle invalid values. The total probability +; for invalid values ends up as the probability of m-zero (which is nil). +; The function normalize takes this probability out of the distribution and +; re-distributes its weight over the valid values. + +(defvar cond-dist + (maybe-t dist) + "Variant of the dist monad that can handle undefined values.") + +(defn normalize [cdist] + "Normalize a probability distribution resulting from a computation in + the cond-dist monad by re-distributing the weight of the invalid values + over the valid ones." + (let [missing (get cdist nil 0) + dist (dissoc cdist nil)] + (cond (= 1 missing) {} + (zero? missing) dist + :else (let [scale (/ 1 (- 1 missing))] + (into {} (for [[val p] dist :when (> p 0)] + [val (* p scale)])))))) + +; Functions that construct distributions + +(defn uniform + "Return a distribution in which each of the elements of coll + has the same probability." + [coll] + (let [n (count coll) + p (/ 1 n)] + (into {} (for [x (seq coll)] [x p])))) + +(defn choose + "Construct a distribution from an explicit list of probabilities + and values. They are given in the form of a vector of probability-value + pairs. In the last pair, the probability can be given by the keyword + :else, which stands for 1 minus the total of the other probabilities." + [& choices] + (letfn [add-choice [dist [p v]] + (cond (nil? p) dist + (= p :else) + (let [total-p (reduce + (vals dist))] + (assoc dist v (- 1 total-p))) + :else (assoc dist v p))] + (reduce add-choice {} (partition 2 choices)))) + +(with-monad dist + + (defn certainly + "Returns a distribution in which the single value v has probability 1." + [v] + (m-result v)) + + (defn join-with + "Returns the distribution of (f x y) with x from dist1 and y from dist2." + [f dist1 dist2] + ((m-lift 2 f) dist1 dist2)) + +) + +(defn cond-prob + "Returns the conditional probability for the values in dist that satisfy + the predicate pred." + [pred dist] + (normalize + (with-monad cond-dist + (m-bind dist (fn [v] (m-result (when (pred v) v))))))) + +; Select (with equal probability) N items from a sequence + +(defn- nth-and-rest [n xs] + "Return a list containing the n-th value of xs and the sequence + obtained by removing the n-th value from xs." + (let [[h t] (split-at n xs)] + (list (first t) (concat h (rest t))))) + +(with-monad dist + + (defn- select-n [n xs] + (letfn [select-1 [[s xs]] + (uniform (for [i (range (count xs))] + (let [[nth rest] (nth-and-rest i xs)] + (list (cons nth s) rest))))] + ((m-chain (replicate n select-1)) (list '() xs)))) + + (defn select [n xs] + "Return the distribution for all possible ordered selections of n elements + out of xs." + ((m-lift 1 first) (select-n n xs))) + +) + +; Find the probability that a given predicate is satisfied + +(defn prob + "Return the probability that the predicate pred is satisfied in the + distribution dist, i.e. the sum of the probabilities of the values + that satisfy pred." + [pred dist] + (apply + (for [[x p] dist :when (pred x)] p))) + diff --git a/src/clojure/contrib/probabilities/dist/examples.clj b/src/clojure/contrib/probabilities/dist/examples.clj new file mode 100644 index 00000000..36c39ada --- /dev/null +++ b/src/clojure/contrib/probabilities/dist/examples.clj @@ -0,0 +1,181 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Probability distribution application examples +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(use 'clojure.contrib.probabilities.dist + 'clojure.contrib.monads) + +;; Simple examples using dice + +; A single die is represented by a uniform distribution over the +; six possible outcomes. +(def die (uniform #{1 2 3 4 5 6})) + +; The probability that the result is odd... +(prob odd? die) +; ... or greater than four. +(prob #(> % 4) die) + +; The sum of two dice +(def two-dice (join-with + die die)) +(prob #(> % 6) two-dice) + +; The sum of two dice using a monad comprehension +(assert (= two-dice + (domonad dist + [d1 die + d2 die] + (+ d1 d2)))) + +; The two values separately, but as an ordered pair +(domonad dist + [d1 die + d2 die] + (if (< d1 d2) (list d1 d2) (list d2 d1))) + +; The conditional probability for two dice yielding X if X is odd: +(cond-prob odd? two-dice) + +; A two-step experiment: throw a die, and then add 1 with probability 1/2 +(domonad dist + [d die + x (choose (/ 1 2) d + :else (inc d))] + x) + +; The sum of n dice +(defn dice [n] + (domonad dist + [ds (m-seq (replicate n die))] + (apply + ds))) + +(assert (= two-dice (dice 2))) + +(dice 3) + + +;; The Monty Hall game +;; (see http://en.wikipedia.org/wiki/Monty_Hall_problem for a description) + +; The set of doors. In the classical variant, there are three doors, +; but the code can also work with more than three doors. +(def doors #{:A :B :C}) + +; A simulation of the game, step by step: +(domonad dist + [; The prize is hidden behind one of the doors. + prize (uniform doors) + ; The player make his initial choice. + choice (uniform doors) + ; The host opens a door which is neither the prize door nor the + ; one chosen by the player. + opened (uniform (disj doors prize choice)) + ; If the player stays with his initial choice, the game ends and the + ; following line should be commented out. It describes the switch from + ; the initial choice to a door that is neither the opened one nor + ; his original choice. + choice (uniform (disj doors opened choice)) + ] + ; If the chosen door has the prize behind it, the player wins. + (if (= choice prize) :win :loose)) + + +;; Tree growth simulation +;; Adapted from the code in: +;; Martin Erwig and Steve Kollmansberger, +;; "Probabilistic Functional Programming in Haskell", +;; Journal of Functional Programming, Vol. 16, No. 1, 21-34, 2006 +;; http://web.engr.oregonstate.edu/~erwig/papers/abstracts.html#JFP06a + +; A tree is represented by two attributes: its state (alive, hit, fallen), +; and its height (an integer). A new tree starts out alive and with zero height. +(def new-tree {:state :alive, :height 0}) + +; An evolution step in the simulation modifies alive trees only. They can +; either grow by one (90% probability), be hit by lightning and then stop +; growing (4% probability), or fall down (6% probability). +(defn evolve-1 [tree] + (let [{s :state h :height} tree] + (if (= s :alive) + (choose 0.9 (assoc tree :height (inc (:height tree))) + 0.04 (assoc tree :state :hit) + :else {:state :fallen, :height 0}) + (certainly tree)))) + +; Multiple evolution steps can be chained together with m-chain, +; since each step's input is the output of the previous step. +(with-monad dist + (defn evolve [n tree] + ((m-chain (replicate n evolve-1)) tree))) + +; Try it for zero, one, or two steps. +(evolve 0 new-tree) +(evolve 1 new-tree) +(evolve 2 new-tree) + +; We can also get a distribution of the height only: +(with-monad dist + ((m-lift 1 :height) (evolve 2 new-tree))) + + + +;; Bayesian inference +;; +;; Suppose someone has three dice, one with six faces, one with eight, and +;; one with twelve. This person throws one die and gives us the number, +;; but doesn't tell us which die it was. What are the Bayesian probabilities +;; for each of the three dice, given the observation we have? + +; A function that returns the distribution of a dice with n faces. +(defn die-n [n] (uniform (range 1 (inc n)))) + +; The three dice in the game with their distributions. With this map, we +; can easily calculate the probability for an observation under the +; condition that a particular die was used. +(def dice {:six (die-n 6) + :eight (die-n 8) + :twelve (die-n 12)}) + +; The only prior knowledge is that one of the three dice is used, so we +; have no better than a uniform distribution to start with. +(def prior (uniform (keys dice))) + +; Add a single observation to the information contained in the +; distribution. Adding an observation consists of +; 1) Draw a die from the prior distribution. +; 2) Draw an observation from the distribution of that die. +; 3) Eliminate (replace by nil) the trials that do not match the observation. +; 4) Normalize the distribution for the non-nil values. +(defn add-observation [prior observation] + (normalize + (domonad cond-dist + [die prior + number (get dice die)] + (when (= number observation) die)))) + +; Add one observation. +(add-observation prior 1) + +; Add three consecutive observations. +(-> prior (add-observation 1) + (add-observation 3) + (add-observation 7)) + +; We can also add multiple observations in a single trial, but this +; is slower because more combinations have to be taken into account. +; With Bayesian inference, it is most efficient to eliminate choices +; as early as possible. +(defn add-observations [prior observations] + (with-monad cond-dist + (let [n-nums #(m-seq (replicate (count observations) (get dice %)))] + (normalize + (domonad + [die prior + nums (n-nums die)] + (when (= nums observations) die)))))) + +(add-observations prior [1 3 7]) |