aboutsummaryrefslogtreecommitdiff
path: root/src/clojure/contrib/probabilities/finite_distributions.clj
blob: 861cdc6529f23f06d8518a15bce63cffc5a0a1f8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
;; Finite probability distributions

;; by Konrad Hinsen
;; last updated May 11, 2009

;; Copyright (c) Konrad Hinsen, 2009. All rights reserved.  The use
;; and distribution terms for this software are covered by the Eclipse
;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
;; which can be found in the file epl-v10.html at the root of this
;; distribution.  By using this software in any fashion, you are
;; agreeing to be bound by the terms of this license.  You must not
;; remove this notice, or any other, from this software.

(ns
  #^{:author "Konrad Hinsen"
     :doc "Finite probability distributions
           This library defines a monad for combining finite probability
           distributions."}
  clojure.contrib.probabilities.finite-distributions
  (:use [clojure.contrib.monads
	 :only (defmonad domonad with-monad maybe-t m-lift m-chain)]
	 [clojure.contrib.def :only (defvar)]))

; The probability distribution monad. It is limited to finite probability
; distributions (e.g. there is a finite number of possible value), which
; are represented as maps from values to probabilities.

(defmonad dist-m
  "Monad describing computations on fuzzy quantities, represented by a finite
   probability distribution for the possible values. A distribution is
   represented by a map from values to probabilities."
  [m-result (fn m-result-dist [v]
	      {v 1})
   m-bind   (fn m-bind-dist [mv f]
	      (letfn [(add-prob [dist [x p]]
		         (assoc dist x (+ (get dist x 0) p)))]
	        (reduce add-prob {}
		        (for [[x p] mv  [y q] (f x)]
			  [y (* q p)]))))
   ])

; Applying the monad transformer maybe-t to the basic dist monad results
; in the cond-dist monad that can handle invalid values. The total probability
; for invalid values ends up as the probability of m-zero (which is nil).
; The function normalize takes this probability out of the distribution and
; re-distributes its weight over the valid values.

(defvar cond-dist-m
  (maybe-t dist-m)
  "Variant of the dist monad that can handle undefined values.")

; Normalization

(defn- scale-by
  "Multiply each entry in dist by the scale factor s and remove zero entries."
  [dist s]
  (into {}
	(for [[val p] dist :when (> p 0)]
	  [val (* p s)])))

(defn normalize-cond [cdist]
  "Normalize a probability distribution resulting from a computation in
   the cond-dist monad by re-distributing the weight of the invalid values
   over the valid ones."
  (let [missing (get cdist nil 0)
	dist    (dissoc cdist nil)]
    (cond (zero? missing) dist
	  (= 1 missing)   {}
	  :else (let [scale  (/ 1 (- 1 missing))]
		  (scale-by dist scale)))))

(defn normalize
  "Convert a weight map (e.g. a map of counter values) to a distribution
   by multiplying with a normalization factor. If the map has a key
   :total, its value is assumed to be the sum over all the other values and
   it is used for normalization. Otherwise, the sum is calculated
   explicitly. The :total key is removed from the resulting distribution."
  [weights]
  (let [total (:total weights)
	w (dissoc weights :total)
	s (/ 1 (if (nil? total) (reduce + (vals w)) total))]
    (scale-by w s)))

; Functions that construct distributions

(defn uniform
  "Return a distribution in which each of the elements of coll
   has the same probability."
  [coll]
  (let [n (count coll)
	p (/ 1 n)]
    (into {} (for [x (seq coll)] [x p]))))

(defn choose
  "Construct a distribution from an explicit list of probabilities
   and values. They are given in the form of a vector of probability-value
   pairs. In the last pair, the probability can be given by the keyword
   :else, which stands for 1 minus the total of the other probabilities."
  [& choices]
  (letfn [(add-choice [dist [p v]]
	    (cond (nil? p) dist
		  (= p :else)
		        (let [total-p (reduce + (vals dist))]
			  (assoc dist v (- 1 total-p)))
		  :else (assoc dist v p)))]
    (reduce add-choice {} (partition 2 choices))))

(defn bernoulli
  [p]
  "Returns the Bernoulli distribution for probability p."
  (choose p 1 :else 0))

(defn- bc
  [n]
  "Returns the binomial coefficients for a given n."
  (let [r (inc n)]
     (loop [c 1
	    f (list 1)]
       (if (> c n)
	 f
	 (recur (inc c) (cons (* (/ (- r c) c) (first f)) f))))))

(defn binomial
  [n p]
  "Returns the binomial distribution, which is the distribution of the
   number of successes in a series of n experiments whose individual
   success probability is p."
  (let [q (- 1 p)
	n1 (inc n)
	k (range n1)
	pk (take n1 (iterate #(* p %) 1))
	ql (reverse (take n1 (iterate #(* q %) 1)))
	f (bc n)]
    (into {} (map vector k (map * f pk ql)))))

(defn make-distribution
  "Returns the distribution in which each element x of the collection
   has a probability proportional to (f x)"
  [coll f]
  (normalize (into {} (for [k coll] [k (f k)]))))

(defn zipf
  "Returns the Zipf distribution in which the numbers k=1..n have
   probabilities proportional to 1/k^s."
  [s n]
  (make-distribution (range 1 (inc n)) #(/ (java.lang.Math/pow % s))))

(defn certainly
  "Returns a distribution in which the single value v has probability 1."
  [v]
  {v 1})

(with-monad dist-m

  (defn join-with
    "Returns the distribution of (f x y) with x from dist1 and y from dist2."
    [f dist1 dist2]
    ((m-lift 2 f) dist1 dist2))

)

(with-monad cond-dist-m
  (defn cond-prob
    "Returns the conditional probability for the values in dist that satisfy
     the predicate pred."
    [pred dist]
    (normalize-cond
      (domonad
        [v dist
	 :when (pred v)]
	v))))

; Select (with equal probability) N items from a sequence

(defn- nth-and-rest [n xs]
  "Return a list containing the n-th value of xs and the sequence
   obtained by removing the n-th value from xs."
  (let [[h t] (split-at n xs)]
    (list (first t) (concat h (rest t)))))

(with-monad dist-m

  (defn- select-n [n xs]
    (letfn [(select-1 [[s xs]]
	      (uniform (for [i (range (count xs))]
			 (let [[nth rest] (nth-and-rest i xs)]
			   (list (cons nth s) rest)))))]
      ((m-chain (replicate n select-1)) (list '() xs))))

  (defn select [n xs]
    "Return the distribution for all possible ordered selections of n elements
     out of xs."
    ((m-lift 1 first) (select-n n xs)))

)

; Find the probability that a given predicate is satisfied

(defn prob
  "Return the probability that the predicate pred is satisfied in the
   distribution dist, i.e. the sum of the probabilities of the values
   that satisfy pred."
  [pred dist]
  (apply + (for [[x p] dist :when (pred x)] p)))