aboutsummaryrefslogtreecommitdiff
path: root/modules/probabilities
diff options
context:
space:
mode:
authorStuart Sierra <mail@stuartsierra.com>2010-08-07 16:41:53 -0400
committerStuart Sierra <mail@stuartsierra.com>2010-08-07 16:41:53 -0400
commita6a92b9b3d2bfd9a56e1e5e9cfba706d1aeeaae5 (patch)
treef1f3da9887dc2dc557df3282b0bcbd4d701ec593 /modules/probabilities
parente7930c85290f77815cdb00a60604feedfa2d0194 (diff)
Split all namespaces into sub-modules.
* Examples and tests have not been copied over. * Clojure test/compile phases are commented out in parent POM. * May require installing parent POM before full build.
Diffstat (limited to 'modules/probabilities')
-rw-r--r--modules/probabilities/pom.xml26
-rw-r--r--modules/probabilities/src/main/clojure/clojure/contrib/probabilities/finite_distributions.clj203
-rw-r--r--modules/probabilities/src/main/clojure/clojure/contrib/probabilities/monte_carlo.clj240
-rw-r--r--modules/probabilities/src/main/clojure/clojure/contrib/probabilities/random_numbers.clj63
4 files changed, 532 insertions, 0 deletions
diff --git a/modules/probabilities/pom.xml b/modules/probabilities/pom.xml
new file mode 100644
index 00000000..5f478a18
--- /dev/null
+++ b/modules/probabilities/pom.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http//www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
+ http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.clojure.contrib</groupId>
+ <artifactId>parent</artifactId>
+ <version>1.3.0-SNAPSHOT</version>
+ <relativePath>../parent</relativePath>
+ </parent>
+ <artifactId>probabilities-finite-distributions</artifactId>
+ <dependencies>
+ <dependency>
+ <groupId>org.clojure.contrib</groupId>
+ <artifactId>def</artifactId>
+ <version>1.3.0-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.clojure.contrib</groupId>
+ <artifactId>monads</artifactId>
+ <version>1.3.0-SNAPSHOT</version>
+ </dependency>
+ </dependencies>
+</project> \ No newline at end of file
diff --git a/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/finite_distributions.clj b/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/finite_distributions.clj
new file mode 100644
index 00000000..86e5aec0
--- /dev/null
+++ b/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/finite_distributions.clj
@@ -0,0 +1,203 @@
+;; Finite probability distributions
+
+;; by Konrad Hinsen
+;; last updated January 8, 2010
+
+;; Copyright (c) Konrad Hinsen, 2009-2010. All rights reserved. The use
+;; and distribution terms for this software are covered by the Eclipse
+;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
+;; which can be found in the file epl-v10.html at the root of this
+;; distribution. By using this software in any fashion, you are
+;; agreeing to be bound by the terms of this license. You must not
+;; remove this notice, or any other, from this software.
+
+(ns
+ ^{:author "Konrad Hinsen"
+ :doc "Finite probability distributions
+ This library defines a monad for combining finite probability
+ distributions."}
+ clojure.contrib.probabilities.finite-distributions
+ (:use [clojure.contrib.monads
+ :only (defmonad domonad with-monad maybe-t m-lift m-chain)]
+ [clojure.contrib.def :only (defvar)]))
+
+; The probability distribution monad. It is limited to finite probability
+; distributions (e.g. there is a finite number of possible value), which
+; are represented as maps from values to probabilities.
+
+(defmonad dist-m
+ "Monad describing computations on fuzzy quantities, represented by a finite
+ probability distribution for the possible values. A distribution is
+ represented by a map from values to probabilities."
+ [m-result (fn m-result-dist [v]
+ {v 1})
+ m-bind (fn m-bind-dist [mv f]
+ (reduce (partial merge-with +)
+ (for [[x p] mv [y q] (f x)]
+ {y (* q p)})))
+ ])
+
+; Applying the monad transformer maybe-t to the basic dist monad results
+; in the cond-dist monad that can handle invalid values. The total probability
+; for invalid values ends up as the probability of m-zero (which is nil).
+; The function normalize takes this probability out of the distribution and
+; re-distributes its weight over the valid values.
+
+(defvar cond-dist-m
+ (maybe-t dist-m)
+ "Variant of the dist monad that can handle undefined values.")
+
+; Normalization
+
+(defn- scale-by
+ "Multiply each entry in dist by the scale factor s and remove zero entries."
+ [dist s]
+ (into {}
+ (for [[val p] dist :when (> p 0)]
+ [val (* p s)])))
+
+(defn normalize-cond [cdist]
+ "Normalize a probability distribution resulting from a computation in
+ the cond-dist monad by re-distributing the weight of the invalid values
+ over the valid ones."
+ (let [missing (get cdist nil 0)
+ dist (dissoc cdist nil)]
+ (cond (zero? missing) dist
+ (= 1 missing) {}
+ :else (let [scale (/ 1 (- 1 missing))]
+ (scale-by dist scale)))))
+
+(defn normalize
+ "Convert a weight map (e.g. a map of counter values) to a distribution
+ by multiplying with a normalization factor. If the map has a key
+ :total, its value is assumed to be the sum over all the other values and
+ it is used for normalization. Otherwise, the sum is calculated
+ explicitly. The :total key is removed from the resulting distribution."
+ [weights]
+ (let [total (:total weights)
+ w (dissoc weights :total)
+ s (/ 1 (if (nil? total) (reduce + (vals w)) total))]
+ (scale-by w s)))
+
+; Functions that construct distributions
+
+(defn uniform
+ "Return a distribution in which each of the elements of coll
+ has the same probability."
+ [coll]
+ (let [n (count coll)
+ p (/ 1 n)]
+ (into {} (for [x (seq coll)] [x p]))))
+
+(defn choose
+ "Construct a distribution from an explicit list of probabilities
+ and values. They are given in the form of a vector of probability-value
+ pairs. In the last pair, the probability can be given by the keyword
+ :else, which stands for 1 minus the total of the other probabilities."
+ [& choices]
+ (letfn [(add-choice [dist [p v]]
+ (cond (nil? p) dist
+ (= p :else)
+ (let [total-p (reduce + (vals dist))]
+ (assoc dist v (- 1 total-p)))
+ :else (assoc dist v p)))]
+ (reduce add-choice {} (partition 2 choices))))
+
+(defn bernoulli
+ [p]
+ "Returns the Bernoulli distribution for probability p."
+ (choose p 1 :else 0))
+
+(defn- bc
+ [n]
+ "Returns the binomial coefficients for a given n."
+ (let [r (inc n)]
+ (loop [c 1
+ f (list 1)]
+ (if (> c n)
+ f
+ (recur (inc c) (cons (* (/ (- r c) c) (first f)) f))))))
+
+(defn binomial
+ [n p]
+ "Returns the binomial distribution, which is the distribution of the
+ number of successes in a series of n experiments whose individual
+ success probability is p."
+ (let [q (- 1 p)
+ n1 (inc n)
+ k (range n1)
+ pk (take n1 (iterate #(* p %) 1))
+ ql (reverse (take n1 (iterate #(* q %) 1)))
+ f (bc n)]
+ (into {} (map vector k (map * f pk ql)))))
+
+(defn make-distribution
+ "Returns the distribution in which each element x of the collection
+ has a probability proportional to (f x)"
+ [coll f]
+ (normalize (into {} (for [k coll] [k (f k)]))))
+
+(defn zipf
+ "Returns the Zipf distribution in which the numbers k=1..n have
+ probabilities proportional to 1/k^s."
+ [s n]
+ (make-distribution (range 1 (inc n)) #(/ (java.lang.Math/pow % s))))
+
+(defn certainly
+ "Returns a distribution in which the single value v has probability 1."
+ [v]
+ {v 1})
+
+(with-monad dist-m
+
+ (defn join-with
+ "Returns the distribution of (f x y) with x from dist1 and y from dist2."
+ [f dist1 dist2]
+ ((m-lift 2 f) dist1 dist2))
+
+)
+
+(with-monad cond-dist-m
+ (defn cond-prob
+ "Returns the conditional probability for the values in dist that satisfy
+ the predicate pred."
+ [pred dist]
+ (normalize-cond
+ (domonad
+ [v dist
+ :when (pred v)]
+ v))))
+
+; Select (with equal probability) N items from a sequence
+
+(defn- nth-and-rest [n xs]
+ "Return a list containing the n-th value of xs and the sequence
+ obtained by removing the n-th value from xs."
+ (let [[h t] (split-at n xs)]
+ (list (first t) (concat h (rest t)))))
+
+(with-monad dist-m
+
+ (defn- select-n [n xs]
+ (letfn [(select-1 [[s xs]]
+ (uniform (for [i (range (count xs))]
+ (let [[nth rest] (nth-and-rest i xs)]
+ (list (cons nth s) rest)))))]
+ ((m-chain (replicate n select-1)) (list '() xs))))
+
+ (defn select [n xs]
+ "Return the distribution for all possible ordered selections of n elements
+ out of xs."
+ ((m-lift 1 first) (select-n n xs)))
+
+)
+
+; Find the probability that a given predicate is satisfied
+
+(defn prob
+ "Return the probability that the predicate pred is satisfied in the
+ distribution dist, i.e. the sum of the probabilities of the values
+ that satisfy pred."
+ [pred dist]
+ (apply + (for [[x p] dist :when (pred x)] p)))
+
diff --git a/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/monte_carlo.clj b/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/monte_carlo.clj
new file mode 100644
index 00000000..73c89de8
--- /dev/null
+++ b/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/monte_carlo.clj
@@ -0,0 +1,240 @@
+;; Monte-Carlo algorithms
+
+;; by Konrad Hinsen
+;; last updated May 3, 2009
+
+;; Copyright (c) Konrad Hinsen, 2009. All rights reserved. The use
+;; and distribution terms for this software are covered by the Eclipse
+;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
+;; which can be found in the file epl-v10.html at the root of this
+;; distribution. By using this software in any fashion, you are
+;; agreeing to be bound by the terms of this license. You must not
+;; remove this notice, or any other, from this software.
+
+(ns
+ ^{:author "Konrad Hinsen"
+ :doc "Monte-Carlo method support
+
+ Monte-Carlo methods transform an input random number stream
+ (usually having a continuous uniform distribution in the
+ interval [0, 1)) into a random number stream whose distribution
+ satisfies certain conditions (usually the expectation value
+ is equal to some desired quantity). They are thus
+ transformations from one probability distribution to another one.
+
+ This library represents a Monte-Carlo method by a function that
+ takes as input the state of a random number stream with
+ uniform distribution (see
+ clojure.contrib.probabilities.random-numbers) and returns a
+ vector containing one sample value of the desired output
+ distribution and the final state of the input random number
+ stream. Such functions are state monad values and can be
+ composed using operations defined in clojure.contrib.monads."}
+ clojure.contrib.probabilities.monte-carlo
+ (:refer-clojure :exclude (deftype))
+ (:use [clojure.contrib.macros :only (const)])
+ (:use [clojure.contrib.types :only (deftype)])
+ (:use [clojure.contrib.stream-utils :only (defstream stream-next)])
+ (:use [clojure.contrib.monads
+ :only (with-monad state-m m-lift m-seq m-fmap)])
+ (:require [clojure.contrib.generic.arithmetic :as ga])
+ (:require [clojure.contrib.accumulators :as acc]))
+
+;; Random number transformers and random streams
+;;
+;; A random number transformer is a function that takes a random stream
+;; state as input and returns the next value from the transformed stream
+;; plus the new state of the input stream. Random number transformers
+;; are thus state monad values.
+;;
+;; Distributions are implemented as random number transformers that
+;; transform a uniform distribution in the interval [0, 1) to the
+;; desired distribution. Composition of such distributions allows
+;; the realization of any kind of Monte-Carlo algorithm. The result
+;; of such a composition is always again a distribution.
+;;
+;; Random streams are defined by a random number transformer and an
+;; input random number stream. If the randon number transformer represents
+;; a distribution, the input stream must have a uniform distribution
+;; in the interval [0, 1).
+
+; Random stream definition
+(deftype ::random-stream random-stream
+ "Define a random stream by a distribution and the state of a
+ random number stream with uniform distribution in [0, 1)."
+ {:arglists '([distribution random-stream-state])}
+ (fn [d rs] (list d rs)))
+
+(defstream ::random-stream
+ [[d rs]]
+ (let [[r nrs] (d rs)]
+ [r (random-stream d nrs)]))
+
+; Rejection of values is used in the construction of distributions
+(defn reject
+ "Return the distribution that results from rejecting the values from
+ dist that do not satisfy predicate p."
+ [p dist]
+ (fn [rs]
+ (let [[r nrs] (dist rs)]
+ (if (p r)
+ (recur nrs)
+ [r nrs]))))
+
+; Draw a value from a discrete distribution given as a map from
+; values to probabilities.
+; (see clojure.contrib.probabilities.finite-distributions)
+(with-monad state-m
+ (defn discrete
+ "A discrete distribution, defined by a map dist mapping values
+ to probabilities. The sum of probabilities must be one."
+ [dist]
+ (letfn [(pick-at-level [l dist-items]
+ (let [[[x p] & rest-dist] dist-items]
+ (if (> p l)
+ x
+ (recur (- l p) rest-dist))))]
+ (m-fmap #(pick-at-level % (seq dist)) stream-next))))
+
+; Uniform distribution in an finite half-open interval
+(with-monad state-m
+ (defn interval
+ [a b]
+ "Transform a sequence of uniform random numbers in the interval [0, 1)
+ into a sequence of uniform random numbers in the interval [a, b)."
+ (let [d (- b a)
+ f (if (zero? a)
+ (if (= d 1)
+ identity
+ (fn [r] (* d r)))
+ (if (= d 1)
+ (fn [r] (+ a r))
+ (fn [r] (+ a (* d r)))))]
+ (m-fmap f stream-next))))
+
+; Normal (Gaussian) distribution
+(defn normal
+ "Transform a sequence urs of uniform random number in the interval [0, 1)
+ into a sequence of normal random numbers with mean mu and standard
+ deviation sigma."
+ [mu sigma]
+ ; This function implements the Kinderman-Monahan ratio method:
+ ; A.J. Kinderman & J.F. Monahan
+ ; Computer Generation of Random Variables Using the Ratio of Uniform Deviates
+ ; ACM Transactions on Mathematical Software 3(3) 257-260, 1977
+ (fn [rs]
+ (let [[u1 rs] (stream-next rs)
+ [u2* rs] (stream-next rs)
+ u2 (- 1. u2*)
+ s (const (* 4 (/ (. Math exp (- 0.5)) (. Math sqrt 2.))))
+ z (* s (/ (- u1 0.5) u2))
+ zz (+ (* 0.25 z z) (. Math log u2))]
+ (if (> zz 0)
+ (recur rs)
+ [(+ mu (* sigma z)) rs]))))
+
+; Lognormal distribution
+(with-monad state-m
+ (defn lognormal
+ "Transform a sequence of uniform random numbesr in the interval [0, 1)
+ into a sequence of lognormal random numbers with mean mu and standard
+ deviation sigma."
+ [mu sigma]
+ (m-fmap #(. Math exp %) (normal mu sigma))))
+
+; Exponential distribution
+(with-monad state-m
+ (defn exponential
+ "Transform a sequence of uniform random numbers in the interval [0, 1)
+ into a sequence of exponential random numbers with parameter lambda."
+ [lambda]
+ (when (<= lambda 0)
+ (throw (IllegalArgumentException.
+ "exponential distribution requires a positive argument")))
+ (let [neg-inv-lambda (- (/ lambda))
+ ; remove very small numbers to prevent log from returning -Infinity
+ not-too-small (reject #(< % 1e-323) stream-next)]
+ (m-fmap #(* (. Math log %) neg-inv-lambda) not-too-small))))
+
+; Another implementation of the normal distribution. It uses the
+; Box-Muller transform, but discards one of the two result values
+; at each cycle because the random number transformer interface cannot
+; handle two outputs at the same time.
+(defn normal-box-muller
+ "Transform a sequence of uniform random numbers in the interval [0, 1)
+ into a sequence of normal random numbers with mean mu and standard
+ deviation sigma."
+ [mu sigma]
+ (fn [rs]
+ (let [[u1 rs] (stream-next rs)
+ [u2 rs] (stream-next rs)
+ v1 (- (* 2.0 u1) 1.0)
+ v2 (- (* 2.0 u2) 1.0)
+ s (+ (* v1 v1) (* v2 v2))
+ ls (. Math sqrt (/ (* -2.0 (. Math log s)) s))
+ x1 (* v1 ls)
+ x2 (* v2 ls)]
+ (if (or (>= s 1) (= s 0))
+ (recur rs)
+ [x1 rs]))))
+
+; Finite samples from a distribution
+(with-monad state-m
+
+ (defn sample
+ "Return the distribution of samples of length n from the
+ distribution dist"
+ [n dist]
+ (m-seq (replicate n dist)))
+
+ (defn sample-reduce
+ "Returns the distribution of the reduction of f over n samples from the
+ distribution dist."
+ ([f n dist]
+ (if (zero? n)
+ (m-result (f))
+ (let [m-f (m-lift 2 f)
+ sample (replicate n dist)]
+ (reduce m-f sample))))
+ ([f val n dist]
+ (let [m-f (m-lift 2 f)
+ m-val (m-result val)
+ sample (replicate n dist)]
+ (reduce m-f m-val sample))))
+
+ (defn sample-sum
+ "Return the distribution of the sum over n samples from the
+ distribution dist."
+ [n dist]
+ (sample-reduce ga/+ n dist))
+
+ (defn sample-mean
+ "Return the distribution of the mean over n samples from the
+ distribution dist"
+ [n dist]
+ (let [div-by-n (m-lift 1 #(ga/* % (/ n)))]
+ (div-by-n (sample-sum n dist))))
+
+ (defn sample-mean-variance
+ "Return the distribution of the mean-and-variance (a vector containing
+ the mean and the variance) over n samples from the distribution dist"
+ [n dist]
+ (let [extract (m-lift 1 (fn [mv] [(:mean mv) (:variance mv)]))]
+ (extract (sample-reduce acc/add acc/empty-mean-variance n dist))))
+
+)
+
+; Uniform distribution inside an n-sphere
+(with-monad state-m
+ (defn n-sphere
+ "Return a uniform distribution of n-dimensional vectors inside an
+ n-sphere of radius r."
+ [n r]
+ (let [box-dist (sample n (interval (- r) r))
+ sq #(* % %)
+ r-sq (sq r)
+ vec-sq #(apply + (map sq %))
+ sphere-dist (reject #(> (vec-sq %) r-sq) box-dist)
+ as-vectors (m-lift 1 vec)]
+ (as-vectors sphere-dist))))
+
diff --git a/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/random_numbers.clj b/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/random_numbers.clj
new file mode 100644
index 00000000..8f7b358c
--- /dev/null
+++ b/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/random_numbers.clj
@@ -0,0 +1,63 @@
+;; Random number generators
+
+;; by Konrad Hinsen
+;; last updated May 3, 2009
+
+;; Copyright (c) Konrad Hinsen, 2009. All rights reserved. The use
+;; and distribution terms for this software are covered by the Eclipse
+;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
+;; which can be found in the file epl-v10.html at the root of this
+;; distribution. By using this software in any fashion, you are
+;; agreeing to be bound by the terms of this license. You must not
+;; remove this notice, or any other, from this software.
+
+(ns
+ ^{:author "Konrad Hinsen"
+ :doc "Random number streams
+
+ This library provides random number generators with a common
+ stream interface. They all produce pseudo-random numbers that are
+ uniformly distributed in the interval [0, 1), i.e. 0 is a
+ possible value but 1 isn't. For transformations to other
+ distributions, see clojure.contrib.probabilities.monte-carlo.
+
+ At the moment, the only generator provided is a rather simple
+ linear congruential generator."}
+ clojure.contrib.probabilities.random-numbers
+ (:refer-clojure :exclude (deftype))
+ (:use [clojure.contrib.types :only (deftype)])
+ (:use [clojure.contrib.stream-utils :only (defstream)])
+ (:use [clojure.contrib.def :only (defvar)]))
+
+;; Linear congruential generator
+;; http://en.wikipedia.org/wiki/Linear_congruential_generator
+
+(deftype ::lcg lcg
+ "Create a linear congruential generator"
+ {:arglists '([modulus multiplier increment seed])}
+ (fn [modulus multiplier increment seed]
+ {:m modulus :a multiplier :c increment :seed seed})
+ (fn [s] (map s (list :m :a :c :seed))))
+
+(defstream ::lcg
+ [lcg-state]
+ (let [{m :m a :a c :c seed :seed} lcg-state
+ value (/ (float seed) (float m))
+ new-seed (rem (+ c (* a seed)) m)]
+ [value (assoc lcg-state :seed new-seed)]))
+
+;; A generator based on Clojure's built-in rand function
+;; (and thus random from java.lang.Math)
+;; Note that this generator uses an internal mutable state.
+;;
+;; The state is *not* stored in the stream object and can thus
+;; *not* be restored!
+
+(defvar rand-stream (with-meta 'rand {:type ::rand-stream})
+ "A random number stream based on clojure.core/rand. Note that this
+ generator uses an internal mutable state. The state is thus not stored
+ in the stream object and cannot be restored.")
+
+(defstream ::rand-stream
+ [dummy-state]
+ [(rand) dummy-state])