diff options
author | Stuart Sierra <mail@stuartsierra.com> | 2010-08-07 16:41:53 -0400 |
---|---|---|
committer | Stuart Sierra <mail@stuartsierra.com> | 2010-08-07 16:41:53 -0400 |
commit | a6a92b9b3d2bfd9a56e1e5e9cfba706d1aeeaae5 (patch) | |
tree | f1f3da9887dc2dc557df3282b0bcbd4d701ec593 /modules/probabilities | |
parent | e7930c85290f77815cdb00a60604feedfa2d0194 (diff) |
Split all namespaces into sub-modules.
* Examples and tests have not been copied over.
* Clojure test/compile phases are commented out in parent POM.
* May require installing parent POM before full build.
Diffstat (limited to 'modules/probabilities')
4 files changed, 532 insertions, 0 deletions
diff --git a/modules/probabilities/pom.xml b/modules/probabilities/pom.xml new file mode 100644 index 00000000..5f478a18 --- /dev/null +++ b/modules/probabilities/pom.xml @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http//www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 + http://maven.apache.org/maven-v4_0_0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.clojure.contrib</groupId> + <artifactId>parent</artifactId> + <version>1.3.0-SNAPSHOT</version> + <relativePath>../parent</relativePath> + </parent> + <artifactId>probabilities-finite-distributions</artifactId> + <dependencies> + <dependency> + <groupId>org.clojure.contrib</groupId> + <artifactId>def</artifactId> + <version>1.3.0-SNAPSHOT</version> + </dependency> + <dependency> + <groupId>org.clojure.contrib</groupId> + <artifactId>monads</artifactId> + <version>1.3.0-SNAPSHOT</version> + </dependency> + </dependencies> +</project>
\ No newline at end of file diff --git a/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/finite_distributions.clj b/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/finite_distributions.clj new file mode 100644 index 00000000..86e5aec0 --- /dev/null +++ b/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/finite_distributions.clj @@ -0,0 +1,203 @@ +;; Finite probability distributions + +;; by Konrad Hinsen +;; last updated January 8, 2010 + +;; Copyright (c) Konrad Hinsen, 2009-2010. All rights reserved. The use +;; and distribution terms for this software are covered by the Eclipse +;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) +;; which can be found in the file epl-v10.html at the root of this +;; distribution. By using this software in any fashion, you are +;; agreeing to be bound by the terms of this license. You must not +;; remove this notice, or any other, from this software. + +(ns + ^{:author "Konrad Hinsen" + :doc "Finite probability distributions + This library defines a monad for combining finite probability + distributions."} + clojure.contrib.probabilities.finite-distributions + (:use [clojure.contrib.monads + :only (defmonad domonad with-monad maybe-t m-lift m-chain)] + [clojure.contrib.def :only (defvar)])) + +; The probability distribution monad. It is limited to finite probability +; distributions (e.g. there is a finite number of possible value), which +; are represented as maps from values to probabilities. + +(defmonad dist-m + "Monad describing computations on fuzzy quantities, represented by a finite + probability distribution for the possible values. A distribution is + represented by a map from values to probabilities." + [m-result (fn m-result-dist [v] + {v 1}) + m-bind (fn m-bind-dist [mv f] + (reduce (partial merge-with +) + (for [[x p] mv [y q] (f x)] + {y (* q p)}))) + ]) + +; Applying the monad transformer maybe-t to the basic dist monad results +; in the cond-dist monad that can handle invalid values. The total probability +; for invalid values ends up as the probability of m-zero (which is nil). +; The function normalize takes this probability out of the distribution and +; re-distributes its weight over the valid values. + +(defvar cond-dist-m + (maybe-t dist-m) + "Variant of the dist monad that can handle undefined values.") + +; Normalization + +(defn- scale-by + "Multiply each entry in dist by the scale factor s and remove zero entries." + [dist s] + (into {} + (for [[val p] dist :when (> p 0)] + [val (* p s)]))) + +(defn normalize-cond [cdist] + "Normalize a probability distribution resulting from a computation in + the cond-dist monad by re-distributing the weight of the invalid values + over the valid ones." + (let [missing (get cdist nil 0) + dist (dissoc cdist nil)] + (cond (zero? missing) dist + (= 1 missing) {} + :else (let [scale (/ 1 (- 1 missing))] + (scale-by dist scale))))) + +(defn normalize + "Convert a weight map (e.g. a map of counter values) to a distribution + by multiplying with a normalization factor. If the map has a key + :total, its value is assumed to be the sum over all the other values and + it is used for normalization. Otherwise, the sum is calculated + explicitly. The :total key is removed from the resulting distribution." + [weights] + (let [total (:total weights) + w (dissoc weights :total) + s (/ 1 (if (nil? total) (reduce + (vals w)) total))] + (scale-by w s))) + +; Functions that construct distributions + +(defn uniform + "Return a distribution in which each of the elements of coll + has the same probability." + [coll] + (let [n (count coll) + p (/ 1 n)] + (into {} (for [x (seq coll)] [x p])))) + +(defn choose + "Construct a distribution from an explicit list of probabilities + and values. They are given in the form of a vector of probability-value + pairs. In the last pair, the probability can be given by the keyword + :else, which stands for 1 minus the total of the other probabilities." + [& choices] + (letfn [(add-choice [dist [p v]] + (cond (nil? p) dist + (= p :else) + (let [total-p (reduce + (vals dist))] + (assoc dist v (- 1 total-p))) + :else (assoc dist v p)))] + (reduce add-choice {} (partition 2 choices)))) + +(defn bernoulli + [p] + "Returns the Bernoulli distribution for probability p." + (choose p 1 :else 0)) + +(defn- bc + [n] + "Returns the binomial coefficients for a given n." + (let [r (inc n)] + (loop [c 1 + f (list 1)] + (if (> c n) + f + (recur (inc c) (cons (* (/ (- r c) c) (first f)) f)))))) + +(defn binomial + [n p] + "Returns the binomial distribution, which is the distribution of the + number of successes in a series of n experiments whose individual + success probability is p." + (let [q (- 1 p) + n1 (inc n) + k (range n1) + pk (take n1 (iterate #(* p %) 1)) + ql (reverse (take n1 (iterate #(* q %) 1))) + f (bc n)] + (into {} (map vector k (map * f pk ql))))) + +(defn make-distribution + "Returns the distribution in which each element x of the collection + has a probability proportional to (f x)" + [coll f] + (normalize (into {} (for [k coll] [k (f k)])))) + +(defn zipf + "Returns the Zipf distribution in which the numbers k=1..n have + probabilities proportional to 1/k^s." + [s n] + (make-distribution (range 1 (inc n)) #(/ (java.lang.Math/pow % s)))) + +(defn certainly + "Returns a distribution in which the single value v has probability 1." + [v] + {v 1}) + +(with-monad dist-m + + (defn join-with + "Returns the distribution of (f x y) with x from dist1 and y from dist2." + [f dist1 dist2] + ((m-lift 2 f) dist1 dist2)) + +) + +(with-monad cond-dist-m + (defn cond-prob + "Returns the conditional probability for the values in dist that satisfy + the predicate pred." + [pred dist] + (normalize-cond + (domonad + [v dist + :when (pred v)] + v)))) + +; Select (with equal probability) N items from a sequence + +(defn- nth-and-rest [n xs] + "Return a list containing the n-th value of xs and the sequence + obtained by removing the n-th value from xs." + (let [[h t] (split-at n xs)] + (list (first t) (concat h (rest t))))) + +(with-monad dist-m + + (defn- select-n [n xs] + (letfn [(select-1 [[s xs]] + (uniform (for [i (range (count xs))] + (let [[nth rest] (nth-and-rest i xs)] + (list (cons nth s) rest)))))] + ((m-chain (replicate n select-1)) (list '() xs)))) + + (defn select [n xs] + "Return the distribution for all possible ordered selections of n elements + out of xs." + ((m-lift 1 first) (select-n n xs))) + +) + +; Find the probability that a given predicate is satisfied + +(defn prob + "Return the probability that the predicate pred is satisfied in the + distribution dist, i.e. the sum of the probabilities of the values + that satisfy pred." + [pred dist] + (apply + (for [[x p] dist :when (pred x)] p))) + diff --git a/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/monte_carlo.clj b/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/monte_carlo.clj new file mode 100644 index 00000000..73c89de8 --- /dev/null +++ b/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/monte_carlo.clj @@ -0,0 +1,240 @@ +;; Monte-Carlo algorithms + +;; by Konrad Hinsen +;; last updated May 3, 2009 + +;; Copyright (c) Konrad Hinsen, 2009. All rights reserved. The use +;; and distribution terms for this software are covered by the Eclipse +;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) +;; which can be found in the file epl-v10.html at the root of this +;; distribution. By using this software in any fashion, you are +;; agreeing to be bound by the terms of this license. You must not +;; remove this notice, or any other, from this software. + +(ns + ^{:author "Konrad Hinsen" + :doc "Monte-Carlo method support + + Monte-Carlo methods transform an input random number stream + (usually having a continuous uniform distribution in the + interval [0, 1)) into a random number stream whose distribution + satisfies certain conditions (usually the expectation value + is equal to some desired quantity). They are thus + transformations from one probability distribution to another one. + + This library represents a Monte-Carlo method by a function that + takes as input the state of a random number stream with + uniform distribution (see + clojure.contrib.probabilities.random-numbers) and returns a + vector containing one sample value of the desired output + distribution and the final state of the input random number + stream. Such functions are state monad values and can be + composed using operations defined in clojure.contrib.monads."} + clojure.contrib.probabilities.monte-carlo + (:refer-clojure :exclude (deftype)) + (:use [clojure.contrib.macros :only (const)]) + (:use [clojure.contrib.types :only (deftype)]) + (:use [clojure.contrib.stream-utils :only (defstream stream-next)]) + (:use [clojure.contrib.monads + :only (with-monad state-m m-lift m-seq m-fmap)]) + (:require [clojure.contrib.generic.arithmetic :as ga]) + (:require [clojure.contrib.accumulators :as acc])) + +;; Random number transformers and random streams +;; +;; A random number transformer is a function that takes a random stream +;; state as input and returns the next value from the transformed stream +;; plus the new state of the input stream. Random number transformers +;; are thus state monad values. +;; +;; Distributions are implemented as random number transformers that +;; transform a uniform distribution in the interval [0, 1) to the +;; desired distribution. Composition of such distributions allows +;; the realization of any kind of Monte-Carlo algorithm. The result +;; of such a composition is always again a distribution. +;; +;; Random streams are defined by a random number transformer and an +;; input random number stream. If the randon number transformer represents +;; a distribution, the input stream must have a uniform distribution +;; in the interval [0, 1). + +; Random stream definition +(deftype ::random-stream random-stream + "Define a random stream by a distribution and the state of a + random number stream with uniform distribution in [0, 1)." + {:arglists '([distribution random-stream-state])} + (fn [d rs] (list d rs))) + +(defstream ::random-stream + [[d rs]] + (let [[r nrs] (d rs)] + [r (random-stream d nrs)])) + +; Rejection of values is used in the construction of distributions +(defn reject + "Return the distribution that results from rejecting the values from + dist that do not satisfy predicate p." + [p dist] + (fn [rs] + (let [[r nrs] (dist rs)] + (if (p r) + (recur nrs) + [r nrs])))) + +; Draw a value from a discrete distribution given as a map from +; values to probabilities. +; (see clojure.contrib.probabilities.finite-distributions) +(with-monad state-m + (defn discrete + "A discrete distribution, defined by a map dist mapping values + to probabilities. The sum of probabilities must be one." + [dist] + (letfn [(pick-at-level [l dist-items] + (let [[[x p] & rest-dist] dist-items] + (if (> p l) + x + (recur (- l p) rest-dist))))] + (m-fmap #(pick-at-level % (seq dist)) stream-next)))) + +; Uniform distribution in an finite half-open interval +(with-monad state-m + (defn interval + [a b] + "Transform a sequence of uniform random numbers in the interval [0, 1) + into a sequence of uniform random numbers in the interval [a, b)." + (let [d (- b a) + f (if (zero? a) + (if (= d 1) + identity + (fn [r] (* d r))) + (if (= d 1) + (fn [r] (+ a r)) + (fn [r] (+ a (* d r)))))] + (m-fmap f stream-next)))) + +; Normal (Gaussian) distribution +(defn normal + "Transform a sequence urs of uniform random number in the interval [0, 1) + into a sequence of normal random numbers with mean mu and standard + deviation sigma." + [mu sigma] + ; This function implements the Kinderman-Monahan ratio method: + ; A.J. Kinderman & J.F. Monahan + ; Computer Generation of Random Variables Using the Ratio of Uniform Deviates + ; ACM Transactions on Mathematical Software 3(3) 257-260, 1977 + (fn [rs] + (let [[u1 rs] (stream-next rs) + [u2* rs] (stream-next rs) + u2 (- 1. u2*) + s (const (* 4 (/ (. Math exp (- 0.5)) (. Math sqrt 2.)))) + z (* s (/ (- u1 0.5) u2)) + zz (+ (* 0.25 z z) (. Math log u2))] + (if (> zz 0) + (recur rs) + [(+ mu (* sigma z)) rs])))) + +; Lognormal distribution +(with-monad state-m + (defn lognormal + "Transform a sequence of uniform random numbesr in the interval [0, 1) + into a sequence of lognormal random numbers with mean mu and standard + deviation sigma." + [mu sigma] + (m-fmap #(. Math exp %) (normal mu sigma)))) + +; Exponential distribution +(with-monad state-m + (defn exponential + "Transform a sequence of uniform random numbers in the interval [0, 1) + into a sequence of exponential random numbers with parameter lambda." + [lambda] + (when (<= lambda 0) + (throw (IllegalArgumentException. + "exponential distribution requires a positive argument"))) + (let [neg-inv-lambda (- (/ lambda)) + ; remove very small numbers to prevent log from returning -Infinity + not-too-small (reject #(< % 1e-323) stream-next)] + (m-fmap #(* (. Math log %) neg-inv-lambda) not-too-small)))) + +; Another implementation of the normal distribution. It uses the +; Box-Muller transform, but discards one of the two result values +; at each cycle because the random number transformer interface cannot +; handle two outputs at the same time. +(defn normal-box-muller + "Transform a sequence of uniform random numbers in the interval [0, 1) + into a sequence of normal random numbers with mean mu and standard + deviation sigma." + [mu sigma] + (fn [rs] + (let [[u1 rs] (stream-next rs) + [u2 rs] (stream-next rs) + v1 (- (* 2.0 u1) 1.0) + v2 (- (* 2.0 u2) 1.0) + s (+ (* v1 v1) (* v2 v2)) + ls (. Math sqrt (/ (* -2.0 (. Math log s)) s)) + x1 (* v1 ls) + x2 (* v2 ls)] + (if (or (>= s 1) (= s 0)) + (recur rs) + [x1 rs])))) + +; Finite samples from a distribution +(with-monad state-m + + (defn sample + "Return the distribution of samples of length n from the + distribution dist" + [n dist] + (m-seq (replicate n dist))) + + (defn sample-reduce + "Returns the distribution of the reduction of f over n samples from the + distribution dist." + ([f n dist] + (if (zero? n) + (m-result (f)) + (let [m-f (m-lift 2 f) + sample (replicate n dist)] + (reduce m-f sample)))) + ([f val n dist] + (let [m-f (m-lift 2 f) + m-val (m-result val) + sample (replicate n dist)] + (reduce m-f m-val sample)))) + + (defn sample-sum + "Return the distribution of the sum over n samples from the + distribution dist." + [n dist] + (sample-reduce ga/+ n dist)) + + (defn sample-mean + "Return the distribution of the mean over n samples from the + distribution dist" + [n dist] + (let [div-by-n (m-lift 1 #(ga/* % (/ n)))] + (div-by-n (sample-sum n dist)))) + + (defn sample-mean-variance + "Return the distribution of the mean-and-variance (a vector containing + the mean and the variance) over n samples from the distribution dist" + [n dist] + (let [extract (m-lift 1 (fn [mv] [(:mean mv) (:variance mv)]))] + (extract (sample-reduce acc/add acc/empty-mean-variance n dist)))) + +) + +; Uniform distribution inside an n-sphere +(with-monad state-m + (defn n-sphere + "Return a uniform distribution of n-dimensional vectors inside an + n-sphere of radius r." + [n r] + (let [box-dist (sample n (interval (- r) r)) + sq #(* % %) + r-sq (sq r) + vec-sq #(apply + (map sq %)) + sphere-dist (reject #(> (vec-sq %) r-sq) box-dist) + as-vectors (m-lift 1 vec)] + (as-vectors sphere-dist)))) + diff --git a/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/random_numbers.clj b/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/random_numbers.clj new file mode 100644 index 00000000..8f7b358c --- /dev/null +++ b/modules/probabilities/src/main/clojure/clojure/contrib/probabilities/random_numbers.clj @@ -0,0 +1,63 @@ +;; Random number generators + +;; by Konrad Hinsen +;; last updated May 3, 2009 + +;; Copyright (c) Konrad Hinsen, 2009. All rights reserved. The use +;; and distribution terms for this software are covered by the Eclipse +;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) +;; which can be found in the file epl-v10.html at the root of this +;; distribution. By using this software in any fashion, you are +;; agreeing to be bound by the terms of this license. You must not +;; remove this notice, or any other, from this software. + +(ns + ^{:author "Konrad Hinsen" + :doc "Random number streams + + This library provides random number generators with a common + stream interface. They all produce pseudo-random numbers that are + uniformly distributed in the interval [0, 1), i.e. 0 is a + possible value but 1 isn't. For transformations to other + distributions, see clojure.contrib.probabilities.monte-carlo. + + At the moment, the only generator provided is a rather simple + linear congruential generator."} + clojure.contrib.probabilities.random-numbers + (:refer-clojure :exclude (deftype)) + (:use [clojure.contrib.types :only (deftype)]) + (:use [clojure.contrib.stream-utils :only (defstream)]) + (:use [clojure.contrib.def :only (defvar)])) + +;; Linear congruential generator +;; http://en.wikipedia.org/wiki/Linear_congruential_generator + +(deftype ::lcg lcg + "Create a linear congruential generator" + {:arglists '([modulus multiplier increment seed])} + (fn [modulus multiplier increment seed] + {:m modulus :a multiplier :c increment :seed seed}) + (fn [s] (map s (list :m :a :c :seed)))) + +(defstream ::lcg + [lcg-state] + (let [{m :m a :a c :c seed :seed} lcg-state + value (/ (float seed) (float m)) + new-seed (rem (+ c (* a seed)) m)] + [value (assoc lcg-state :seed new-seed)])) + +;; A generator based on Clojure's built-in rand function +;; (and thus random from java.lang.Math) +;; Note that this generator uses an internal mutable state. +;; +;; The state is *not* stored in the stream object and can thus +;; *not* be restored! + +(defvar rand-stream (with-meta 'rand {:type ::rand-stream}) + "A random number stream based on clojure.core/rand. Note that this + generator uses an internal mutable state. The state is thus not stored + in the stream object and cannot be restored.") + +(defstream ::rand-stream + [dummy-state] + [(rand) dummy-state]) |