aboutsummaryrefslogtreecommitdiff
path: root/src/clojure/contrib/accumulators.clj
blob: 5ccee1df9464e9a0f62e38e71cdf57b6dc49c899 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
;; Accumulators

;; by Konrad Hinsen
;; last updated May 3, 2009

;; This module defines various accumulators (list, vector, map,
;; sum, product, counter, and combinations thereof) with a common
;; interface defined by the multimethods add and combine.
;; For each accumulator type, its empty value is defined in this module.
;; Applications typically use this as a starting value and add data
;; using the add multimethod.

;; Copyright (c) Konrad Hinsen, 2009. All rights reserved.  The use
;; and distribution terms for this software are covered by the Eclipse
;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
;; which can be found in the file epl-v10.html at the root of this
;; distribution.  By using this software in any fashion, you are
;; agreeing to be bound by the terms of this license.  You must not
;; remove this notice, or any other, from this software.

(ns
  #^{:author "Konrad Hinsen"
     :doc "A generic accumulator interface and implementations of various
           accumulators."}
  clojure.contrib.accumulators
  (:use [clojure.contrib.types :only (deftype)])
  (:use [clojure.contrib.def :only (defvar defvar- defmacro-)])
  (:require [clojure.contrib.generic.arithmetic :as ga]))

(defmulti add
  "Add item to the accumulator acc and return the updated accumulator.
   What adding an item means depends on the type of the accumulator;
   dispatch is on (type acc)."
  {:arglists '([acc item])}
  (fn [acc _] (type acc)))

(defn add-items
  "Add every element of the collection items, in order, to the
   accumulator acc and return the resulting accumulator."
  [acc items]
  (loop [result acc
         remaining (seq items)]
    (if remaining
      (recur (add result (first remaining)) (next remaining))
      result)))

(defmulti combine
  "Combine the values of one or more accumulators of the same type
   into a single accumulator of that type. Dispatch is on the type
   of the first accumulator."
  {:arglists '([& accs])}
  (fn [& [head]] (type head)))

;
; An ::accumulator type tag is attached to the built-in types
; when used as accumulators, and new types are derived from it.
; Multimethods add and combine for ::accumulator sub-dispatch on class.
; We also define generic addition as the combine operation.
;
(defn- with-acc-tag
  "Return x carrying the metadata {:type ::accumulator}, which makes
   (type x) report ::accumulator."
  [x]
  (with-meta x {:type ::accumulator}))

; Values tagged ::accumulator are built-in collections; forward to the
; add implementation registered for the concrete class of the value.
(defmethod add ::accumulator
  [acc item]
  (let [impl (get-method add (class acc))]
    (impl acc item)))

; Values tagged ::accumulator are built-in collections; forward to the
; combine implementation registered for the concrete class of the first
; accumulator. The lookup has to go through the combine multimethod:
; looking up add here would invoke the two-argument add implementation
; with accumulators as its arguments (nesting one accumulator inside the
; other, or failing with an arity error for more than two arguments).
(defmethod combine ::accumulator
  [& as]
  (apply (get-method combine (class (first as))) as))

; Generic addition of two accumulators is defined as combining them.
(defmethod ga/+ ::accumulator
  [acc1 acc2]
  (combine acc1 acc2))

;
; Vector accumulator
;
(defvar empty-vector (with-acc-tag [])
  "An empty vector accumulator. Adding an item appends it at the end.")

; Concatenate all vectors in argument order into one tagged vector.
(defmethod combine clojure.lang.IPersistentVector
  [& vectors]
  (-> (apply concat vectors) vec with-acc-tag))

; Append the item at the end and re-apply the accumulator tag.
(defmethod add clojure.lang.IPersistentVector
  [acc item]
  (-> acc (conj item) with-acc-tag))

;
; List accumulator
;
; The starting value is the empty list carrying the ::accumulator tag.
; The add method for lists uses conj, so new items end up at the front.
(defvar empty-list (with-acc-tag '())
  "An empty list accumulator. Adding an item appends it at the beginning.")

; Concatenate all lists in argument order into one tagged list.
; concat returns a lazy sequence, which is not an IPersistentList; a
; tagged lazy sequence would have no add/combine implementation when
; the ::accumulator methods sub-dispatch on its class. Rebuilding a
; real list keeps the result usable as a list accumulator.
(defmethod combine clojure.lang.IPersistentList
  [& vs]
  (with-acc-tag (apply list (apply concat vs))))

; conj on a list puts the item at the front; re-apply the accumulator tag.
(defmethod add clojure.lang.IPersistentList
  [acc item]
  (-> acc (conj item) with-acc-tag))

;
; Queue accumulator
;
(defvar empty-queue (with-acc-tag clojure.lang.PersistentQueue/EMPTY)
  "An empty queue accumulator. Adding an item appends it at the end.")

; Add the items of all remaining queues, in order, to the first queue.
(defmethod combine clojure.lang.PersistentQueue
  [& [head & others]]
  (add-items head (apply concat others)))

; Append the item at the end of the queue and re-apply the accumulator tag.
(defmethod add clojure.lang.PersistentQueue
  [acc item]
  (-> acc (conj item) with-acc-tag))

;
; Set accumulator
;
(defvar empty-set (with-acc-tag #{})
  "An empty set accumulator.")

; Methods are registered on the concrete class of the empty set literal.
; Combining takes the union of all sets.
(defmethod combine (class empty-set)
  [& sets]
  (let [merged (apply clojure.set/union sets)]
    (with-acc-tag merged)))

; Adding an item that is already present leaves the contents unchanged.
(defmethod add (class empty-set)
  [acc item]
  (-> acc (conj item) with-acc-tag))

;
; String accumulator
;
; Strings are used directly as accumulators, without the ::accumulator
; tag, so the multimethods dispatch on java.lang.String itself.
(defvar empty-string ""
  "An empty string accumulator. Adding an item (string or character)
   appends it at the end.")

; Concatenate all strings in argument order.
(defmethod combine java.lang.String
  [& strings]
  (apply str strings))

; Append the string form of the item.
(defmethod add java.lang.String
  [acc item]
  (str acc item))

;
; Map accumulator
;
(defvar empty-map (with-acc-tag {})
  "An empty map accumulator. Items to be added must be [key value] pairs.")

; Merge all maps; for duplicate keys the value from the later map wins.
(defmethod combine clojure.lang.IPersistentMap
  [& maps]
  (let [merged (apply merge maps)]
    (with-acc-tag merged)))

; conj a [key value] pair into the map and re-apply the accumulator tag.
(defmethod add clojure.lang.IPersistentMap
  [acc entry]
  (-> acc (conj entry) with-acc-tag))

;
; Numerical accumulators: sum, product, minimum, maximum
;
; Private macro that defines a numerical accumulator called `name`.
;   name       - symbol used for the type constructor and the type tag
;   op         - function used both to add an item and to combine values
;   empty      - starting value stored in the empty accumulator
;   doc-string - documentation for the generated empty-<name> var
; The expansion defines the type, derives it from ::accumulator, defines
; the var empty-<name>, and installs combine and add methods for the type.
(defmacro- defacc
  [name op empty doc-string]
  ; type-tag is a keyword in the current namespace named after `name`;
  ; empty-symbol is the symbol empty-<name>.
  (let [type-tag (keyword (str *ns*) (str name))
	empty-symbol (symbol (str "empty-" name))]
  ; op is evaluated once and bound to a local shared by both methods.
  `(let [op# ~op]
     ; The two functions wrap a value as {:value x} and unwrap it again.
     ; NOTE(review): presumably these are the constructor and deconstructor
     ; arguments of clojure.contrib.types/deftype - confirm against that library.
     (deftype ~type-tag ~name
       (fn [~'x] {:value ~'x})
       (fn [~'x] (list (:value ~'x))))
     (derive ~type-tag ::accumulator)
     (defvar ~empty-symbol (~name ~empty) ~doc-string)
     ; combine: apply op to the :value of every accumulator.
     (defmethod combine ~type-tag [& vs#]
       (~name (apply op# (map :value vs#))))
     ; add: apply op to the current :value and the new item.
     (defmethod add ~type-tag [v# e#]
       (~name (op# (:value v#) e#))))))

; Sum accumulator: items are added with +, starting from 0.
(defacc sum + 0
  "An empty sum accumulator. Only numbers can be added.")

; Product accumulator: items are multiplied with *, starting from 1.
(defacc product * 1
  "An empty product accumulator. Only numbers can be added.")

; The empty maximum accumulator should have value -infinity.
; This is represented by nil and taken into account in an
; adapted max function. In the minimum accumulator, nil is
; similarly used to represent +infinity.

; The operation drops nil/false arguments before calling max, so the nil
; stored in the empty accumulator never reaches max; with nothing left
; it returns nil.
(defacc maximum
  (fn [& candidates]
    (if-let [present (seq (filter identity candidates))]
      (apply max present)
      nil))
  nil
  "An empty maximum accumulator. Only numbers can be added.")

; Same scheme as maximum, with min as the underlying operation.
(defacc minimum
  (fn [& candidates]
    (if-let [present (seq (filter identity candidates))]
      (apply min present)
      nil))
  nil
  "An empty minimum accumulator. Only numbers can be added.")

;
; Numeric min-max accumulator
; (combination of minimum and maximum)
;
; A min-max value is a map {:min lo :max hi}; the second function turns
; such a map back into the list (lo hi).
(deftype ::min-max min-max
  (fn [lo hi] {:min lo :max hi})
  (fn [{lo :min hi :max}] (list lo hi)))

(derive ::min-max ::accumulator)

; Both bounds start out as nil, meaning that no number has been added yet.
(defvar empty-min-max (min-max nil nil)
  "An empty min-max accumulator, combining minimum and maximum.
   Only numbers can be added.")

; Combine min-max accumulators into one holding the overall minimum and
; maximum. An accumulator that has not received any item stores nil for
; both bounds; passing nil to min or max throws, so nil bounds are
; dropped first. If no accumulator has a bound, the result bound is nil,
; i.e. the combination of empty accumulators is again empty.
(defmethod combine ::min-max
  [& vs]
  (let [extreme (fn [op k]
                  (when-let [xs (seq (remove nil? (map k vs)))]
                    (apply op xs)))]
    (min-max (extreme min :min) (extreme max :max))))

; Widen the stored bounds so that they include e. A nil bound means that
; nothing has been added yet, so e itself becomes that bound.
(defmethod add ::min-max
  [v e]
  (let [{lo :min hi :max} v]
    (min-max (if (nil? lo) e (min lo e))
             (if (nil? hi) e (max hi e)))))

;
; Mean and variance accumulator
;
; Type tag ::mean-variance with constructor function mean-variance.
(deftype ::mean-variance mean-variance)

; Deriving from ::accumulator makes the generic ga/+ method defined for
; ::accumulator applicable to this type.
(derive ::mean-variance ::accumulator)

; The value is a map with the number of items (:n), their mean (:mean)
; and their variance (:variance); all three start at 0.
(defvar empty-mean-variance (mean-variance {:n 0 :mean 0 :variance 0})
  "An empty mean-variance accumulator, combining sample mean and
   sample variance. Only numbers can be added.")

; Combine mean-variance accumulators.
;   one argument  - returned unchanged
;   two arguments - pooled count, mean and sample variance
;   more          - folded pairwise from left to right
(defmethod combine ::mean-variance
  ([mv]
   mv)

  ([mv1 mv2]
   (let [{n1 :n mean1 :mean var1 :variance} mv1
         {n2 :n mean2 :mean var2 :variance} mv2
         n (+ n1 n2)]
     (if (zero? n)
       ; Both accumulators are empty. Computing the pooled mean would
       ; divide by zero, and the combination of two empty accumulators
       ; is simply an empty accumulator.
       mv1
       (let [mean     (/ (+ (* n1 mean1) (* n2 mean2)) n)
             sq       #(* % %)
             ; Contribution of the shift of each partial mean to the
             ; sum of squared deviations around the pooled mean.
             c        (+ (* n1 (sq (- mean mean1))) (* n2 (sq (- mean mean2))))
             ; (dec k) * variance recovers each partial sum of squared
             ; deviations; a single item has variance 0 by definition.
             variance (if (< n 2)
                        0
                        (/ (+ c (* (dec n1) var1) (* (dec n2) var2)) (dec n)))]
         (mean-variance {:n n :mean mean :variance variance})))))

  ([mv1 mv2 & mvs]
   (reduce combine (combine mv1 mv2) mvs)))

; Incrementally update count, mean and sample variance with the number x.
; The first item added yields variance 0.
(defmethod add ::mean-variance
  [mv x]
  (let [{cnt :n old-mean :mean old-var :variance} mv
        new-cnt (inc cnt)
        delta (- x old-mean)
        updated-mean (+ old-mean (/ delta new-cnt))
        updated-var (if (zero? cnt)
                      0
                      (/ (+ (* (dec cnt) old-var)
                            (* delta (- x updated-mean)))
                         cnt))]
    (mean-variance {:n new-cnt :mean updated-mean :variance updated-var})))

;
; Counter accumulator
;
; Type tag ::counter with constructor function counter.
(deftype ::counter counter)

; Deriving from ::accumulator makes the generic ga/+ method defined for
; ::accumulator applicable to this type.
(derive ::counter ::accumulator)

; The counter starts as an empty map from item to count.
(defvar empty-counter (counter {})
  "An empty counter accumulator. Its value is a map that stores for
   every item the number of times it was added.")

; Merge counters by summing, for every item, the counts found in all
; counters. The first counter is the starting value of the fold.
(defmethod combine ::counter
  [v & vs]
  (let [bump      (fn [cntr [item n]]
                    (assoc cntr item (+ n (get cntr item 0))))
        merge-two (fn [c1 c2]
                    (reduce bump c1 c2))]
    (reduce merge-two v vs)))

; Increment the count stored for item e; items not seen before count from 0.
(defmethod add ::counter
  [v e]
  (let [seen (get v e 0)]
    (assoc v e (inc seen))))

;
; Counter accumulator with total count
;
; Type tag ::counter-with-total with constructor function counter-with-total.
; It derives from ::counter, so multimethods without a method for
; ::counter-with-total itself can fall back to the ::counter method.
(deftype ::counter-with-total counter-with-total)
(derive ::counter-with-total ::counter)

; The map starts with a single entry, :total 0.
(defvar empty-counter-with-total
  (counter-with-total {:total 0})
  "An empty counter-with-total accumulator. It works like the counter
   accumulator, except that the total number of items added is stored as the
   value of the key :total.")

; Increment both the count stored for item e and the running :total.
; Both new values are computed from the accumulator as it was passed in.
(defmethod add ::counter-with-total
  [v e]
  (let [item-count (inc (get v e 0))
        total      (inc (:total v))]
    (assoc v e item-count :total total)))

;
; Accumulator n-tuple
;
; Type tag ::tuple with constructor function acc-tuple.
(deftype ::tuple acc-tuple)

; Deriving from ::accumulator makes the generic ga/+ method defined for
; ::accumulator applicable to accumulator tuples.
(derive ::tuple ::accumulator)

(defn empty-tuple
  "Returns an accumulator tuple whose value is a vector of the supplied
   empty-accumulators. The sub-accumulators of a tuple work in parallel:
   every added item must be a sequence with one element per
   sub-accumulator."
  [empty-accumulators]
  (let [slots (into [] empty-accumulators)]
    (acc-tuple slots)))

; Combine accumulator tuples position by position: the i-th
; sub-accumulators of all tuples are combined with each other.
; map needs the tuples as separate collection arguments for this
; (hence apply). Mapping combine over the list of tuples would call
; combine on each whole tuple, which dispatches back to this method
; and never terminates.
(defmethod combine ::tuple
  [& vs]
  (acc-tuple (vec (apply map combine vs))))

; Add the i-th element of e to the i-th sub-accumulator of the tuple.
(defmethod add ::tuple
  [v e]
  (-> (map add v e) vec acc-tuple))