1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
/*
This file is part of GNUnet
Copyright (C) 2013 GNUnet e.V.
GNUnet is free software: you can redistribute it and/or modify it
under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License,
or (at your option) any later version.
GNUnet is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file set/ibf_sim.c
* @brief implementation of simulation for invertible bloom filter
* @author Florian Dold
*
* This code was used for some internal experiments, it is not
* build or shipped as part of the GNUnet system.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define MAX_IBF_DECODE 16
/* report average over how many rounds? */
#define ROUNDS 100000
/* enable one of the three below */
// simple fix
#define FIX1 0
// possibly slightly better fix for large IBF_DECODE values
#define FIX2 1
// SIGCOMM algorithm
#define STRATA 0
// print each value?
#define VERBOSE 0
// avoid assembly? (ASM is about 50% faster)
#define SLOW 0
int
main(int argc, char **argv)
{
unsigned int round;
unsigned int buckets[31]; // max is 2^31 as 'random' returns only between 0 and 2^31
unsigned int i;
int j;
unsigned int r;
unsigned int ret;
unsigned long long total;
unsigned int want;
double predict;
srandom (time (NULL));
total = 0;
want = atoi (argv[1]);
for (round=0;round<ROUNDS;round++)
{
memset (buckets, 0, sizeof (buckets));
for (i=0;i<want;i++)
{
/* FIXME: might want to use 'better' PRNG to avoid
PRNG-induced biases */
r = random ();
if (0 == r)
continue;
#if SLOW
for (j=0;(j < 31) && (0 == (r & (1 << j)));j++) ;
#else
/* use assembly / gcc */
j = __builtin_ffs (r) - 1;
#endif
buckets[j]++;
}
ret = 0;
predict = 0.0;
for (j=31;j >= 0; j--)
{
#if FIX1
/* improved algorithm, for 1000 elements with IBF-DECODE 8, I
get 990/1000 elements on average over 1 million runs; key
idea being to stop short of the 'last' possible IBF as
otherwise a "lowball" per-chance would unduely influence the
result */
if ( (j > 0) &&
(buckets[j - 1] > MAX_IBF_DECODE) )
{
ret *= (1 << (j + 1));
break;
}
#endif
#if FIX2
/* another improvement: don't just always cut off the last one,
but rather try to predict based on all previous values where
that "last" one is; additional prediction can only really
work if MAX_IBF_DECODE is sufficiently high */
if ( (j > 0) &&
( (buckets[j - 1] > MAX_IBF_DECODE) ||
(predict > MAX_IBF_DECODE) ) )
{
ret *= (1 << (j + 1));
break;
}
#endif
#if STRATA
/* original algorithm, for 1000 elements with IBF-DECODE 8,
I get 920/1000 elements on average over 1 million runs */
if (buckets[j] > MAX_IBF_DECODE)
{
ret *= (1 << (j+1));
break;
}
#endif
ret += buckets[j];
predict = (buckets[j] + 2.0 * predict) / 2.0;
}
#if VERBOSE
fprintf (stderr, "%u ", ret);
#endif
total += ret;
}
fprintf (stderr, "\n");
fprintf (stdout, "average %llu\n", total / ROUNDS);
return 0;
}
/* TODO: should calculate stddev of the results to also be able to
say something about the stability of the results, outside of
large-scale averages -- gaining 8% precision at the expense of
50% additional variance might not be worth it... */
|