-
Notifications
You must be signed in to change notification settings - Fork 0
/
bloom-filter.c
211 lines (176 loc) · 4.92 KB
/
bloom-filter.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
/* bloom-filter.c
*
* Copyright (C) 2012 Christian Hergert <[email protected]>
*
* This file is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This file is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <limits.h>
#include <string.h>
#include "bloom-filter.h"
/*
* MurmurHash3, written by Austin Appleby, and placed in the public
* domain.
*
* http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
*
* Extracted MurmurHash3_x86_32 (without the MSVC specifics), changed types to
* glib types, gave it a return value, assume 0-terminated if len == -1
*/
static inline guint32
rotl32 (guint32 x, gint8 r)
{
return (x << r) | (x >> (32 - r));
}
#define ROTL32(x,y) rotl32(x,y)
/*
* Block read - if your platform needs to do endian-swapping or can only
* handle aligned reads, do the conversion here
*
* NB: original uses __attribute__((always_inline)), but this generates a
* warning with gcc, and since -Werror is on ...
*/
static inline guint32
getblock (const guint32 * p, int i)
{
return p[i];
}
/*
* Finalization mix - force all bits of a hash block to avalanche
*
* NB: original uses __attribute__((always_inline)), but this generates a
* warning with gcc, and since -Werror is on ...
*/
static inline guint32
fmix (guint32 h)
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
static guint32
murmur_hash3_x86_32 (gconstpointer key, int len, guint32 seed)
{
const guint8 *data = (const guint8 *)key;
const guint8 *tail;
const guint32 c1 = 0xcc9e2d51;
const guint32 c2 = 0x1b873593;
const guint32 *blocks;
int i, nblocks;
guint32 h1 = seed;
guint32 k1;
if (len < 0)
len = strlen ((const char*)key);
nblocks = len / 4;
blocks = (const guint32 *)(data + nblocks*4);
for(i = -nblocks; i; i++)
{
k1 = getblock(blocks,i);
k1 *= c1;
k1 = ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = ROTL32(h1,13);
h1 = h1*5+0xe6546b64;
}
tail = (const guint8*)(data + nblocks*4);
k1 = 0;
switch(len & 3)
{
case 3: k1 ^= tail[2] << 16;
case 2: k1 ^= tail[1] << 8;
case 1: k1 ^= tail[0];
k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
};
h1 ^= len;
h1 = fmix(h1);
return h1;
}
BloomFilter *
bloom_filter_new_full (gsize width,
gint key_len,
guint n_hash_funcs,
BloomHashFunc first_hash_func,
...)
{
BloomFilter *filter;
va_list args;
guint i;
g_return_val_if_fail(width, NULL);
g_return_val_if_fail(n_hash_funcs, NULL);
filter = g_malloc0(sizeof *filter + (width / CHAR_BIT) + 1);
filter->ref_count = 1;
filter->width = width;
filter->key_len = key_len;
filter->hash_funcs = g_ptr_array_new();
if (first_hash_func) {
g_ptr_array_add(filter->hash_funcs, first_hash_func);
va_start(args, first_hash_func);
for (i = 1; i < n_hash_funcs; i++) {
g_ptr_array_add(filter->hash_funcs, va_arg(args, BloomHashFunc));
}
va_end(args);
} else {
for(i = 0; i < n_hash_funcs; i++) {
g_ptr_array_add(filter->hash_funcs, murmur_hash3_x86_32);
}
}
return filter;
}
BloomFilter *
bloom_filter_new_murmur (gsize width, gint key_len, guint n_hash_funcs)
{
BloomFilter *filter;
filter = bloom_filter_new_full (width, key_len, n_hash_funcs, NULL);
return filter;
}
void
bloom_filter_remove_all (BloomFilter *filter)
{
g_return_if_fail(filter);
memset(filter->data, 0, (filter->width / CHAR_BIT) + 1);
}
BloomFilter *
bloom_filter_ref (BloomFilter *filter)
{
g_return_val_if_fail(filter, NULL);
g_return_val_if_fail(filter->ref_count > 0, NULL);
g_atomic_int_inc(&filter->ref_count);
return filter;
}
void
bloom_filter_unref (BloomFilter *filter)
{
g_return_if_fail(filter);
g_return_if_fail(filter->ref_count > 0);
if (g_atomic_int_dec_and_test(&filter->ref_count)) {
g_ptr_array_unref(filter->hash_funcs);
g_free(filter);
}
}
GType
bloom_filter_get_type (void)
{
static volatile GType type_id;
if (g_once_init_enter(&type_id)) {
GType gtype;
gtype = g_boxed_type_register_static("BloomFilter",
(GBoxedCopyFunc)bloom_filter_ref,
(GBoxedFreeFunc)bloom_filter_unref);
g_once_init_leave(&type_id, gtype);
}
return type_id;
}