forked from urbit/vere
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hashcons.h
259 lines (209 loc) · 5.42 KB
/
hashcons.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
/// @file
#ifndef UR_HASHCONS_H
#define UR_HASHCONS_H
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <stdio.h>
#include "defs.h"
/*
** 64-bit noun references, with the top 2 bits reserved for type tags.
*/
typedef uint64_t ur_nref;
typedef enum {
ur_direct = 0,
ur_iatom = 1,
ur_icell = 2,
} ur_tag;
#define ur_nref_tag(ref) ( ref >> 62 )
#define ur_nref_idx(ref) ur_mask_62(ref)
/*
** 31-bit, non-zero, murmur3-based noun hash.
*/
typedef uint32_t ur_mug;
/*
** associative structures (dictionaries) of noun references,
** distributed by mug across fixed-size buckets (pails),
** reallocated with fibonacci growth once a bucket is full.
**
** - ur_dict_t: set of noun references
** - ur_dict32_t: map from noun reference to uint32
** - ur_dict32_t: map from noun reference to uint64
*/
#define ur_pail_max 10
typedef struct ur_pail32_s {
ur_nref refs[ur_pail_max];
uint32_t vals[ur_pail_max];
} ur_pail32_t;
typedef struct ur_dict32_s {
uint64_t prev;
uint64_t size;
uint8_t *fills;
ur_pail32_t *buckets;
} ur_dict32_t;
typedef struct ur_pail64_s {
ur_nref refs[ur_pail_max];
uint64_t vals[ur_pail_max];
} ur_pail64_t;
typedef struct ur_dict64_s {
uint64_t prev;
uint64_t size;
uint8_t *fills;
ur_pail64_t *buckets;
} ur_dict64_t;
typedef struct ur_pail_s {
ur_nref refs[ur_pail_max];
} ur_pail_t;
typedef struct ur_dict_s {
uint64_t prev;
uint64_t size;
uint8_t *fills;
ur_pail_t *buckets;
} ur_dict_t;
/*
** cells are hash-consed, atoms are deduplicated (byte-array comparison),
** mug hashes are stored, and noun references are unique within a root.
*/
typedef struct ur_cells_s {
ur_dict_t dict;
uint64_t prev;
uint64_t size;
uint64_t fill;
ur_mug *mugs;
ur_nref *heads;
ur_nref *tails;
} ur_cells_t;
typedef struct ur_atoms_s {
ur_dict_t dict;
uint64_t prev;
uint64_t size;
uint64_t fill;
ur_mug *mugs;
uint8_t **bytes;
uint64_t *lens;
} ur_atoms_t;
typedef struct ur_root_s {
ur_cells_t cells;
ur_atoms_t atoms;
} ur_root_t;
/*
** a vector of noun references.
*/
typedef struct ur_nvec_s {
uint64_t fill;
ur_nref* refs;
} ur_nvec_t;
/*
** opaque handle for repeated traversal.
*/
typedef struct ur_walk_fore_s ur_walk_fore_t;
/*
** type-specific dictionary operations.
**
** NB: [r] is only used to retrieve the stored mug of cells and
** indirect atoms. If all references are direct atoms (62-bits or less),
** [r] can be null. This option is used extensively in cue (de-serialization)
** implementations, where the dictionary keys are bit-cursors.
*/
void
ur_dict32_grow(ur_root_t *r, ur_dict32_t *dict, uint64_t prev, uint64_t size);
ur_bool_t
ur_dict32_get(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t *out);
void
ur_dict32_put(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t val);
void
ur_dict32_wipe(ur_dict32_t *dict);
void
ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t prev, uint64_t size);
ur_bool_t
ur_dict64_get(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t *out);
void
ur_dict64_put(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t val);
void
ur_dict64_wipe(ur_dict64_t *dict);
void
ur_dict_grow(ur_root_t *r, ur_dict_t *dict, uint64_t prev, uint64_t size);
ur_bool_t
ur_dict_get(ur_root_t *r, ur_dict_t *dict, ur_nref ref);
void
ur_dict_put(ur_root_t *r, ur_dict_t *dict, ur_nref ref);
void
ur_dict_wipe(ur_dict_t *dict);
/*
** free the buckets of any dictionary (cast to ur_dict_t*).
*/
void
ur_dict_free(ur_dict_t *dict);
/*
** measure the bloq (binary-exponent) length of an atom in [r]
*/
uint64_t
ur_met(ur_root_t *r, uint8_t bloq, ur_nref ref);
/*
** find or allocate an atom in [r]
**
** unsafe variant is unsafe wrt allocation (byte arrays must be
** allocated with system malloc) and trailing null bytes (not allowed).
*/
ur_nref
ur_coin_bytes_unsafe(ur_root_t *r, uint64_t len, uint8_t *byt);
ur_nref
ur_coin_bytes(ur_root_t *r, uint64_t len, uint8_t *byt);
ur_nref
ur_coin64(ur_root_t *r, uint64_t n);
/*
** find or construct a cell in [r]
*/
ur_nref
ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal);
/*
** calculate the mug of [ref], or produce the stored value in [r].
*/
ur_mug
ur_nref_mug(ur_root_t *r, ur_nref ref);
/*
** initialize a noun arena (root).
*/
ur_root_t*
ur_root_init(void);
/*
** print root details to [f]
*/
void
ur_root_info(FILE *f, ur_root_t *r);
/*
** dispose all allocations in [r]
*/
void
ur_root_free(ur_root_t *r);
/*
** initialize or dispose a vector of noun references
*/
void
ur_nvec_init(ur_nvec_t *v, uint64_t size);
void
ur_nvec_free(ur_nvec_t *v);
/*
** depth-first, pre-order noun traversal, cells can short-circuit.
*/
void
ur_walk_fore(ur_root_t *r,
ur_nref ref,
void *v,
void (*atom)(ur_root_t*, ur_nref, void*),
ur_bool_t (*cell)(ur_root_t*, ur_nref, void*));
ur_walk_fore_t*
ur_walk_fore_init_with(ur_root_t *r,
uint32_t s_prev,
uint32_t s_size);
ur_walk_fore_t*
ur_walk_fore_init(ur_root_t *r);
void
ur_walk_fore_with(ur_walk_fore_t *w,
ur_nref ref,
void *v,
void (*atom)(ur_root_t*, ur_nref, void*),
ur_bool_t (*cell)(ur_root_t*, ur_nref, void*));
void
ur_walk_fore_done(ur_walk_fore_t *w);
#endif /* ifndef UR_HASHCONS_H */