-
Notifications
You must be signed in to change notification settings - Fork 0
/
audioDB.h
402 lines (355 loc) · 12.7 KB
/
audioDB.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
#ifndef __AUDIODB_H_
#define __AUDIODB_H_
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#if !defined(WIN32)
#include <sys/mman.h>
#endif
#include <fcntl.h>
#include <string.h>
#include <iostream>
#include <fstream>
#include <set>
#include <map>
#include <string>
#include <vector>
#include <math.h>
#include <time.h>
#include <assert.h>
#include <float.h>
#include <signal.h>
// includes for LSH indexing
extern "C" {
#include "audioDB_API.h"
}
#include "audioDB-internals.h"
#include "ReporterBase.h"
#include "accumulator.h"
#include "lshlib.h"
// includes for web services
#include "soapH.h"
#include "cmdline.h"
// Alias for ADB_MAXSTR (defined outside this header); presumably a
// string-length limit -- confirm against the API header.
#define MAXSTR                  ADB_MAXSTR

// Database PRIMARY commands
#define COM_CREATE              "--NEW"
#define COM_INSERT              "--INSERT"
#define COM_BATCHINSERT         "--BATCHINSERT"
#define COM_QUERY               "--QUERY"
#define COM_STATUS              "--STATUS"
#define COM_L2NORM              "--L2NORM"
#define COM_POWER               "--POWER"
#define COM_DUMP                "--DUMP"
#define COM_SERVER              "--SERVER"
#define COM_INDEX               "--INDEX"
#define COM_SAMPLE              "--SAMPLE"
#define COM_LISZT               "--LISZT"

// Parameters accepted alongside the primary commands
#define COM_CLIENT              "--client"
#define COM_DATABASE            "--database"
#define COM_QTYPE               "--qtype"
#define COM_SEQLEN              "--sequencelength"
#define COM_SEQHOP              "--sequencehop"
#define COM_POINTNN             "--pointnn"
#define COM_RADIUS              "--radius"
#define COM_TRACKNN             "--resultlength"
#define COM_QPOINT              "--qpoint"
#define COM_FEATURES            "--features"
#define COM_QUERYKEY            "--key"
#define COM_KEYLIST             "--keyList"
#define COM_TIMES               "--times"
#define COM_QUERYPOWER          "--power"
#define COM_RELATIVE_THRESH     "--relative-threshold"
#define COM_ABSOLUTE_THRESH     "--absolute-threshold"
#define COM_EXHAUSTIVE          "--exhaustive"
#define COM_LSH_EXACT           "--lsh_exact"
#define COM_NO_UNIT_NORMING     "--no_unit_norming"
#define COM_DISTANCE_KULLBACK   "--distance_kullback"
// Default values; the same names are used by O2_AUDIODB_INITIALIZERS for
// pointNN, trackNN, datasize, ntracks and datadim.
#define O2_DEFAULT_POINTNN (10U)
#define O2_DEFAULT_TRACKNN (10U)
//#define O2_DEFAULTDBSIZE (4000000000) // 4GB table size
#define O2_DEFAULTDBSIZE (2000000000) // 2GB table size
#define O2_DEFAULT_DATASIZE (1355U) /* in MB */
#define O2_DEFAULT_NTRACKS (20000U)
#define O2_DEFAULT_DATADIM (9U)
// LIMIT PARAMETERS
// O2_REALTYPE carries its own parentheses, so it can only be used as a cast
// prefix (O2_REALTYPE expr), never as a declaration type or inside sizeof(...).
#define O2_REALTYPE (double)
#define O2_MAXFILES (1000000U)
#define O2_MAXFILESTR ADB_FILETABLE_ENTRY_SIZE
#define O2_FILETABLE_ENTRY_SIZE ADB_FILETABLE_ENTRY_SIZE
#define O2_TRACKTABLE_ENTRY_SIZE ADB_TRACKTABLE_ENTRY_SIZE
#define O2_HEADERSIZE (sizeof(dbTableHeaderT))
#define O2_MEANNUMVECTORS (1000U)
#define O2_MAXDIM (20000U)
#define O2_MAXNN (1000000U)
#define O2_MAXSEQLEN (8000U) // maximum feature vectors in a sequence
#define O2_MAXTRACKS (1000000U) // maximum number of tracks
// sizeof(O2_REALTYPE) would expand to sizeof((double)), which is not a valid
// expression; take the size of a value cast to the real type instead.
#define O2_MAXDOTPRODUCTMEMORY (sizeof(O2_REALTYPE 0)*O2_MAXSEQLEN*O2_MAXSEQLEN) // 512MB
#define O2_SERIAL_MAX_TRACKBATCH (1000000)
#define O2_LARGE_ADB_SIZE (O2_DEFAULT_DATASIZE+1) // datasize at which features are kept externally (in Mbytes)
#define O2_LARGE_ADB_NTRACKS (O2_DEFAULT_NTRACKS+1) // ntracks at which features are kept externally
#define O2_MAX_VECTORS ( O2_MEANNUMVECTORS * O2_MAXTRACKS )
// Flags
// Most are aliases for ADB_HEADER_FLAG_* values defined outside this header;
// O2_FLAG_MINMAX is the only one given a literal value here.
#define O2_FLAG_L2NORM ADB_HEADER_FLAG_L2NORM
#define O2_FLAG_MINMAX (0x2U)
#define O2_FLAG_POWER ADB_HEADER_FLAG_POWER
#define O2_FLAG_TIMES ADB_HEADER_FLAG_TIMES
#define O2_FLAG_LARGE_ADB ADB_HEADER_FLAG_REFERENCES
// Render a truth value as "on" or "off". The argument is parenthesised so that
// low-precedence expressions (assignment, comma) are tested as a whole.
#define DISPLAY_FLAG(x) ((x)?"on":"off")
// Query types (distinct single-bit values; O2_POINT_QUERY is the default
// queryType in O2_AUDIODB_INITIALIZERS)
#define O2_POINT_QUERY (0x4U)
#define O2_SEQUENCE_QUERY (0x8U)
#define O2_TRACK_QUERY (0x10U)
#define O2_N_SEQUENCE_QUERY (0x20U)
#define O2_ONE_TO_ONE_N_SEQUENCE_QUERY (0x40U)
// Error Codes
#define O2_ERR_KEYNOTFOUND (0xFFFFFF00)
// Macros
// True when the C string `command` (which must be in scope at the point of
// use) equals a.
#define O2_ACTION(a) (strcmp(command,(a))==0)
// Round x up / down to a multiple of 2^w. The mask is built from the int
// constant 1, so w must be smaller than the bit width of int. x is evaluated
// once, w twice in ALIGN_UP.
#define ALIGN_UP(x,w) (((x) + ((1<<(w))-1)) & ~((1<<(w))-1))
#define ALIGN_DOWN(x,w) ((x) & ~((1<<(w))-1))
// Round x up / down to a multiple of the value returned by getpagesize().
#define ALIGN_PAGE_UP(x) (((x) + (getpagesize()-1)) & ~(getpagesize()-1))
#define ALIGN_PAGE_DOWN(x) ((x) & ~(getpagesize()-1))
// Substitute the empty string for a null pointer; x is evaluated twice.
#define ENSURE_STRING(x) ((x) ? (x) : "")
// Map `length` bytes, starting at offset `start`, of the file open on `dbfid`
// as a shared mapping and store the result in `var`, cast to `type`. The
// mapping is readable, and also writable when `forWrite` is true. On failure
// error() is called with the stringified name of `var`.
// `dbfid`, `forWrite` and `error` must be in scope where the macro is used.
#define CHECKED_MMAP(type, var, start, length) \
  { void *mapped_ = mmap(0, (length), (forWrite ? (PROT_READ | PROT_WRITE) : PROT_READ), MAP_SHARED, dbfid, (start)); \
    if(mapped_ == MAP_FAILED) { \
      error("mmap error for db table", #var, "mmap"); \
    } \
    var = (type) mapped_; \
  }
// read() exactly `count` bytes from `fd` into `buf`. A return of -1 is
// reported through error() with "read" as the system-call name; any other
// byte count different from `count` is reported as a short read.
// `error` must be in scope where the macro is used.
#define CHECKED_READ(fd, buf, count) \
  { const size_t want_ = (count); \
    const ssize_t got_ = read((fd), (buf), want_); \
    if(got_ == -1) { \
      error("read error", "", "read"); \
    } else if(want_ != (size_t) got_) { \
      error("short read", ""); \
    } \
  }
// write() exactly `count` bytes from `buf` to `fd`. A return of -1 is
// reported through error() with "write" as the system-call name; any other
// byte count different from `count` is reported as a short write.
// `error` must be in scope where the macro is used.
#define CHECKED_WRITE(fd, buf, count) \
  { const size_t want_ = (count); \
    const ssize_t put_ = write((fd), (buf), want_); \
    if(put_ == -1) { \
      error("write error", "", "write"); \
    } else if(want_ != (size_t) put_) { \
      error("short write", ""); \
    } \
  }
// Print a printf-style message to stderr and flush it when the in-scope
// `verbosity` is greater than vv; the format arguments are evaluated only
// when the message is printed.
//
// Implemented as a for statement that runs at most once instead of a bare
// `if`: a bare `if` silently captures an `else` that follows the macro at the
// call site. The for form is still a single statement that works with or
// without a trailing semicolon.
#define VERB_LOG(vv, ...) \
  for(int verb_log_once_ = (verbosity > (vv)); verb_log_once_; verb_log_once_ = 0) { \
    fprintf(stderr, __VA_ARGS__); \
    fflush(stderr); \
  }
// NOTE(review): the next two comment lines were found at this position but do
// not appear to describe INSERT_FILETABLE_STRING; presumably left over from an
// earlier macro -- confirm before removing.
// We will only use this in a 32-bit address space
// So map the off_t down to 32-bits first
//
// Copy STR into the entry of TABLE selected by dbH->numFiles (entries are
// O2_FILETABLE_ENTRY_SIZE bytes apart; `dbH` must be in scope).
// At most O2_FILETABLE_ENTRY_SIZE-1 characters are copied, so an over-long
// name is truncated instead of running into the following entry, and strncpy
// zero-fills the rest of those bytes. The final byte of the entry is not
// written -- assumes the table starts out zeroed so it acts as the
// terminator; TODO confirm.
// The expansion keeps its trailing semicolon so existing call sites are
// unaffected.
#define INSERT_FILETABLE_STRING(TABLE, STR) \
  strncpy((TABLE) + dbH->numFiles*O2_FILETABLE_ENTRY_SIZE, (STR), (O2_FILETABLE_ENTRY_SIZE)-1);
// Delete PTR (scalar / array form) and then set it to null.
// Both statements are wrapped in one block so that they stay together when the
// macro is the body of an unbraced if/for/while; a trailing semicolon at the
// call site remains optional, as before.
#define SAFE_DELETE(PTR) { delete (PTR); (PTR)=0; }
#define SAFE_DELETE_ARRAY(PTR) { delete[] (PTR); (PTR)=0; }
// Declarations only; the two pointers are defined in another translation unit.
// NOTE(review): presumably root directories used in server mode for database
// and feature files -- confirm against the definition.
extern char* SERVER_ADB_ROOT;
extern char* SERVER_ADB_FEATURE_ROOT;
// State and entry points of the audioDB front end: one method per primary
// command, the LSH indexing calls, and the web-service client/server calls.
//
// The data members appear in the same order as the entries of
// O2_AUDIODB_INITIALIZERS. C++ initialises members in declaration order, so
// keep the two lists in step when adding or moving a member.
class audioDB{
  private:
  gengetopt_args_info args_info;
  unsigned dim;

  // Names and streams given on the command line
  const char *dbName;
  const char *inFile;
  const char *hostport;
  const char *key;
  const char* trackFileName;
  std::ifstream *trackFile;
  const char *command;
  const char *output;
  const char *timesFileName;
  std::ifstream *timesFile;
  const char *powerFileName;
  std::ifstream *powerFile;
  const char* adb_root;
  const char* adb_feature_root;

  // File descriptors; dbfid and forWrite are the names CHECKED_MMAP picks up
  // from the enclosing scope.
  int powerfd;
  int dbfid;
  int lshfid;
  bool forWrite;
  int infid;
  struct stat statbuf;

  // Database header, library handle and table pointers
  struct adb_header *dbH;
  struct adb *adb;
  char* fileTable;
  unsigned* trackTable;
  double* l2normTable;
  double* timesTable;
  double* powerTable;
  char* featureFileNameTable;
  char* timesFileNameTable;
  char* powerFileNameTable;
  size_t fileTableLength;
  size_t trackTableLength;
  size_t timesTableLength;
  size_t powerTableLength;
  size_t l2normTableLength;

  // Flags and parameters
  unsigned verbosity; // how much do we want to know?
  unsigned nsamples;
  //off_t size; // given size (for creation)
  unsigned datasize; // size in MB
  unsigned ntracks;
  unsigned datadim;
  unsigned queryType; // point queries default
  unsigned pointNN; // how many point NNs ?
  unsigned trackNN; // how many track NNs ?
  unsigned sequenceLength;
  unsigned sequenceHop;
  bool normalizedDistance;
  bool no_unit_norming;
  bool distance_kullback;
  unsigned queryPoint;
  unsigned usingQueryPoint;
  unsigned usingTimes;
  unsigned usingPower;
  unsigned isClient;
  unsigned isServer;
  unsigned port;
  double timesTol;
  double radius;
  bool query_from_key;
  bool use_absolute_threshold;
  double absolute_threshold;
  bool use_relative_threshold;
  double relative_threshold;
  ReporterBase* reporter; // track/point reporter

  // LISZT parameters
  unsigned lisztOffset;
  unsigned lisztLength;

  // private methods
  // error() is declared noreturn; the CHECKED_* macros call it on failure.
  void error(const char* a, const char* b = "", const char *sysFunc = 0) __attribute__ ((noreturn));
  void insertTimeStamps(unsigned n, std::ifstream* timesFile, double* timesdata);
  void initDBHeader(const char *dbName);
  void initInputFile(const char *inFile);
  void initTables(const char* dbName, const char* inFile = 0);
  void initTablesFromKey(const char* dbName, const Uns32T queryIndex);
  void prefix_name(char** const name, const char* prefix);

  public:
  // Construction from an argc/argv pair; three overloads additionally take a
  // web-service response object (and, for query and liszt, a soap context).
  audioDB(const unsigned argc, const char *argv[]);
  audioDB(const unsigned argc, const char *argv[], struct soap *soap, adb__queryResponse *adbQueryResponse);
  audioDB(const unsigned argc, const char *argv[], adb__statusResponse *adbStatusResponse);
  audioDB(const unsigned argc, const char *argv[], struct soap *soap, adb__lisztResponse *adbLisztResponse);
  void cleanup();
  ~audioDB();
  int processArgs(const unsigned argc, const char* argv[]);

  // Commands; the names mirror the COM_* primary command strings.
  void create(const char* dbName);
  void insert(const char* dbName, const char* inFile);
  void batchinsert(const char* dbName, const char* inFile);
  void datumFromFiles(adb_datum_t *datum);
  void query(const char* dbName, const char* inFile, struct soap *soap=0, adb__queryResponse *adbQueryResponse=0);
  void status(const char* dbName, adb__statusResponse *adbStatusResponse=0);
  unsigned random_track(unsigned *propTable, unsigned total);
  void sample(const char *dbName);
  void l2norm(const char* dbName);
  void power_flag(const char *dbName);
  void dump(const char* dbName);
  void liszt(const char* dbName, unsigned offset, unsigned numLines, struct soap *soap=0, adb__lisztResponse* adbLisztResponse=0);

  // LSH indexing parameters and data structures
  LSH* lsh;
  bool lsh_in_core; // load LSH tables for query into core (true) or keep on disk (false)
  bool lsh_use_u_functions;
  bool lsh_exact; // flag to indicate use exact evaluation of points returned by LSH
  bool WS_load_index; // flag to indicate that we want to make a Web Services index memory resident
  double lsh_param_w; // Width of LSH hash-function bins
  Uns32T lsh_param_k; // Number of independent hash functions
  Uns32T lsh_param_m; // Combinatorial parameter for m(m-1)/2 hash tables
  Uns32T lsh_param_N; // Number of rows per hash table
  Uns32T lsh_param_b; // Batch size, in number of tracks, per indexing iteration
  Uns32T lsh_param_ncols; // Maximum number of collision in a hash-table row

  // LSH indexing and retrieval methods
  void index_index_db(const char* dbName);
  void index_initialize(double**,double**,double**,double**,unsigned int*);
  void index_insert_tracks(Uns32T start_track, Uns32T end_track, double** fvpp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);
  int index_insert_track(Uns32T trackID, double** fvpp, double** snpp, double** sppp);
  // std:: is spelled out so this declaration does not depend on a
  // using-directive leaking from another header.
  Uns32T index_insert_shingles(std::vector<std::vector<float> >*, Uns32T trackID, double* spp);
  void insertPowerData(unsigned n, int powerfd, double *powerdata);
  void init_track_aux_data(Uns32T trackID, double* fvp, double** sNormpp,double** snPtrp, double** sPowerp, double** spPtrp);

  // Web Services
  void startServer();
  void ws_status(const char*dbName, char* hostport);
  void ws_query(const char*dbName, const char *featureFileName, const char* hostport);
  void ws_query_by_key(const char*dbName, const char *trackKey, const char* featureFileName, const char* hostport);
  void ws_liszt(const char* dbName, char* hostport);
};
// Member-initialiser list for class audioDB, one entry per data member in
// declaration order (presumably pasted after the ':' of each constructor --
// confirm in the implementation file). The struct members args_info and
// statbuf have no entry.
// hostport and lsh_param_w previously had no entry either, which left a
// pointer and a double indeterminate after construction; both are now
// zero-initialised like their neighbours.
// Only /* */ comments may be used inside the macro body: a // comment would
// swallow the continuation lines that follow it.
#define O2_AUDIODB_INITIALIZERS \
  dim(0), \
  dbName(0), \
  inFile(0), \
  hostport(0), \
  key(0), \
  trackFileName(0), \
  trackFile(0), \
  command(0), \
  output(0), \
  timesFileName(0), \
  timesFile(0), \
  powerFileName(0), \
  powerFile(0), \
  adb_root(0), \
  adb_feature_root(0), \
  powerfd(0), \
  dbfid(0), \
  lshfid(0), \
  forWrite(false), \
  infid(0), \
  dbH(0), \
  adb(0), \
  fileTable(0), \
  trackTable(0), \
  l2normTable(0), \
  timesTable(0), \
  powerTable(0), \
  featureFileNameTable(0), \
  timesFileNameTable(0), \
  powerFileNameTable(0), \
  fileTableLength(0), \
  trackTableLength(0), \
  timesTableLength(0), \
  powerTableLength(0), \
  l2normTableLength(0), \
  verbosity(1), \
  nsamples(2000), \
  datasize(O2_DEFAULT_DATASIZE), \
  ntracks(O2_DEFAULT_NTRACKS), \
  datadim(O2_DEFAULT_DATADIM), \
  queryType(O2_POINT_QUERY), \
  pointNN(O2_DEFAULT_POINTNN), \
  trackNN(O2_DEFAULT_TRACKNN), \
  sequenceLength(16), \
  sequenceHop(1), \
  normalizedDistance(true), \
  no_unit_norming(false), \
  distance_kullback(false), \
  queryPoint(0), \
  usingQueryPoint(0), \
  usingTimes(0), \
  usingPower(0), \
  isClient(0), \
  isServer(0), \
  port(0), \
  timesTol(0.1), \
  radius(0), \
  query_from_key(false), \
  use_absolute_threshold(false), \
  absolute_threshold(0.0), \
  use_relative_threshold(false), \
  relative_threshold(0.0), \
  reporter(0), \
  lisztOffset(0), \
  lisztLength(0), \
  lsh(0), \
  lsh_in_core(false), \
  lsh_use_u_functions(false), \
  lsh_exact(false), \
  WS_load_index(false), \
  lsh_param_w(0.0), \
  lsh_param_k(0), \
  lsh_param_m(0), \
  lsh_param_N(0), \
  lsh_param_b(0), \
  lsh_param_ncols(0)
#endif