forked from xinyandai/product-quantization
-
Notifications
You must be signed in to change notification settings - Fork 0
/
vecs_io.py
54 lines (46 loc) · 1.73 KB
/
vecs_io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import numpy as np
def fvecs_read(filename, c_contiguous=True):
    """
    Read a ``.fvecs`` file into a 2-D ``float32`` array.

    The file is parsed as a flat run of 4-byte words in which every vector
    is stored as one ``int32`` dimension header followed by that many
    ``float32`` components.

    :param filename: path of the file to read (anything ``np.fromfile`` accepts)
    :param c_contiguous: when true, return an independent C-contiguous copy;
        otherwise return a strided view into the raw buffer, which still
        holds the header column
    :return: array of shape ``(n_vectors, dim)`` with dtype ``float32``;
        an empty file yields a ``(0, 0)`` ``float32`` array
    :raises IOError: if the first header is not positive, the file length is
        not a whole number of records, or the records do not all declare the
        same dimension
    """
    fv = np.fromfile(filename, dtype=np.float32)
    if fv.size == 0:
        # Keep the element type identical to the non-empty case.
        return np.zeros((0, 0), dtype=np.float32)

    # Headers are integers sharing the 4-byte slots of the data, so the bits
    # are reinterpreted rather than converted. A Python int keeps ``1 + dim``
    # from wrapping around in 32-bit arithmetic on a garbage header.
    dim = int(fv.view(np.int32)[0])
    if dim <= 0:
        # Raised explicitly: an ``assert`` would be stripped under ``python -O``.
        raise IOError("Invalid vector dimension {} in {}".format(dim, filename))
    if fv.size % (1 + dim) != 0:
        # Without this check ``reshape`` fails with an error that does not
        # name the offending file.
        raise IOError("Truncated or malformed file {}".format(filename))

    fv = fv.reshape(-1, 1 + dim)
    # Vectorised check; the builtin ``all`` would loop over rows in Python.
    if not np.all(fv.view(np.int32)[:, 0] == dim):
        raise IOError("Non-uniform vector sizes in {}".format(filename))

    # Drop the header column, leaving only the vector components.
    fv = fv[:, 1:]
    if c_contiguous:
        fv = fv.copy()
    return fv
def ivecs_read(filename, c_contiguous=True):
    """
    Read an ``.ivecs`` file into a 2-D ``int32`` array.

    The file is parsed as a flat run of ``int32`` words in which every
    vector is stored as one dimension header followed by that many values.

    :param filename: path of the file to read (anything ``np.fromfile`` accepts)
    :param c_contiguous: when true, return an independent C-contiguous copy;
        otherwise return a strided view into the raw buffer, which still
        holds the header column
    :return: array of shape ``(n_vectors, dim)`` with dtype ``int32``;
        an empty file yields a ``(0, 0)`` ``int32`` array
    :raises IOError: if the first header is not positive, the file length is
        not a whole number of records, or the records do not all declare the
        same dimension
    """
    iv = np.fromfile(filename, dtype=np.int32)
    if iv.size == 0:
        # Integer dtype, matching what a non-empty file produces.
        return np.zeros((0, 0), dtype=np.int32)

    # A Python int keeps ``1 + dim`` from wrapping around in 32-bit
    # arithmetic when the header holds garbage.
    dim = int(iv[0])
    if dim <= 0:
        # Raised explicitly: an ``assert`` would be stripped under ``python -O``.
        raise IOError("Invalid vector dimension {} in {}".format(dim, filename))
    if iv.size % (1 + dim) != 0:
        # Without this check ``reshape`` fails with an error that does not
        # name the offending file.
        raise IOError("Truncated or malformed file {}".format(filename))

    iv = iv.reshape(-1, 1 + dim)
    # Vectorised check; the builtin ``all`` would loop over rows in Python.
    if not np.all(iv[:, 0] == dim):
        raise IOError("Non-uniform vector sizes in {}".format(filename))

    # Drop the header column, leaving only the stored values.
    iv = iv[:, 1:]
    if c_contiguous:
        iv = iv.copy()
    return iv
def loader(data_set='audio', top_k=20, ground_metric='euclid', folder='../data/'):
    """
    Load the base vectors, the queries and the ground truth of a data set.

    The files are looked up inside ``<folder>/<data_set>/`` under the names
    ``<data_set>_base.fvecs``, ``<data_set>_query.fvecs`` and
    ``<top_k>_<data_set>_<ground_metric>_groundtruth.ivecs``. The three
    resolved paths are printed before the files are read.

    :param data_set: data set you want to load, e.g. audio, sift1m, ...
    :param top_k: how many nearest neighbours are in the ground truth file
        (used as the prefix of that file's name)
    :param ground_metric: metric name embedded in the ground truth file name
    :param folder: directory holding one sub-directory per data set; a
        trailing path separator is optional
    :return: X, Q, G -- the base vectors and queries as returned by
        ``fvecs_read`` and the ground truth as returned by ``ivecs_read``
    """
    # Local import so this block does not depend on the file's import header.
    import os

    # ``os.path.join`` inserts a separator only where one is missing, so
    # ``folder`` works with or without a trailing slash; plain concatenation
    # would turn '../data' + 'audio' into '../dataaudio'.
    folder_path = os.path.join(folder, data_set)
    base_file = os.path.join(folder_path, '%s_base.fvecs' % data_set)
    query_file = os.path.join(folder_path, '%s_query.fvecs' % data_set)
    ground_truth = os.path.join(
        folder_path,
        '%s_%s_%s_groundtruth.ivecs' % (top_k, data_set, ground_metric),
    )

    print("load the base data {}, \nload the queries {}, \nload the ground truth {}".format(
        base_file, query_file, ground_truth))

    X = fvecs_read(base_file)
    Q = fvecs_read(query_file)
    G = ivecs_read(ground_truth)
    return X, Q, G