-
Notifications
You must be signed in to change notification settings - Fork 151
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added store_many_vectors on Mongo Storage #87
base: master
Are you sure you want to change the base?
Changes from 2 commits
c93c3cd
86821b0
1579e24
8ad62c4
7ce25ec
4506f5b
25b4c48
152c49a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,8 +34,13 @@ | |
except ImportError: | ||
import pickle | ||
|
||
from future.builtins import bytes | ||
try: | ||
from pymongo import InsertOne | ||
except ImportError: | ||
pass | ||
|
||
from nearpy.storage.storage import Storage | ||
from future.builtins import zip | ||
|
||
|
||
class MongoStorage(Storage): | ||
|
@@ -45,7 +50,22 @@ def __init__(self, mongo_object): | |
""" Uses specified pymongo object for storage. """ | ||
self.mongo_object = mongo_object | ||
|
||
def store_many_vectors(self, hash_name, bucket_keys, vs, data):
    """
    Stores several vectors and their JSON-serializable data in MongoDB
    with a single unordered bulk write.

    bucket_keys, vs and data are consumed in lockstep via zip, so the
    shortest of the three determines how many documents are written.
    Each document is built by _get_vector, exactly as store_vector does
    for a single vector.

    Requires pymongo's InsertOne; the module-level import ignores
    ImportError, so this raises NameError if InsertOne is unavailable
    and there is at least one vector to store.
    """
    requests = [
        InsertOne(self._get_vector(hash_name, bucket_key, v, d))
        for v, d, bucket_key in zip(vs, data, bucket_keys)
    ]

    # pymongo's bulk_write rejects an empty request list with
    # InvalidOperation; storing nothing is a no-op, not an error.
    if not requests:
        return

    # ordered=False: the inserts are independent of each other, so the
    # server does not need to apply them serially.
    self.mongo_object.bulk_write(requests, ordered=False)
|
||
def store_vector(self, hash_name, bucket_key, v, data):
    """
    Stores vector and JSON-serializable data in MongoDB with specified key.
    """
    # Build the document for this vector first, then write it to the
    # collection as a single insert.
    document = self._get_vector(hash_name, bucket_key, v, data)
    self.mongo_object.insert_one(document)
|
||
def _get_vector(self, hash_name, bucket_key, v, data): | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This docstring belongs to `store_vector`. |
||
Stores vector and JSON-serializable data in MongoDB with specified key. | ||
""" | ||
|
@@ -83,8 +103,7 @@ def store_vector(self, hash_name, bucket_key, v, data): | |
if data is not None: | ||
val_dict['data'] = data | ||
|
||
# Push JSON representation of dict to end of bucket list | ||
self.mongo_object.insert_one(val_dict) | ||
return val_dict | ||
|
||
def _format_mongo_key(self, hash_name, bucket_key): | ||
return '{}{}'.format(self._format_hash_prefix(hash_name), bucket_key) | ||
|
@@ -147,7 +166,7 @@ def get_bucket(self, hash_name, bucket_key): | |
shape=(val_dict['dim'], 1)) | ||
|
||
else: | ||
vector = numpy.fromstring(val_dict['vector'], | ||
vector = numpy.frombuffer(val_dict['vector'], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I updated this because I got some deprecation warnings. |
||
dtype=val_dict['dtype']) | ||
[val_dict.pop(k) for k in ['vector', 'dtype', '_id']] | ||
# Add data to result tuple, if present | ||
|
@@ -159,14 +178,14 @@ def clean_buckets(self, hash_name): | |
""" | ||
Removes all buckets and their content for specified hash. | ||
""" | ||
self.mongo_object.remove( | ||
self.mongo_object.delete_many( | ||
{'lsh': {'$regex': self._format_hash_prefix(hash_name)}}) | ||
|
||
def clean_all_buckets(self): | ||
""" | ||
Removes all buckets from all hashes and their content. | ||
""" | ||
self.mongo_object.remove( | ||
self.mongo_object.delete_many( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
{'lsh': {'$regex': 'nearpy_'}}) | ||
|
||
def store_hash_configuration(self, lshash): | ||
|
@@ -186,5 +205,6 @@ def load_hash_configuration(self, hash_name): | |
conf = self.mongo_object.find_one( | ||
{'hash_conf_name': hash_name + '_conf'} | ||
) | ||
|
||
return pickle.loads(conf['hash_configuration']) if conf is not None\ | ||
else None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suggest using
from future.builtins import zip
because it is more efficient in Python 2.7. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated the code, thanks