Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added store_many_vectors on Mongo Storage #87

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 26 additions & 6 deletions nearpy/storage/storage_mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,13 @@
except ImportError:
import pickle

from future.builtins import bytes
try:
from pymongo import InsertOne
except ImportError:
pass

from nearpy.storage.storage import Storage
from future.builtins import zip


class MongoStorage(Storage):
Expand All @@ -45,7 +50,22 @@ def __init__(self, mongo_object):
""" Uses specified pymongo object for storage. """
self.mongo_object = mongo_object

def store_many_vectors(self, hash_name, bucket_keys, vs, data):
requests = []

for v, d, bk in zip(vs, data, bucket_keys):
Copy link
Collaborator

@amorgun amorgun Nov 20, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest using `from future.builtins import zip`, because it is more efficient on Python 2.7: it iterates lazily instead of building a full list.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the code, thanks

vc = self._get_vector(hash_name, bk, v, d)

requests.append(InsertOne(vc))

self.mongo_object.bulk_write(requests, ordered=False)

def store_vector(self, hash_name, bucket_key, v, data):
val_dict = self._get_vector(hash_name, bucket_key, v, data)

self.mongo_object.insert_one(val_dict)

def _get_vector(self, hash_name, bucket_key, v, data):
"""
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This docstring belongs to the `store_vector` method.

Stores vector and JSON-serializable data in MongoDB with specified key.
"""
Expand Down Expand Up @@ -83,8 +103,7 @@ def store_vector(self, hash_name, bucket_key, v, data):
if data is not None:
val_dict['data'] = data

# Push JSON representation of dict to end of bucket list
self.mongo_object.insert_one(val_dict)
return val_dict

def _format_mongo_key(self, hash_name, bucket_key):
return '{}{}'.format(self._format_hash_prefix(hash_name), bucket_key)
Expand Down Expand Up @@ -147,7 +166,7 @@ def get_bucket(self, hash_name, bucket_key):
shape=(val_dict['dim'], 1))

else:
vector = numpy.fromstring(val_dict['vector'],
vector = numpy.frombuffer(val_dict['vector'],
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I replaced `numpy.fromstring` with `numpy.frombuffer` because `fromstring` was emitting deprecation warnings.

dtype=val_dict['dtype'])
[val_dict.pop(k) for k in ['vector', 'dtype', '_id']]
# Add data to result tuple, if present
Expand All @@ -159,14 +178,14 @@ def clean_buckets(self, hash_name):
"""
Removes all buckets and their content for specified hash.
"""
self.mongo_object.remove(
self.mongo_object.delete_many(
{'lsh': {'$regex': self._format_hash_prefix(hash_name)}})

def clean_all_buckets(self):
"""
Removes all buckets from all hashes and their content.
"""
self.mongo_object.remove(
self.mongo_object.delete_many(
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `remove` method is deprecated, so I replaced it with the suggested `delete_many`, which avoids the deprecation warnings.

{'lsh': {'$regex': 'nearpy_'}})

def store_hash_configuration(self, lshash):
Expand All @@ -186,5 +205,6 @@ def load_hash_configuration(self, hash_name):
conf = self.mongo_object.find_one(
{'hash_conf_name': hash_name + '_conf'}
)

return pickle.loads(conf['hash_configuration']) if conf is not None\
else None
4 changes: 4 additions & 0 deletions tests/storage_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,10 @@ def test_store_zero(self):
_, data = bucket[0]
self.assertEqual(data, 0)

def test_store_many_vectors(self):
x = numpy.random.randn(100, 10)
self.check_store_many_vectors(x)


if __name__ == '__main__':
unittest.main()