-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
13 changed files
with
369 additions
and
370 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import json | ||
import datetime | ||
|
||
class ConfigManager:
    """
    Loads, caches and persists a JSON configuration file.

    The file is parsed on first access and the parsed object is kept in
    memory; later reads are served from that cached object.
    """

    def __init__(self, config_file='config.json'):
        """
        Args:
            config_file: Path of the JSON file holding the configuration
        """
        self.config_file = config_file
        self._config = None  # Parsed config; populated lazily by load_config()

    def load_config(self):
        """
        Gets the config, reading the config file only on the first call
        Returns: The cached config object (the same object on every call, not a copy)
        """
        if self._config is None:
            with open(self.config_file, encoding='utf-8') as f:
                self._config = json.load(f)
        return self._config

    def save_config(self, new_config):
        """
        Merges new values into the existing config and writes the result to the config file
        Args:
            new_config: Mapping whose keys are added to / override the existing config
        Returns:
        Raises:
            TypeError: If the merged config is not JSON serializable; in that
                case neither the cached config nor the file is modified
        """
        config = self.load_config()

        # Serialize a merged copy first: opening the file with 'w' truncates
        # it, so a serialization failure after that point would destroy the
        # config on disk and leave the cache holding the bad values.
        merged = config.copy()
        merged.update(new_config)
        payload = json.dumps(merged)

        # Serialization succeeded; update the cached object in place so
        # callers holding a reference from load_config() see the new values.
        config.update(new_config)
        with open(self.config_file, 'w', encoding='utf-8') as f:
            f.write(payload)

    def save_time(self, attribute):
        """
        Saves the current local time (ISO 8601 string) to an attribute in the config
        Args:
            attribute: Config attribute to save current time to
        Returns:
        """
        config = self.load_config()
        config[attribute] = datetime.datetime.now().isoformat()
        self.save_config(config)

    def get_es_endpoint(self):
        """
        Gets the 'elasticsearch_endpoint' entry of the config
        Returns: The configured value, or None if the key is absent
        """
        return self.load_config().get('elasticsearch_endpoint')

    def save_update_end_time(self):
        """
        Save end time of indexing under 'last_update_end'
        Returns:
        """
        return self.save_time("last_update_end")

    def save_update_start_time(self):
        """
        Save start time of indexing under 'last_update_start'
        Returns:
        """
        return self.save_time("last_update_start")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import pickle | ||
import os | ||
class DataManager:
    """
    Persists and caches the cluster and URI-to-rank data as pickle files.

    Each data set is read from its file on first access and then served
    from memory; saving updates both the in-memory value and the file.
    """

    def __init__(self, clusters_filename='dumps/clusters_dump', uri2rank_filename='dumps/uri2rank_dump'):
        """
        Args:
            clusters_filename: File the clusters are pickled to / loaded from
            uri2rank_filename: File the uri2rank data is pickled to / loaded from
        """
        self.clusters_filename = clusters_filename
        self.uri2rank_filename = uri2rank_filename
        self._clusters = None  # Loaded lazily by get_clusters()
        self._uri2rank = None  # Loaded lazily by get_uri2rank()

    def save_clusters(self, clusters):
        """
        Save clusters of parts
        Args:
            clusters: Clusters to be saved
        Returns:
        """
        self._clusters = clusters
        self._serialize(self._clusters, self.clusters_filename)

    def get_clusters(self):
        """
        Gets the clusters, loading them from file on first access
        Returns: The saved clusters, or an empty dict if no dump file exists
        """
        if self._clusters is None:
            self._clusters = self._deserialize(self.clusters_filename)
        return self._clusters

    def save_uri2rank(self, uri2rank):
        """
        Saves the pagerank of all URI's
        Args:
            uri2rank: URI-to-rank data to be saved
        Returns:
        """
        self._uri2rank = uri2rank
        self._serialize(self._uri2rank, self.uri2rank_filename)

    def get_uri2rank(self):
        """
        Gets all pageranks of URI's, loading them from file on first access
        Returns: The saved uri2rank data, or an empty dict if no dump file exists
        """
        if self._uri2rank is None:
            self._uri2rank = self._deserialize(self.uri2rank_filename)
        return self._uri2rank

    @staticmethod
    def _serialize(data, filename):
        """
        Serializes some data to a file, creating the parent directory if needed
        Args:
            data: Data to be written
            filename: File to be written to
        Returns:
        """
        # The default filenames live under 'dumps/'; without this the very
        # first save fails with FileNotFoundError when that directory is missing.
        directory = os.path.dirname(filename)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(filename, 'wb') as f:
            pickle.dump(data, f)

    @staticmethod
    def _deserialize(filename):
        """
        Deserializes data from a serialized file
        Args:
            filename: Serialized file
        Returns: Deserialized data from file, or an empty dict if the file does not exist
        """
        if os.path.exists(filename):
            with open(filename, 'rb') as f:
                # NOTE(review): unpickling can execute arbitrary code; the dump
                # files must only ever come from a trusted source.
                return pickle.load(f)
        return {}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from elasticsearch import Elasticsearch | ||
|
||
class ElasticsearchManager:
    """
    Lazily creates and caches a single Elasticsearch client.

    The endpoint is taken from the supplied config manager's
    get_es_endpoint() method.
    """

    def __init__(self, config_manager):
        """
        Args:
            config_manager: Object providing get_es_endpoint()
        """
        self.config_manager = config_manager
        self._es = None  # Set only after a client has answered ping()

    def get_es(self):
        """
        Gets an instance of elasticsearch
        Returns: The instance of elasticsearch
        Raises:
            ValueError: If the newly created client does not answer ping()
        """
        if self._es is None:
            client = Elasticsearch([self.config_manager.get_es_endpoint()], verify_certs=True)
            if not client.ping():
                raise ValueError('Elasticsearch connection failed')
            # Cache only a client that has been verified; caching before the
            # ping would make every later call return the failed client
            # without ever raising again.
            self._es = client
        return self._es
Oops, something went wrong.