Skip to content

Commit

Permalink
split utils into 5 classes
Browse files Browse the repository at this point in the history
  • Loading branch information
cl117 committed Aug 27, 2024
1 parent db907e6 commit e4b79f3
Show file tree
Hide file tree
Showing 13 changed files with 369 additions and 370 deletions.
38 changes: 20 additions & 18 deletions flask/cluster.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
from xml.etree import ElementTree
import subprocess
import utils
from configManager import ConfigManager
from logger import Logger
import query
from sys import platform


uclust_identity = utils.get_config()['uclust_identity'] # how similar sequences in the same cluster must be
config_manager = ConfigManager()
uclust_identity = config_manager.load_config()['uclust_identity'] # how similar sequences in the same cluster must be
logger_ = Logger()
sequences_filename = 'dumps/sequences.fsa'

if 'which_search' not in utils.get_config():
explorerConfig = utils.get_config()
if 'which_search' not in config_manager.load_config():
explorerConfig = config_manager.load_config()
explorerConfig['which_search'] = 'vsearch'
utils.set_config(explorerConfig)
# Persist the defaulted 'which_search' value. load_config() accepts no
# arguments, so the write has to go through save_config().
config_manager.save_config(explorerConfig)

whichSearch = utils.get_config()['which_search']
whichSearch = config_manager.load_config()['which_search']

if platform == "linux" or platform == "linux2":
if whichSearch == 'usearch':
Expand All @@ -26,7 +28,7 @@
elif whichSearch == 'vsearch':
usearch_binary_filename = 'usearch/vsearch_macos'
else:
utils.log("Sorry, your OS is not supported for sequence based-search.")
logger_.log("Sorry, your OS is not supported for sequence based-search.")

uclust_results_filename = 'usearch/uclust_results.uc'

Expand Down Expand Up @@ -56,7 +58,7 @@ def run_uclust():
popen = subprocess.Popen(args, stdout=subprocess.PIPE)
popen.wait()
output = popen.stdout.read()
utils.log_indexing(str(output))
logger_.log(str(output), True)


def analyze_uclust():
Expand All @@ -80,11 +82,11 @@ def analyze_uclust():
hits += 1

f.close()
utils.log_indexing('parts: ' + str(total_parts))
utils.log_indexing('hits: ' + str(hits))
logger_.log('parts: ' + str(total_parts), True)
logger_.log('hits: ' + str(hits), True)

if hits > 0:
utils.log_indexing('average hit identity: ' + str(total_identity / hits))
logger_.log('average hit identity: ' + str(total_identity / hits), True)


def uclust2uris(fileName):
Expand Down Expand Up @@ -138,17 +140,17 @@ def uclust2clusters():


def update_clusters():
utils.log_indexing('------------ Updating clusters ------------')
utils.log_indexing('******** Query for sequences ********')
logger_.log('------------ Updating clusters ------------', True)
logger_.log('******** Query for sequences ********', True)
sequences_response = query.query_sparql(sequence_query)
utils.log_indexing('******** Query for sequences complete ********')
logger_.log('******** Query for sequences complete ********', True)
write_fasta(sequences_response)

utils.log_indexing('******** Running uclust ********')
logger_.log('******** Running uclust ********', True)
run_uclust()
utils.log_indexing('******** Running uclust complete ********')
logger_.log('******** Running uclust complete ********', True)

analyze_uclust()
utils.log_indexing('------------ Successsfully updated clusters ------------\n')
logger_.log('------------ Successsfully updated clusters ------------\n', True)
return uclust2clusters()

63 changes: 63 additions & 0 deletions flask/configManager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import json
import datetime

class ConfigManager:
    """Reads, caches and writes the JSON configuration file."""

    def __init__(self, config_file='config.json'):
        self.config_file = config_file
        # Parsed configuration; stays None until load_config() first reads the file.
        self._config = None

    def load_config(self):
        """
        Gets the config, reading the config file on first use
        The parsed content is cached on the instance and the same object
        (not a copy) is handed back on every call, so in-place changes made
        by a caller are visible to later calls.
        Returns: Parsed content of the config file
        """
        if self._config is not None:
            return self._config
        with open(self.config_file) as handle:
            self._config = json.load(handle)
        return self._config

    def save_config(self, new_config):
        """
        Merges new values into the cached config and writes the result to the config file
        Args:
            new_config: Mapping of entries to add to or replace in the config
        Returns:
        """
        merged = self.load_config()
        merged.update(new_config)
        with open(self.config_file, 'w') as handle:
            json.dump(merged, handle)

    def save_time(self, attribute):
        """
        Stores the current local time, ISO 8601 formatted, under an attribute in the config
        Args:
            attribute: Config attribute to save current time to
        Returns:
        """
        timestamp = datetime.datetime.now().isoformat()
        settings = self.load_config()
        settings[attribute] = timestamp
        self.save_config(settings)

    def get_es_endpoint(self):
        """
        Looks up the 'elasticsearch_endpoint' entry of the config
        Returns: The configured endpoint, or None when the entry is absent
        """
        settings = self.load_config()
        return settings.get('elasticsearch_endpoint')

    def save_update_end_time(self):
        """
        Records the current time as the end of indexing ("last_update_end")
        Returns:
        """
        return self.save_time("last_update_end")

    def save_update_start_time(self):
        """
        Records the current time as the start of indexing ("last_update_start")
        Returns:
        """
        return self.save_time("last_update_start")
76 changes: 76 additions & 0 deletions flask/dataManager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import pickle
import os
class DataManager:
    """Keeps the part clusters and the URI pageranks in memory and in pickle dump files."""

    def __init__(self, clusters_filename='dumps/clusters_dump', uri2rank_filename='dumps/uri2rank_dump'):
        self.clusters_filename = clusters_filename
        self.uri2rank_filename = uri2rank_filename
        # In-memory copies; None until first saved or first read from disk.
        self._clusters = None
        self._uri2rank = None

    def save_clusters(self, clusters):
        """
        Caches the clusters of parts and writes them to the clusters dump file
        Args:
            clusters: Clusters to be saved
        Returns:
        """
        self._clusters = clusters
        self._serialize(clusters, self.clusters_filename)

    def get_clusters(self):
        """
        Gets the clusters of parts, reading the dump file on first use
        Returns: The cached clusters ({} when no dump file exists)
        """
        if self._clusters is not None:
            return self._clusters
        self._clusters = self._deserialize(self.clusters_filename)
        return self._clusters

    def save_uri2rank(self, uri2rank):
        """
        Caches the pagerank of all URI's and writes it to the uri2rank dump file
        Args:
            uri2rank: Pageranks to be saved
        Returns:
        """
        self._uri2rank = uri2rank
        self._serialize(uri2rank, self.uri2rank_filename)

    def get_uri2rank(self):
        """
        Gets all pageranks of URI's, reading the dump file on first use
        Returns: The cached pageranks ({} when no dump file exists)
        """
        if self._uri2rank is not None:
            return self._uri2rank
        self._uri2rank = self._deserialize(self.uri2rank_filename)
        return self._uri2rank

    @staticmethod
    def _serialize(data, filename):
        """
        Pickles some data into a file, replacing any previous content
        Args:
            data: Data to be written
            filename: File to be written to
        Returns:
        """
        with open(filename, 'wb') as dump_file:
            pickle.dump(data, dump_file)

    @staticmethod
    def _deserialize(filename):
        """
        Unpickles the data stored in a serialized file
        Args:
            filename: Serialized file
        Returns: Deserialized data from file, or {} when the file does not exist
        """
        if not os.path.exists(filename):
            return {}
        with open(filename, 'rb') as dump_file:
            return pickle.load(dump_file)
17 changes: 17 additions & 0 deletions flask/elasticsearchManager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from elasticsearch import Elasticsearch

class ElasticsearchManager:
    """Builds an Elasticsearch client from the configured endpoint and reuses it."""

    def __init__(self, config_manager):
        # Only get_es_endpoint() is called on config_manager.
        self.config_manager = config_manager
        # Cached client; set only once a client has answered a ping.
        self._es = None

    def get_es(self):
        """
        Gets an instance of elasticsearch
        The client is created on the first call and reused afterwards. A
        client that fails the ping is never cached, so a later call tries
        to connect again instead of returning an unreachable client.
        Returns: The instance of elasticsearch
        Raises:
            ValueError: If the endpoint does not answer a ping
        """
        if self._es is None:
            client = Elasticsearch([self.config_manager.get_es_endpoint()], verify_certs=True)
            if not client.ping():
                raise ValueError('Elasticsearch connection failed')
            # Assign only after the ping succeeded; assigning earlier would make
            # every call after a failure skip the check and return a dead client.
            self._es = client
        return self._es
Loading

0 comments on commit e4b79f3

Please sign in to comment.