Skip to content

Commit

Permalink
update & bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
MarvinDo committed Jul 21, 2023
1 parent 3e551e2 commit 189e6da
Show file tree
Hide file tree
Showing 18 changed files with 677 additions and 163 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def execute(self, inpath, annotated_inpath, **kwargs):


def save_to_db(self, info, variant_id, conn):
if self.job_config['do_task_force_protein_domains']:
if self.job_config['do_taskforce_domains']:
one_variant = conn.get_one_variant(variant_id) # 0id,1chr,2pos,3ref,4alt
task_force_protein_domains = conn.get_task_force_protein_domains(one_variant[1], one_variant[2], int(one_variant[2]) + len(one_variant[4]))
for domain in task_force_protein_domains:
Expand Down
1 change: 0 additions & 1 deletion src/annotation_service/annotation_jobs/vep_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def save_to_db(self, info, variant_id, conn):
hgvs_c = urllib.parse.unquote(hgvs_c[hgvs_c.find(':')+1:]) # remove transcript name
hgvs_p = vep_entry[2]
hgvs_p = urllib.parse.unquote(hgvs_p[hgvs_p.find(':')+1:]) # remove transcript name
print(hgvs_p)
transcript_name = vep_entry[0]
if '.' in transcript_name:
transcript_name = transcript_name[:transcript_name.find('.')] # remove transcript version if it is present
Expand Down
4 changes: 3 additions & 1 deletion src/annotation_service/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def get_default_job_config():
'do_priors': True,

# additional annotations
'do_task_force_protein_domains': True,
'do_taskforce_domains': True,
'do_litvar': True
}
return job_config
Expand Down Expand Up @@ -101,6 +101,8 @@ def get_annotation_tempfile(annotation_queue_id):
def process_one_request(annotation_queue_id, job_config = get_default_job_config()):
""" this is the main worker of the annotation job - A 4 step process -"""

print(job_config)

all_jobs = get_jobs(job_config)

conn = Connection(roles=["annotation"])
Expand Down
117 changes: 90 additions & 27 deletions src/common/db_IO.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,6 @@ def get_db_user(roles):




def enquote(string):
    """Return *string* wrapped in exactly one pair of single quotes.

    The argument is converted with ``str()`` and every leading/trailing
    single quote is stripped first, so input that is already quoted does
    not end up double-quoted. Quotes inside the text are left untouched
    (nothing is escaped).
    """
    unquoted = str(string).strip("'")
    return f"'{unquoted}'"

def enbrace(string):
    """Return *string* enclosed in one pair of parentheses.

    Parentheses already present in *string* are kept, so the result may be
    nested. *string* must be a ``str``; no conversion is performed.
    """
    return "(" + string + ")"



class Connection:
def __init__(self, roles = ["read_only"]):
self.conn = get_db_connection(roles)
Expand Down Expand Up @@ -326,7 +314,7 @@ def get_clinvar_variant_annotation_id_by_variant_id(self, variant_id):
# FROM clinvar_variant_annotation a \
# INNER JOIN ( \
# SELECT variant_id, max(version_date) AS version_date FROM clinvar_variant_annotation GROUP BY variant_id \
# ) b ON a.variant_id = b.variant_id AND a.variant_id = " + enquote(variant_id) + " AND a.version_date = b.version_date"
# ) b ON a.variant_id = b.variant_id AND a.variant_id = " + functions.enquote(variant_id) + " AND a.version_date = b.version_date"
command = "SELECT id FROM clinvar_variant_annotation WHERE variant_id=%s"
self.cursor.execute(command, (variant_id, ))
result = self.cursor.fetchone()
Expand Down Expand Up @@ -354,7 +342,7 @@ def insert_transcript(self, symbol, hgnc_id, transcript_ensembl, transcript_biot
if gene_id is not None:
command = ''
if transcript_refseq is not None and transcript_ensembl is not None:
#transcript_ensembl_list = ', '.join([enquote(x) for x in transcript_ensembl.split(',')])
#transcript_ensembl_list = ', '.join([functions.enquote(x) for x in transcript_ensembl.split(',')])
transcript_ensembl_list = transcript_ensembl.split(',')
transcript_ensembl_placeholders = ', '.join(['%s'] * len(transcript_ensembl_list))
self.cursor.execute("SELECT COUNT(*) FROM transcript WHERE name IN (" + transcript_ensembl_placeholders + ")", tuple(transcript_ensembl_list))
Expand Down Expand Up @@ -644,7 +632,7 @@ def get_variants_page_merged(self, page, page_size, user_id, ranges = None, gene
new_constraints.append("(chr=%s AND pos BETWEEN %s AND %s)")
actual_information += (chr, start, end)
new_constraints = ' OR '.join(new_constraints)
new_constraints = enbrace(new_constraints)
new_constraints = functions.enbrace(new_constraints)
postfix = self.add_constraints_to_command(postfix, new_constraints)
if genes is not None and len(genes) > 0:
#genes = [self.get_gene(self.convert_to_gene_id(x))[1] for x in genes]
Expand All @@ -657,7 +645,7 @@ def get_variants_page_merged(self, page, page_size, user_id, ranges = None, gene
if len(hgnc_ids) > 0:
placeholders = ["%s"] * len(hgnc_ids)
placeholders = ', '.join(placeholders)
placeholders = enbrace(placeholders)
placeholders = functions.enbrace(placeholders)
new_constraints = "id IN (SELECT DISTINCT variant_id FROM variant_consequence WHERE hgnc_id IN " + placeholders + ")"
actual_information += tuple(hgnc_ids)
postfix = self.add_constraints_to_command(postfix, new_constraints)
Expand All @@ -673,7 +661,7 @@ def get_variants_page_merged(self, page, page_size, user_id, ranges = None, gene
if len(consensus_without_dash) > 0: # if we have one or more classes without the -
placeholders = ["%s"] * len(consensus_without_dash)
placeholders = ', '.join(placeholders)
placeholders = enbrace(placeholders)
placeholders = functions.enbrace(placeholders)
new_constraints_inner = new_constraints_inner + "SELECT variant_id FROM consensus_classification WHERE classification IN " + placeholders + " AND is_recent = 1"
actual_information += tuple(consensus_without_dash)
new_constraints = "id IN (" + new_constraints_inner + ")"
Expand All @@ -689,7 +677,7 @@ def get_variants_page_merged(self, page, page_size, user_id, ranges = None, gene
if len(user_without_dash) > 0: # if we have one or more classes without the -
placeholders = ["%s"] * len(user_without_dash)
placeholders = ', '.join(placeholders)
placeholders = enbrace(placeholders)
placeholders = functions.enbrace(placeholders)
# search for the most recent user classifications from the user which is searching for variants and which are in the list of user classifications (variable: user)
new_constraints_inner = new_constraints_inner + "SELECT * FROM ( SELECT user_classification.variant_id FROM user_classification \
LEFT JOIN user_classification uc ON uc.variant_id = user_classification.variant_id AND uc.date > user_classification.date \
Expand All @@ -709,7 +697,7 @@ def get_variants_page_merged(self, page, page_size, user_id, ranges = None, gene
# if len(clinvar_without_dash) > 0:
# placeholders = ["%s"] * len(clinvar_without_dash)
# placeholders = ', '.join(placeholders)
# placeholders = enbrace(placeholders)
# placeholders = functions.enbrace(placeholders)
# clinvar_without_dash
# new_constraints_inner += "SELECT "
if hgvs is not None and len(hgvs) > 0:
Expand All @@ -731,14 +719,14 @@ def get_variants_page_merged(self, page, page_size, user_id, ranges = None, gene
all_variants = [''] # empty string can never be found
placeholders = ["%s"] * len(all_variants)
placeholders = ', '.join(placeholders)
placeholders = enbrace(placeholders)
placeholders = functions.enbrace(placeholders)
new_constraints = "id IN " + placeholders
actual_information += tuple(all_variants)
postfix = self.add_constraints_to_command(postfix, new_constraints)
if variant_ids_oi is not None and len(variant_ids_oi) > 0:
placeholders = ["%s"] * len(variant_ids_oi)
placeholders = ', '.join(placeholders)
placeholders = enbrace(placeholders)
placeholders = functions.enbrace(placeholders)
new_constraints = "id IN " + placeholders
actual_information += tuple(variant_ids_oi)
postfix = self.add_constraints_to_command(postfix, new_constraints)
Expand Down Expand Up @@ -966,7 +954,7 @@ def get_variant_ids_by_hgvs(self, hgvs):
def insert_consensus_classification(self, user_id, variant_id, consensus_classification, comment, evidence_document, date, scheme_id, scheme_class):
self.invalidate_previous_consensus_classifications(variant_id)
command = "INSERT INTO consensus_classification (user_id, variant_id, classification, comment, date, evidence_document, classification_scheme_id, scheme_class) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
self.cursor.execute(command, (user_id, variant_id, consensus_classification, comment, date, evidence_document.decode(), scheme_id, scheme_class))
self.cursor.execute(command, (user_id, variant_id, str(consensus_classification), comment, date, evidence_document.decode(), scheme_id, scheme_class))
self.conn.commit()

def update_consensus_classification_report(self, consensus_classification_id, report):
Expand Down Expand Up @@ -1142,12 +1130,12 @@ def get_classification_scheme(self, scheme_id):

def insert_user_classification(self, variant_id, classification, user_id, comment, date, scheme_id, scheme_class):
command = "INSERT INTO user_classification (variant_id, classification, user_id, comment, date, classification_scheme_id, scheme_class) VALUES (%s, %s, %s, %s, %s, %s, %s)"
self.cursor.execute(command, (variant_id, classification, user_id, comment, date, scheme_id, str(scheme_class)))
self.cursor.execute(command, (variant_id, str(classification), user_id, comment, date, scheme_id, str(scheme_class)))
self.conn.commit()

def update_user_classification(self, user_classification_id, classification, comment, date, scheme_class):
command = "UPDATE user_classification SET classification = %s, comment = %s, date = %s, scheme_class = %s WHERE id = %s"
self.cursor.execute(command, (classification, comment, date, str(scheme_class), user_classification_id))
self.cursor.execute(command, (str(classification), comment, date, str(scheme_class), user_classification_id))
self.conn.commit()


Expand Down Expand Up @@ -1436,7 +1424,7 @@ def get_one_variant(self, variant_id):
return result

def get_variant_id(self, chr, pos, ref, alt):
#command = "SELECT id FROM variant WHERE chr = " + enquote(chr) + " AND pos = " + str(pos) + " AND ref = " + enquote(ref) + " AND alt = " + enquote(alt)
#command = "SELECT id FROM variant WHERE chr = " + functions.enquote(chr) + " AND pos = " + str(pos) + " AND ref = " + functions.enquote(ref) + " AND alt = " + functions.enquote(alt)
command = "SELECT id FROM variant WHERE chr = %s AND pos = %s AND ref = %s AND alt = %s"
self.cursor.execute(command, (chr, pos, ref, alt))
variant_id = self.cursor.fetchone()
Expand Down Expand Up @@ -1813,7 +1801,7 @@ def get_assays(self, variant_id, assay_types = 'all'):
if assay_types != 'all':
placeholders = ["%s"] * len(assay_types)
placeholders = ', '.join(placeholders)
placeholders = enbrace(placeholders)
placeholders = functions.enbrace(placeholders)
new_constraints = " id IN " + placeholders
command += new_constraints
actual_information += tuple(assay_types)
Expand Down Expand Up @@ -1969,4 +1957,79 @@ def get_heredivar_clinvar_submission(self, variant_id):
#def update_heredivar_clinvar_submission_accession_id(self, accession_id):
# command = "UPDATE heredivar_clinvar_submissions SET accession_id = %s"
# self.cursor.execute(command, (accession_id, ))
# self.conn.commit()
# self.conn.commit()


def get_current_annotation_staus_all_variants(self):
    """Fetch the most recently requested annotation_queue row of every variant.

    The query self-joins annotation_queue: each row (a1) is paired with any
    row (a2) of the same variant that was requested later. Rows without such
    a partner (a2.id IS NULL) are the newest ones and are returned.

    Returns:
        The rows from ``cursor.fetchall()``; columns are variant_id, user_id,
        requested, status, finished_at, error_message (in that order).
    """
    latest_entries_query = (
        "\n"
        "SELECT a1.variant_id, a1.user_id, a1.requested, a1.status, a1.finished_at, a1.error_message\n"
        "FROM annotation_queue a1 LEFT JOIN annotation_queue a2\n"
        "ON (a1.variant_id = a2.variant_id AND a1.requested < a2.requested)\n"
        "WHERE a2.id IS NULL\n"
    )
    self.cursor.execute(latest_entries_query)
    return self.cursor.fetchall()

def get_annotation_statistics(self):
    """Summarize the most recent annotation status of every variant.

    Reads the newest annotation queue entry per variant via
    ``get_current_annotation_staus_all_variants`` and groups the variant ids.

    Returns:
        tuple: ``(annotation_stati, errors, warnings, total_num_variants)``
            - annotation_stati: dict mapping a status to the list of variant
              ids currently in that status. The keys 'success', 'pending',
              'error' and 'retry' are always present.
            - errors: dict variant_id -> error message for status 'error'.
            - warnings: dict variant_id -> message for every other status
              that carries a non-empty message.
            - total_num_variants: number of rows that were evaluated.
    """
    # one row per variant: the most recent annotation queue entry
    result = self.get_current_annotation_staus_all_variants()

    annotation_stati = {'success': [], 'pending': [], 'error': [], 'retry': []}
    errors = {}
    warnings = {}
    for annotation_status in result:
        variant_id = annotation_status[0]
        status = annotation_status[3]
        message = annotation_status[5]

        # setdefault keeps an unexpected status value from raising KeyError
        annotation_stati.setdefault(status, []).append(variant_id)

        if status == 'error':
            errors[variant_id] = message
        elif message:
            # truthiness check: a NULL message arrives as None and must not
            # be reported as a warning (None != '' would be True)
            warnings[variant_id] = message

    return annotation_stati, errors, warnings, len(result)

def get_database_info(self):
    """Return descriptive information about the most recent annotation types.

    The ids are taken from ``get_recent_annotation_type_ids`` and looked up
    in the annotation_type table.

    Returns:
        Rows of (display_title, description, version, version_date) as
        delivered by ``cursor.fetchall()``, or an empty list when there are
        no annotation types at all.
    """
    annotation_type_ids = self.get_recent_annotation_type_ids()
    if not annotation_type_ids:
        # without ids the statement would end in "IN ()", which is not
        # valid SQL - there is nothing to look up anyway
        return []

    placeholders = self.get_placeholders(len(annotation_type_ids))
    command = "SELECT display_title,description,version,version_date FROM annotation_type WHERE id IN " + placeholders
    self.cursor.execute(command, tuple(annotation_type_ids.values()))
    return self.cursor.fetchall()

def get_number_of_classified_variants(self):
    """Count the consensus classifications that have no newer sibling.

    consensus_classification is self-joined on variant_id; a row is kept
    when no other row of the same variant has a later date. The number of
    kept rows is returned.

    NOTE: rows of one variant that share the same latest date are all kept
    by this query, so such a variant is counted more than once.
    """
    newest_classification_query = (
        "\n"
        "SELECT a1.variant_id\n"
        "FROM consensus_classification a1 LEFT JOIN consensus_classification a2\n"
        "ON (a1.variant_id = a2.variant_id AND a1.date < a2.date)\n"
        "WHERE a2.variant_id IS NULL\n"
    )
    self.cursor.execute(newest_classification_query)
    return len(self.cursor.fetchall())

def get_recent_annotation_type_ids(self, only_transcript_specific = False):
    """Map every annotation type title to the id of its newest version.

    For each title the annotation_type row with the maximum version_date is
    selected.

    Args:
        only_transcript_specific: when truthy, only rows with
            is_transcript_specific = 1 are considered while determining the
            newest version date per title.

    Returns:
        dict: title -> annotation_type id.
    """
    # fixed SQL fragment, never user input - safe to interpolate
    inner_filter = "WHERE is_transcript_specific = 1" if only_transcript_specific else ""
    query_template = (
        "\n"
        "SELECT n.id, n.title\n"
        "FROM annotation_type n\n"
        "INNER JOIN (\n"
        "SELECT title, MAX(version_date) as max_version_date\n"
        "FROM annotation_type %s GROUP BY title\n"
        ") AS max ON max.title = n.title and max.max_version_date = n.version_date\n"
    )
    self.cursor.execute(query_template % (inner_filter, ))
    # column 0 is the id, column 1 the title
    return {row[1]: row[0] for row in self.cursor.fetchall()}

def get_placeholders(self, num):
    """Build the placeholder list for a parameterized SQL ``IN`` clause.

    Produces *num* ``%s`` markers separated by ", " and lets
    ``functions.enbrace`` wrap them in parentheses.
    """
    comma_separated_markers = ', '.join("%s" for _ in range(num))
    return functions.enbrace(comma_separated_markers)
12 changes: 12 additions & 0 deletions src/common/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,3 +535,15 @@ def read_dotenv():

load_dotenv(os.path.join(basedir, dotenvfile))

def enquote(string):
    """Return *string* wrapped in exactly one pair of single quotes.

    The argument is converted with ``str()`` and every leading/trailing
    single quote is stripped first, so input that is already quoted does
    not end up double-quoted. Quotes inside the text are left untouched
    (nothing is escaped).
    """
    unquoted = str(string).strip("'")
    return f"'{unquoted}'"

def enbrace(string):
    """Return *string* enclosed in one pair of parentheses.

    Parentheses already present in *string* are kept, so the result may be
    nested. *string* must be a ``str``; no conversion is performed.
    """
    return "(" + string + ")"

def enpercent(string):
    """Return *string* surrounded by exactly one ``%`` on each side.

    The argument is converted with ``str()`` and all leading/trailing
    percent signs are stripped before the new ones are added; percent signs
    inside the text stay as they are.
    Presumably meant for building SQL LIKE patterns - confirm with callers.
    """
    core = str(string).strip('%')
    return f"%{core}%"
6 changes: 5 additions & 1 deletion src/frontend_celery/webapp/doc/doc_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,8 @@ def about():

@doc_blueprint.route('/documentation')
def documentation():
return render_template('doc/documentation.html')
return render_template('doc/documentation.html')

@doc_blueprint.route('/changelog')
def changelog():
    """Serve the changelog page by rendering the doc/changelog.html template."""
    return render_template('doc/changelog.html')
7 changes: 6 additions & 1 deletion src/frontend_celery/webapp/main/main_routes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from flask import Blueprint, render_template
from ..utils import *

main_blueprint = Blueprint(
'main',
Expand All @@ -11,4 +12,8 @@

@main_blueprint.route('/')
def index():
return render_template('index.html')
conn = get_connection()
annotation_stati, errors, warnings, total_num_variants = conn.get_annotation_statistics()
database_info = conn.get_database_info()
total_num_classified_variants = conn.get_number_of_classified_variants()
return render_template('index.html', total_num_variants = total_num_variants, database_info = database_info, total_num_classified_variants = total_num_classified_variants)
4 changes: 4 additions & 0 deletions src/frontend_celery/webapp/static/css/utils.css
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@
min-width:20em;
width:20em;
}
/* Fixed-width helper: pins an element to 15em by setting both min-width and width. */
.width_very_medium {
min-width:15em;
width:15em;
}
.width_small {
min-width:10em;
width:10em;
Expand Down
Loading

0 comments on commit 189e6da

Please sign in to comment.