Skip to content

Commit

Permalink
some improvements to heredicare communication
Browse files Browse the repository at this point in the history
  • Loading branch information
MarvinDo committed Aug 20, 2024
1 parent 7dc21c0 commit 1d2822e
Show file tree
Hide file tree
Showing 10 changed files with 245 additions and 16 deletions.
24 changes: 19 additions & 5 deletions src/common/db_IO.py
Original file line number Diff line number Diff line change
Expand Up @@ -1424,7 +1424,7 @@ def update_consensus_classification_needs_clinvar_upload(self, consensus_classif
self.cursor.execute(command, (consensus_classification_id, ))
self.conn.commit()

def get_variant_ids_which_need_heredicare_upload(self):
def get_variant_ids_which_need_heredicare_upload(self, variant_ids_oi = None):
# excludes structural variants and intergenic variants and variants of unfinished submissions
command = """
SELECT DISTINCT variant_id FROM consensus_classification WHERE needs_heredicare_upload = 1 AND is_recent = 1 and variant_id NOT IN (
Expand All @@ -1436,11 +1436,16 @@ def get_variant_ids_which_need_heredicare_upload(self):
GROUP BY variant_id
HAVING COUNT(exon_nr is not null or intron_nr is not null OR NULL) = 0)
"""
self.cursor.execute(command)
actual_information = ()
if variant_ids_oi is not None:
placeholders = self.get_placeholders(len(variant_ids_oi))
command += " AND variant_id IN " + placeholders
actual_information = tuple(variant_ids_oi)
self.cursor.execute(command, actual_information)
result = self.cursor.fetchall()
return [x[0] for x in result]

def get_variant_ids_which_need_clinvar_upload(self):
def get_variant_ids_which_need_clinvar_upload(self, variant_ids_oi = None):
command = """
SELECT DISTINCT variant_id FROM consensus_classification WHERE needs_clinvar_upload = 1 AND is_recent = 1 and variant_id NOT IN (
SELECT publish_clinvar_queue.variant_id FROM publish_clinvar_queue RIGHT JOIN (
Expand All @@ -1449,7 +1454,12 @@ def get_variant_ids_which_need_clinvar_upload(self):
WHERE status = 'submitted' or status = 'pending' or status = 'progress' or status = 'retry'
) AND variant_id NOT IN (SELECT id FROM variant WHERE sv_variant_id IS NOT NULL)
"""
self.cursor.execute(command)
actual_information = ()
if variant_ids_oi is not None:
placeholders = self.get_placeholders(len(variant_ids_oi))
command += " AND variant_id IN " + placeholders
actual_information = tuple(variant_ids_oi)
self.cursor.execute(command, actual_information)
result = self.cursor.fetchall()
return [x[0] for x in result]

Expand Down Expand Up @@ -1510,7 +1520,11 @@ def get_variant_id_from_external_id(self, external_id, annotation_type_id): #!!
return result[0]
return result


def get_variant_ids_from_external_id(self, external_id, annotation_type_id):
    """Return all variant ids linked to an external id.

    Selects ``variant_id`` from the ``variant_ids`` table for every row whose
    ``external_id`` and ``annotation_type_id`` both match the arguments.

    Returns a list holding the first column of each fetched row; the list is
    empty when no row matches.
    """
    command = "SELECT variant_id FROM variant_ids WHERE external_id = %s AND annotation_type_id = %s"
    # Values are passed separately so the driver performs the parameter binding.
    self.cursor.execute(command, (external_id, annotation_type_id))
    return [row[0] for row in self.cursor.fetchall()]

def get_consensus_classification(self, variant_id, most_recent = False, sql_modifier=None): # it is possible to have multiple consensus classifications
command = "SELECT id,user_id,variant_id,classification,comment,date,is_recent,classification_scheme_id,scheme_class,needs_heredicare_upload,needs_clinvar_upload FROM consensus_classification WHERE variant_id = %s"
Expand Down
5 changes: 1 addition & 4 deletions src/common/heredicare_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ def get_submission_status(self, submission_id):

resp = requests.get(url, headers=header)
if resp.status_code != 200:
message = "ERROR: HerediCare API getsubmission id endpoint endpoint returned an HTTP " + str(resp.status_code) + " error: " + self.extract_error_message(resp.text)
message = "ERROR: HerediCare API get submission id endpoint endpoint returned an HTTP " + str(resp.status_code) + " error: " + self.extract_error_message(resp.text)
status = "api_error"
else: # success
resp = resp.json(strict=False)
Expand Down Expand Up @@ -568,9 +568,6 @@ def is_valid_post_data(self, value, regex, none_allowed = False):
return False
pattern = re.compile(regex)
result = pattern.match(value)
print(regex)
print(result)
print(value)
if result is None:
return False
return True
Expand Down
2 changes: 1 addition & 1 deletion src/frontend_celery/start_dump.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ helpFunction()
{
echo ""
echo "Usage: $0 -w env -h path"
echo "This script starts the heredivar frontend gunicorn or development server"
echo "This script creates a full backup for HerediVar"
echo -e "\t-w Provide 'dev' for development server and 'prod' for production gunicorn server."
exit 1 # Exit script after printing help
}
Expand Down
41 changes: 41 additions & 0 deletions src/frontend_celery/start_import.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/bash
set -e
set -o pipefail
#set -o verbose

# Print usage information and exit with status 1.
helpFunction()
{
    echo ""
    # Only -w is parsed by getopts below, so only -w is advertised here.
    echo "Usage: $0 -w env"
    echo "This script starts a full import from HerediCaRe"
    echo -e "\t-w Provide 'dev' for development server and 'prod' for production gunicorn server."
    exit 1 # Exit script after printing help
}

# Start from an empty value so an inherited environment variable named "we"
# cannot satisfy the empty-argument check below.
we=""

while getopts "w:" opt
do
    case "$opt" in
        w ) we="$OPTARG" ;;
        ? ) helpFunction ;; # Print helpFunction in case parameter is non-existent
    esac
done

# Print helpFunction in case parameters are empty
if [ -z "$we" ]
then
    echo "Some or all of the parameters are empty";
    helpFunction
fi

export WEBAPP_ENV="$we"

echo "Importing HerediCaRe and updating upload stati...."

# The repository root is three directory levels above this script.
# Every expansion is quoted so paths containing spaces survive word splitting.
SCRIPT=$(readlink -f "$0")
ROOT=$(dirname "$(dirname "$(dirname "$SCRIPT")")")
cd "$ROOT"
pwd

source .venv/bin/activate

python3 "$ROOT/src/frontend_celery/webapp/utils/import_heredicare.py"
127 changes: 127 additions & 0 deletions src/frontend_celery/webapp/download/download_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,8 +712,135 @@ def get_possible_classes_enigma_brca12_1_1_0(class_counts):
return possible_classes


def get_possible_classes_enigma_pms2_100(class_counts):
    """Return the classes reachable with the selected PMS2 (v1.0.0) criteria.

    ``class_counts`` maps an evidence-strength key to the number of selected
    criteria of that strength. The keys read are 'pvs', 'ps', 'pm', 'pp',
    'ba', 'bs' and 'bp'; a missing key that is needed raises ``KeyError``.

    Returns a set containing any of 5 (pathogenic), 4 (likely pathogenic),
    2 (likely benign) and 1 (benign). More than one class can be returned
    when several rules match; the set is empty when no rule matches.

    Criteria per strength, as listed for this scheme:
      Very Strong: PVS1
      Strong:      PVS1_Strong, PS1, PS2, PS3, PP1_Strong, PP4_Strong
      Moderate:    PVS1_Moderate, PS1_Moderate, PS2_Moderate, PS3_Moderate,
                   PM3, PM5, PM6, PP1_Moderate, PP3_Moderate, PP4_Moderate
      Supporting:  PS3_Supporting, PM2_Supporting, PM5_Supporting, PP1, PP3, PP4
      Benign stand alone: BA1
      Benign strong:      BS1, BS2, BS3, BS4, BP5_Strong
      Benign supporting:  BS3_Supporting, BS4_Supporting, BP4, BP5, BP7
    """
    possible_classes = set()

    # Pathogenic (5)
    #   1 Very Strong AND >= 1 Strong
    #   1 Very Strong AND >= 1 Moderate (this subsumes the ">= 2 Moderate" rule)
    #   1 Very Strong AND >= 2 Supporting
    #   >= 2 Strong
    #   1 Strong AND >= 3 Moderate
    #   1 Strong AND 2 Moderate AND >= 2 Supporting
    #   1 Strong AND 1 Moderate AND >= 4 Supporting
    # Two or more Very Strong criteria are accepted as pathogenic as well.
    if class_counts['pvs'] >= 2:
        possible_classes.add(5)
    if class_counts['pvs'] == 1 and (
        class_counts['ps'] >= 1 or class_counts['pm'] >= 1 or class_counts['pp'] >= 2
    ):
        possible_classes.add(5)
    if class_counts['ps'] >= 2:
        possible_classes.add(5)
    if class_counts['ps'] == 1 and (
        class_counts['pm'] >= 3
        or (class_counts['pm'] == 2 and class_counts['pp'] >= 2)
        or (class_counts['pm'] == 1 and class_counts['pp'] >= 4)
    ):
        possible_classes.add(5)

    # Likely pathogenic (4)
    #   1 Strong AND 1 Moderate
    #   1 Strong AND 2 Moderate
    #   1 Strong AND >= 2 Supporting
    #   >= 3 Moderate
    #   2 Moderate AND >= 2 Supporting
    #   1 Moderate AND >= 4 Supporting
    #   1 Very Strong AND 1 Supporting
    if class_counts['ps'] == 1 and (
        1 <= class_counts['pm'] <= 2 or class_counts['pp'] >= 2
    ):
        possible_classes.add(4)
    if class_counts['pm'] >= 3:
        possible_classes.add(4)
    if class_counts['pm'] == 2 and class_counts['pp'] >= 2:
        possible_classes.add(4)
    if class_counts['pm'] == 1 and class_counts['pp'] >= 4:
        possible_classes.add(4)
    if class_counts['pvs'] == 1 and class_counts['pp'] == 1:
        possible_classes.add(4)

    # Benign (1)
    #   1 Stand Alone
    #   >= 2 Strong
    if class_counts['ba'] >= 1:
        possible_classes.add(1)
    if class_counts['bs'] >= 2:
        possible_classes.add(1)

    # Likely benign (2)
    #   1 Strong AND 1 Supporting
    #   >= 2 Supporting
    if class_counts['bs'] == 1 and class_counts['bp'] == 1:
        possible_classes.add(2)
    if class_counts['bp'] >= 2:
        possible_classes.add(2)

    return possible_classes



def get_possible_classes_enigma_mlh_100(class_counts):
    """Return the classes reachable with the selected MLH (v1.0.0) criteria.

    ``class_counts`` maps an evidence-strength key to the number of selected
    criteria of that strength. The keys read are 'pvs', 'ps', 'pm', 'pp',
    'ba', 'bs' and 'bp'; a missing key that is needed raises ``KeyError``.

    Returns a set containing any of 5 (pathogenic), 4 (likely pathogenic),
    2 (likely benign) and 1 (benign). More than one class can be returned
    when several rules match; the set is empty when no rule matches.

    Criteria per strength, as listed for this scheme:
      Very Strong: PVS1
      Strong:      PVS1_Strong, PS1, PS2, PS3, PP1_Strong, PP3_Strong, PP4_Strong
                   (PP3_Strong is listed for the pathogenic rules only)
      Moderate:    PVS1_Moderate, PS1_Moderate, PS2_Moderate, PS3_Moderate,
                   PM3, PM5, PM6, PP1_Moderate, PP3_Moderate, PP4_Moderate
      Supporting:  PS3_Supporting, PM2_Supporting, PM5_Supporting, PP1, PP3, PP4
      Benign stand alone: BA1, BS1_Stand Alone
      Benign strong:      BS1, BS2, BS3, BS4, BP5_Strong
      Benign supporting:  BS3_Supporting, BS4_Supporting, BP4, BP5, BP7
    """
    possible_classes = set()

    # Pathogenic (5)
    #   1 Very Strong AND >= 1 Strong
    #   1 Very Strong AND >= 2 Supporting
    #   1 Very Strong AND >= 1 Moderate (this subsumes the "1 Moderate" rule)
    #   >= 2 Strong
    #   1 Strong AND >= 3 Moderate
    #   1 Strong AND 2 Moderate AND >= 2 Supporting
    #   1 Strong AND 1 Moderate AND >= 4 Supporting
    # Two or more Very Strong criteria are accepted as pathogenic as well.
    if class_counts['pvs'] >= 2:
        possible_classes.add(5)
    if class_counts['pvs'] == 1 and (
        class_counts['ps'] >= 1 or class_counts['pm'] >= 1 or class_counts['pp'] >= 2
    ):
        possible_classes.add(5)
    if class_counts['ps'] >= 2:
        possible_classes.add(5)
    if class_counts['ps'] == 1 and (
        class_counts['pm'] >= 3
        or (class_counts['pm'] == 2 and class_counts['pp'] >= 2)
        or (class_counts['pm'] == 1 and class_counts['pp'] >= 4)
    ):
        possible_classes.add(5)

    # Likely pathogenic (4)
    #   1 Strong AND 1 Moderate
    #   1 Strong AND 2 Moderate
    #   1 Strong AND >= 2 Supporting
    #   >= 3 Moderate
    #   2 Moderate AND >= 2 Supporting
    #   1 Moderate AND >= 4 Supporting
    #   1 Very Strong AND 1 Supporting
    if class_counts['ps'] == 1 and (
        1 <= class_counts['pm'] <= 2 or class_counts['pp'] >= 2
    ):
        possible_classes.add(4)
    if class_counts['pm'] >= 3:
        possible_classes.add(4)
    if class_counts['pm'] == 2 and class_counts['pp'] >= 2:
        possible_classes.add(4)
    if class_counts['pm'] == 1 and class_counts['pp'] >= 4:
        possible_classes.add(4)
    if class_counts['pvs'] == 1 and class_counts['pp'] == 1:
        possible_classes.add(4)

    # Benign (1)
    #   >= 2 Strong
    #   1 Stand Alone
    if class_counts['ba'] >= 1:
        possible_classes.add(1)
    if class_counts['bs'] >= 2:
        possible_classes.add(1)

    # Likely benign (2)
    #   1 Strong AND 1 Supporting
    if class_counts['bs'] == 1 and class_counts['bp'] == 1:
        possible_classes.add(2)

    return possible_classes



Expand Down
4 changes: 4 additions & 0 deletions src/frontend_celery/webapp/download/download_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,10 @@ def calculate_class(scheme_type = None, version = None, selected_classes = ''):
possible_classes = download_functions.get_possible_classes_enigma_pten(class_counts) # get a set of possible classes depending on selected criteria
elif 'pten' in scheme_type and version == "v3.1.0":
possible_classes = download_functions.get_possible_classes_enigma_pten_310(class_counts) # get a set of possible classes depending on selected criteria
elif 'pms2' in scheme_type and version == "v1.0.0":
possible_classes = download_functions.get_possible_classes_enigma_pms2_100(class_counts) # get a set of possible classes depending on selected criteria
elif 'mlh1' in scheme_type and version == "v1.0.0":
possible_classes = download_functions.get_possible_classes_enigma_mlh_100(class_counts) # get a set of possible classes depending on selected criteria
else:
raise RuntimeError('The class could not be calculated with given parameters. Did you specify a supported scheme and version? (either "acmg" or VUS "task-force" based)')

Expand Down
10 changes: 8 additions & 2 deletions src/frontend_celery/webapp/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,8 +457,8 @@ def map_hg38(variant, user_id, conn:Connection, insert_variant = True, perform_a

if not was_successful and hgvs_c_valid and gene_valid:
gene_id = conn.get_gene_id_by_symbol(gene_symbol)
#transcripts = conn.get_gencode_basic_transcripts(gene_id)
transcripts = conn.get_mane_select_for_gene(gene_id)
transcripts = conn.get_gencode_basic_transcripts(gene_id)
#transcripts = conn.get_mane_select_for_gene(gene_id)

if transcripts is not None:
#print(transcripts)
Expand Down Expand Up @@ -503,6 +503,12 @@ def map_hg38(variant, user_id, conn:Connection, insert_variant = True, perform_a
if variant_id is not None and external_ids is not None: # insert new vid
for external_id in external_ids:
annotation_type_id = conn.get_most_recent_annotation_type_id("heredicare_vid")
previous_variant_ids = conn.get_variant_ids_from_external_id(external_id, annotation_type_id)
for previous_variant_id in previous_variant_ids:
if previous_variant_id != variant_id:
conn.delete_external_id(external_id, annotation_type_id, previous_variant_id)
if perform_annotation:
start_annotation_service(previous_variant_id, user_id, conn)
conn.insert_external_variant_id(variant_id, external_id, annotation_type_id)

if not was_successful and message == '':
Expand Down
12 changes: 9 additions & 3 deletions src/frontend_celery/webapp/upload/upload_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from common import functions
from common.heredicare_interface import Heredicare
from common.db_IO import Connection
from ..utils import *
sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
from utils import *
from werkzeug.utils import secure_filename

# searches for the tag variant_ids in a request.args.
Expand All @@ -32,8 +33,13 @@ def extract_variant_ids(request_args, conn: Connection) -> list:
for list_id in list_ids_strs:
require_valid(list_id, "user_variant_lists", conn)
require_list_permission(list_id, required_permissions = ['read'], conn = conn)
result.extend(conn.get_variant_ids_from_list(list_id))
return result
list_variant_ids = conn.get_variant_ids_from_list(list_id)
check_update_all_most_recent_heredicare(list_variant_ids, conn)
check_update_all_most_recent_clinvar(list_variant_ids, conn)

result.extend(conn.get_variant_ids_which_need_heredicare_upload(variant_ids_oi = list_variant_ids))
result.extend(conn.get_variant_ids_which_need_clinvar_upload(variant_ids_oi = list_variant_ids))
return list(set(result)) # make unique

# this function searches for clinvar_gene_{variant_id} tags of variants
# of interest in a request form and saves it to a dictionary
Expand Down
2 changes: 1 addition & 1 deletion src/frontend_celery/webapp/upload/upload_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def publish(self, publish_queue_id, variant_ids, options, user_roles):
conn.update_publish_queue_status(publish_queue_id, status = "progress", message = "")

for variant_id in variant_ids:
variant = conn.get_variant(variant_id)
#variant = conn.get_variant(variant_id)

# start the task to upload the consensus classification to clinvar
if options['do_clinvar']:
Expand Down
34 changes: 34 additions & 0 deletions src/frontend_celery/webapp/utils/import_heredicare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import sys
from os import path
sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))))
from common.db_IO import Connection
import common.functions as functions
import common.paths as paths
import json
import argparse
sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
import upload.upload_functions as upload_functions
import frontend_celery.webapp.tasks as tasks


# Roles handed to the Connection constructor and to the import task below.
roles = ["db_admin"]

conn = Connection(roles)

# Everything after opening the connection runs inside try/finally so the
# connection is closed even when one of the steps raises.
try:
    # update state of unfinished heredicare uploads
    variant_ids = conn.get_variant_ids_by_publish_heredicare_status(stati = ['pending', 'progress', 'submitted'])
    upload_functions.check_update_all_most_recent_heredicare(variant_ids, conn)

    # update state of unfinished ClinVar uploads
    variant_ids = conn.get_variant_ids_by_publish_clinvar_status(stati = ['pending', 'progress', 'submitted'])
    upload_functions.check_update_all_most_recent_clinvar(variant_ids, conn)

    # import HerediCaRe VIDs on behalf of a dedicated bot user
    # NOTE(review): insert_user runs on every invocation; presumably it is a
    # no-op when the user already exists - confirm against Connection.insert_user.
    botname = "heredivar_bot"
    conn.insert_user(username = botname, first_name = "HerediVar", last_name = "Bot", affiliation = "Bot", api_roles = "")
    bot_id = conn.get_user_id(botname)
    vids = "update" # start task importing all updated heredicare vids
    import_queue_id = tasks.start_variant_import(vids, bot_id, roles, conn)
finally:
    conn.close()

0 comments on commit 1d2822e

Please sign in to comment.