From cb5d97cca6f4d913434df3b16cba3a3abfbf174c Mon Sep 17 00:00:00 2001 From: MarvinDo Date: Fri, 30 Aug 2024 16:03:25 +0200 Subject: [PATCH] Code-Tidyup --- .../annotation_jobs/_job.py | 2 - .../annotation_jobs/annotate_from_vcf_job.py | 42 +- .../automatic_classification_job.py | 8 +- .../annotation_jobs/cancerhotspots_job.py | 17 - .../annotation_jobs/consequence_job.py | 14 - .../annotation_jobs/heredicare_job.py | 9 +- .../annotation_jobs/hexplorer_job.py | 11 - .../annotation_jobs/maxentscan_job.py | 16 +- .../annotation_jobs/spliceai_job.py | 9 +- .../task_force_protein_domain_job.py | 5 +- .../annotation_jobs/vep_job.py | 4 - src/annotation_service/annotation_queue.py | 3 +- src/annotation_service/main.py | 24 +- src/annotation_service/pubmed_parser.py | 1 - src/common/clinvar_interface.py | 9 - src/common/db_IO.py | 456 +----------------- src/common/functions.py | 135 ++---- src/common/heredicare_interface.py | 217 ++------- src/common/paths.py | 64 +-- src/frontend_celery/config.py | 8 - src/frontend_celery/main.py | 8 - src/frontend_celery/webapp/api/api_routes.py | 6 +- .../webapp/download/download_routes.py | 65 --- .../webapp/download/download_tasks.py | 3 +- .../webapp/errorhandlers/errorhandlers.py | 2 - src/frontend_celery/webapp/static/js/utils.js | 49 +- .../webapp/static/js/variant_addition.js | 201 +++++++- src/frontend_celery/webapp/tasks.py | 22 +- .../webapp/templates/index.html | 12 + .../webapp/templates/variant/variant.html | 118 +---- .../webapp/upload/upload_functions.py | 1 - .../webapp/upload/upload_routes.py | 1 - .../webapp/user/user_functions.py | 20 - .../webapp/user/user_routes.py | 12 +- src/frontend_celery/webapp/user/user_tasks.py | 17 - .../webapp/utils/decorators.py | 24 - .../webapp/utils/import_heredicare.py | 2 - .../webapp/{test.py => utils/test_upload.py} | 0 .../webapp/utils/upload_status_checker.py | 6 +- .../webapp/variant/variant_functions.py | 50 +- .../webapp/variant/variant_routes.py | 55 ++- 41 files changed, 434 insertions(+), 1294 deletions(-) delete mode 100644 src/frontend_celery/webapp/user/user_functions.py delete mode 100644 src/frontend_celery/webapp/user/user_tasks.py rename src/frontend_celery/webapp/{test.py => utils/test_upload.py} (100%) diff --git a/src/annotation_service/annotation_jobs/_job.py b/src/annotation_service/annotation_jobs/_job.py index 7ce6fd37..42ec9c6f 100644 --- a/src/annotation_service/annotation_jobs/_job.py +++ b/src/annotation_service/annotation_jobs/_job.py @@ -1,6 +1,4 @@ import abc -import tempfile -import os import common.functions as functions ########## This class is an abstract class which needs to be implemented by diff --git a/src/annotation_service/annotation_jobs/annotate_from_vcf_job.py b/src/annotation_service/annotation_jobs/annotate_from_vcf_job.py index 074b7b44..ee2f553a 100644 --- a/src/annotation_service/annotation_jobs/annotate_from_vcf_job.py +++ b/src/annotation_service/annotation_jobs/annotate_from_vcf_job.py @@ -2,9 +2,7 @@ from ._job import Job import common.paths as paths import common.functions as functions -import tempfile import os -from os.path import exists from ..pubmed_parser import fetch @@ -150,8 +148,8 @@ def save_to_db(self, info, variant_id, conn): clv_inpret = functions.find_between(info, 'ClinVar_inpret=', '(;|$)') if clv_revstat is not None and clv_inpret is not None and clv_varid is not None: - clv_revstat = functions.decode_vcf(clv_revstat)#.replace('\\', ',').replace('_', ' ') - clv_inpret = functions.decode_vcf(clv_inpret)#.replace('\\', ',').replace('_', ' 
') + clv_revstat = functions.decode_vcf(clv_revstat) + clv_inpret = functions.decode_vcf(clv_inpret) conn.clean_clinvar(variant_id) # remove all clinvar information of this variant from database and insert it again -> only the most recent clinvar annotaion is saved in database! conn.insert_clinvar_variant_annotation(variant_id, clv_varid, clv_inpret, clv_revstat) @@ -263,14 +261,6 @@ def write_vcf_annoate_config(self): if job_config['do_flossies']: config_file.write(paths.FLOSSIES_path + "\tFLOSSIES\tnum_eur,num_afr\t\n") - ### add cancerhotspots annotations - #if job_config['do_cancerhotspots']: - # config_file.write(paths.cancerhotspots_path + "\tcancerhotspots\tcancertypes,AC,AF\t\n") - - ### add arup brca classification - #if job_config['do_arup']: - # config_file.write(paths.arup_brca_path + "\tARUP\tclassification\t\n") - ## add TP53 database information if job_config['do_tp53_database']: config_file.write(paths.tp53_db + "\ttp53db\tclass,transactivation_class,DNE_LOF_class,DNE_class,domain_function,pubmed\t\n") @@ -295,31 +285,3 @@ def write_vcf_annoate_config(self): config_file.close() return config_file_path - - - - -""" - -CSQ=ENST00000240651|||downstream_gene_variant|MODIFIER|||HGNC:26162|PYROXD1||||,ENST00000375266|||downstream_gene_variant|MODIFIER|||HGNC:26162|PYROXD1||||,ENST00000421138|ENST00000421138.6:c.1643_1644del|ENSP00000395449.2:p.His548LeufsTer8|frameshift_variant|HIGH|14/16||HGNC:9948|RECQL|Gene3D:1.10.10.10&PDB-ENSP_mappings:2v1x.A&PDB-ENSP_mappings:2v1x.B&PDB-ENSP_mappings:2wwy.A&PDB-ENSP_mappings:2wwy.B&PDB-ENSP_mappings:4u7d.A&PDB-ENSP_mappings:4u7d.B&PDB-ENSP_mappings:4u7d.C&PDB-ENSP_mappings:4u7d.D&PDB-ENSP_mappings:6jtz.A&PDB-ENSP_mappings:6jtz.B&AFDB-ENSP_mappings:AF-P46063-F1.A&Pfam:PF09382&PANTHER:PTHR13710&PANTHER:PTHR13710:SF72|||,ENST00000444129|ENST00000444129.7:c.1643_1644del|ENSP00000416739.2:p.His548LeufsTer8|frameshift_variant|HIGH|13/15||HGNC:9948|RECQL|Gene3D:1.10.10.10&PDB-ENSP_mappings:2v1x.A&PDB-ENSP_mappings:2v1x.B&PDB-ENSP_mappings:2wwy.A&PDB-ENSP_mappings:2wwy.B&PDB-ENSP_mappings:4u7d.A&PDB-ENSP_mappings:4u7d.B&PDB-ENSP_mappings:4u7d.C&PDB-ENSP_mappings:4u7d.D&PDB-ENSP_mappings:6jtz.A&PDB-ENSP_mappings:6jtz.B&AFDB-ENSP_mappings:AF-P46063-F1.A&Pfam:PF09382&PANTHER:PTHR13710&PANTHER:PTHR13710:SF72|||,ENST00000536851|||downstream_gene_variant|MODIFIER|||HGNC:26162|PYROXD1||||,ENST00000538582|||downstream_gene_variant|MODIFIER|||HGNC:26162|PYROXD1||||,ENST00000538615|||downstream_gene_variant|MODIFIER|||HGNC:26162|PYROXD1||||,ENST00000544970|||downstream_gene_variant|MODIFIER|||HGNC:26162|PYROXD1||||,ENSR00000452487|||regulatory_region_variant|MODIFIER|||||||| 
-CSQ_refseq=NM_001350912.2|||downstream_gene_variant|MODIFIER||||PYROXD1|,NM_001350913.2|||downstream_gene_variant|MODIFIER||||PYROXD1|,NM_002907.4|NM_002907.4:c.1643_1644del|NP_002898.2:p.His548LeufsTer8|frameshift_variant|HIGH|13/15|||RECQL|,NM_024854.5|||downstream_gene_variant|MODIFIER||||PYROXD1|,NM_032941.3|NM_032941.3:c.1643_1644del|NP_116559.1:p.His548LeufsTer8|frameshift_variant|HIGH|14/16|||RECQL|,XM_005253461.3|XM_005253461.3:c.1643_1644del|XP_005253518.1:p.His548LeufsTer8|frameshift_variant|HIGH|14/16|||RECQL|,XM_005253462.5|XM_005253462.5:c.1643_1644del|XP_005253519.1:p.His548LeufsTer8|frameshift_variant|HIGH|14/16|||RECQL|,XM_005253463.4|XM_005253463.4:c.1643_1644del|XP_005253520.1:p.His548LeufsTer8|frameshift_variant|HIGH|13/15|||RECQL|,XM_005253464.4|XM_005253464.4:c.1643_1644del|XP_005253521.1:p.His548LeufsTer8|frameshift_variant|HIGH|13/15|||RECQL|,XM_017019976.2|||downstream_gene_variant|MODIFIER||||PYROXD1|,XR_242902.4|||downstream_gene_variant|MODIFIER||||PYROXD1| -PHYLOP=4.112 -hexplorer_delta=-0.20 -hexplorer_mut=-2.18 -hexplorer_wt=-1.99 -hexplorer_delta_rev=-0.83 -hexplorer_mut_rev=-7.99 -hexplorer_wt_rev=-7.16 -max_hbond_delta=0.00 -max_hbond_mut=5.40 -max_hbond_wt=5.40 -dbSNP_RS=1942960300 -indel_SpliceAI=A|RECQL|0.00|0.00|0.00|0.00|23|-24|-22|-26 -ClinVar_inpret=Uncertain_significance -ClinVar_revstat=criteria_provided,_single_submitter -ClinVar_varid=2450947 -ClinVar_submissions=index,VariationID,ClinicalSignificance,DateLastEvaluated,Description,SubmittedPhenotypeInfo,ReportedPhenotypeInfo,ReviewStatus,CollectionMethod,OriginCounts,Submitter,SCV,SubmittedGeneSymbol,ExplanationOfInterpretation - -""" - - diff --git a/src/annotation_service/annotation_jobs/automatic_classification_job.py b/src/annotation_service/annotation_jobs/automatic_classification_job.py index c6de0698..3ecff98b 100644 --- a/src/annotation_service/annotation_jobs/automatic_classification_job.py +++ b/src/annotation_service/annotation_jobs/automatic_classification_job.py @@ -3,7 +3,6 @@ import common.paths as paths import common.functions as functions from common.db_IO import Connection -import tempfile import os import json import requests @@ -113,8 +112,6 @@ def save_to_db(self, variant_id, conn: Connection): selected_criteria_protein = '+'.join(selected_criteria.get("protein", []) + selected_criteria.get("general", [])) classification_protein = self.get_classification(selected_criteria_protein, scheme_type, scheme_version) - #print(classification_endpoint) - automatic_classification_id = conn.insert_automatic_classification(variant_id, scheme_id, classification_splicing, classification_protein, tool_version) for criterium_name in classification_result: @@ -167,14 +164,12 @@ def run_automatic_classification(self, autoclass_input: str, config_path: str): # "variant_json": "{\"chr\": \"17\", \"pos\": 43057110, \"gene\": \"BRCA1\", \"ref\": \"A\", \"alt\": \"C\", \"variant_type\": [\"missense_variant\"], \"variant_effect\": [{\"transcript\": \"ENST00000357654\", \"hgvs_c\": \"c.5219T>G\", \"hgvs_p\": \"p.Val1740Gly\", \"variant_type\": [\"missense_variant\"], \"exon\": 19}, {\"transcript\": \"ENST00000471181\", \"hgvs_c\": \"c.5282T>G\", \"hgvs_p\": \"p.Val1761Gly\", \"variant_type\": [\"missense_variant\"], \"exon\": 20}], \"splicing_prediction_tools\": {\"SpliceAI\": 0.5}, \"pathogenicity_prediction_tools\": {\"REVEL\": 0.5, \"BayesDel\": 0.5}, \"gnomAD\": {\"AF\": 0.007, \"AC\": 12, \"popmax\": \"EAS\", \"popmax_AF\": 0.009, \"popmax_AC\": 5}, \"FLOSSIES\": {\"AFR\": 9, \"EUR\": 130}, 
\"mRNA_analysis\": {\"performed\": true, \"pathogenic\": true, \"benign\": true}, \"functional_data\": {\"performed\": true, \"pathogenic\": true, \"benign\": true}, \"prior\": 0.25, \"co-occurrence\": 0.56, \"segregation\": 0.56, \"multifactorial_log-likelihood\": 0.56, \"VUS_task_force_domain\": true, \"cancer_hotspot\": true, \"cold_spot\": true}" #}' api_host = "http://" + os.environ.get("AUTOCLASS_HOST", "0.0.0.0") + ":" + os.environ.get("AUTOCLASS_PORT", "8080") + "/" - #api_host = "http://srv018.img.med.uni-tuebingen.de:5004/" endpoint = "classify_variant" url = api_host + endpoint headers = {"accept": "application/json", "Content-Type": "application/json"} data = {"config_path": config_path, "variant_json": autoclass_input} data = json.dumps(data) resp = requests.post(url, headers=headers, data=data) - #print(resp.__dict__) if resp.status_code != 200: status_code = resp.status_code @@ -189,10 +184,9 @@ def run_automatic_classification(self, autoclass_input: str, config_path: str): def get_autoclass_json(self, variant_id, conn: Connection) -> str: variant = conn.get_variant(variant_id, include_clinvar=False, include_consensus=False, include_user_classifications=False, include_heredicare_classifications=False, include_literature = False, include_external_ids = False, include_automatic_classification=False) + if variant is None: return None - - if len(variant.ref) > 15 or len(variant.alt) > 15: # cannot calculate on long insertions/deletions return None diff --git a/src/annotation_service/annotation_jobs/cancerhotspots_job.py b/src/annotation_service/annotation_jobs/cancerhotspots_job.py index 078a3936..e1e70ed1 100644 --- a/src/annotation_service/annotation_jobs/cancerhotspots_job.py +++ b/src/annotation_service/annotation_jobs/cancerhotspots_job.py @@ -4,8 +4,6 @@ import common.paths as paths import common.functions as functions from common.db_IO import Connection -import tempfile -import os import re ## this annotates various information from different vcf files @@ -55,8 +53,6 @@ def execute(self, conn: Connection): self.status = "success" - - def annotate_cancerhotspots(self, variant): status_code = 0 err_msg = "" @@ -101,8 +97,6 @@ def get_cancerhotspots_barcode(self, gene_name, hgvs_p): return '-'.join([gene_name, aa_pos, ref_aa, alt_aa]) - - def hgvs_p_useful(self, hgvs_p): if hgvs_p is None: return False @@ -116,11 +110,6 @@ def hgvs_p_useful(self, hgvs_p): return True - - - - - def save_to_db(self, result, variant_id, conn): if result is None or len(result) == 0: return @@ -145,9 +134,3 @@ def save_to_db(self, result, variant_id, conn): conn.insert_variant_annotation(variant_id, recent_annotation_ids['cancerhotspots_ac'], ac) conn.insert_variant_annotation(variant_id, recent_annotation_ids['cancerhotspots_af'], af) - - - - - - diff --git a/src/annotation_service/annotation_jobs/consequence_job.py b/src/annotation_service/annotation_jobs/consequence_job.py index 97742426..8a370739 100644 --- a/src/annotation_service/annotation_jobs/consequence_job.py +++ b/src/annotation_service/annotation_jobs/consequence_job.py @@ -2,12 +2,9 @@ from ._job import Job import common.paths as paths import common.functions as functions -import tempfile import os -from os.path import exists import urllib -from ..pubmed_parser import fetch ## this annotates various information from different vcf files class consequence_job(Job): @@ -64,14 +61,6 @@ def save_to_db(self, info, variant_id, conn): conn.delete_variant_consequences(variant_id) #FORMAT: 
Allele|Consequence|IMPACT|SYMBOL|HGNC_ID|Feature|Feature_type|EXON|INTRON|HGVSc|HGVSp - # CSQ= - # T|synonymous_variant|LOW|CDH1|HGNC:1748|ENST00000261769.10|Transcript|12/16||c.1896C>T|p.His632%3D, - # T|3_prime_UTR_variant&NMD_transcript_variant|MODIFIER|CDH1|HGNC:1748|ENST00000566612.5|Transcript|11/15||c.*136C>T|, - # T|synonymous_variant|LOW|CDH1|HGNC:1748|ENST00000422392.6|Transcript|11/15||c.1713C>T|p.His571%3D, - # T|3_prime_UTR_variant&NMD_transcript_variant|MODIFIER|CDH1|HGNC:1748|ENST00000566510.5|Transcript|11/15||c.*562C>T|, - # T|non_coding_transcript_exon_variant|MODIFIER|CDH1|HGNC:1748|ENST00000562836.5|Transcript|11/15||n.1967C>T|, - # T|upstream_gene_variant|MODIFIER|FTLP14|HGNC:37964|ENST00000562087.2|Transcript||||, - # T|upstream_gene_variant|MODIFIER|CDH1|HGNC:1748|ENST00000562118.1|Transcript|||| for source in sources: info_field = info_field_prefix + source + "=" csq_info = functions.find_between(info, info_field, '(;|$)') @@ -102,7 +91,6 @@ def save_to_db(self, info, variant_id, conn): hgvs_p = urllib.parse.unquote(parts[10]) #variant_id, transcript_name, hgvs_c, hgvs_p, consequence, impact, exon_nr, intron_nr, hgnc_id, symbol, consequence_source, pfam_acc - #print([variant_id, transcript_name, hgvs_c, hgvs_p, consequence, impact, exon_nr, intron_nr, hgnc_id, gene_symbol, source]) conn.insert_variant_consequence(variant_id, transcript_name, hgvs_c, hgvs_p, consequence, impact, exon_nr, intron_nr, hgnc_id, gene_symbol, source) return status_code, err_msg @@ -150,5 +138,3 @@ def annotate_consequence(self, input_vcf, output_vcf): return returncode, err_msg, vcf_errors - - \ No newline at end of file diff --git a/src/annotation_service/annotation_jobs/heredicare_job.py b/src/annotation_service/annotation_jobs/heredicare_job.py index 8eb1a471..0f0e0c02 100644 --- a/src/annotation_service/annotation_jobs/heredicare_job.py +++ b/src/annotation_service/annotation_jobs/heredicare_job.py @@ -2,7 +2,6 @@ from ._job import Job import common.paths as paths import common.functions as functions -import os from common.heredicare_interface import Heredicare import time from datetime import datetime @@ -18,6 +17,7 @@ def __init__(self, annotation_data): self.annotation_data = annotation_data self.generated_paths = [] + def do_execution(self, *args, **kwargs): result = True job_config = kwargs['job_config'] @@ -51,13 +51,10 @@ def annotate_heredicare(self, variant_id, conn): err_msg = "" heredicare_interface = Heredicare() - #conn.clear_heredicare_annotation(variant_id) heredicare_vid_annotation_type_id = conn.get_most_recent_annotation_type_id('heredicare_vid') vids = conn.get_external_ids_from_variant_id(variant_id, annotation_type_id=heredicare_vid_annotation_type_id) # the vids are imported from the import variants admin page conn.delete_unknown_heredicare_annotations(variant_id) # remove legacy annotations from vids that are deleted now - - #print(vids) for vid in vids: status = "retry" @@ -85,7 +82,7 @@ def annotate_heredicare(self, variant_id, conn): n_fam = heredicare_variant["N_FAM"] n_pat = heredicare_variant["N_PAT"] consensus_class = heredicare_variant["PATH_TF"] if heredicare_variant["PATH_TF"] != "-1" else None - comment = heredicare_variant.get("VUSTF_21", heredicare_variant["VUSTF_15"]) # use vustf21, but if it is missing fallback to vustf15 - fallback can be removed later once the production heredicare api has the vustf21 field + comment = heredicare_variant["VUSTF_21"] comment = comment.strip() if comment is not None else None classification_date = 
heredicare_variant["VUSTF_DATUM"] if heredicare_variant["VUSTF_DATUM"] != '' else None lr_cooc = heredicare_variant["LR_COOC"] @@ -116,7 +113,6 @@ def annotate_heredicare(self, variant_id, conn): return status_code, err_msg - def preprocess_heredicare_center_classification(self, info): if info is None: return None, None @@ -129,4 +125,3 @@ def preprocess_heredicare_center_classification(self, info): comment = None return classification, comment - diff --git a/src/annotation_service/annotation_jobs/hexplorer_job.py b/src/annotation_service/annotation_jobs/hexplorer_job.py index ac869a99..2326eedf 100644 --- a/src/annotation_service/annotation_jobs/hexplorer_job.py +++ b/src/annotation_service/annotation_jobs/hexplorer_job.py @@ -74,11 +74,6 @@ def save_to_db(self, info, variant_id, conn): def annotate_hexplorer(self, input_vcf_path, output_vcf_path): - - #if os.environ.get('WEBAPP_ENV') == 'githubtest': # use docker container installation - # command = functions.get_docker_instructions(os.environ.get("NGSBITS_CONTAINER_ID")) - # command.append("VcfAnnotateHexplorer") - #else: # use local installation command = [os.path.join(paths.ngs_bits_path, "VcfAnnotateHexplorer")] command = command + ["-in", input_vcf_path, "-out", output_vcf_path, "-ref", paths.ref_genome_path] returncode, stderr, stdout = functions.execute_command(command, 'VcfAnnotateHexplorer') @@ -87,9 +82,3 @@ def annotate_hexplorer(self, input_vcf_path, output_vcf_path): - #command = [paths.ngs_bits_path + "VcfAnnotateHexplorer", "-in", input_vcf_path, "-out", output_vcf_path, "-ref", paths.ref_genome_path] - #returncode, stderr, stdout = functions.execute_command(command, process_name = "hexplorer") - #return returncode, stderr, stdout - - - diff --git a/src/annotation_service/annotation_jobs/maxentscan_job.py b/src/annotation_service/annotation_jobs/maxentscan_job.py index 4abd5dd6..43e8d3e5 100644 --- a/src/annotation_service/annotation_jobs/maxentscan_job.py +++ b/src/annotation_service/annotation_jobs/maxentscan_job.py @@ -50,8 +50,6 @@ def execute(self, conn): self.status = "success" - - def save_to_db(self, info, variant_id, conn): status_code = 0 err_msg = "" @@ -60,8 +58,6 @@ def save_to_db(self, info, variant_id, conn): mes_annotation_id = recent_annotation_ids["maxentscan"] mes_swa_annotation_id = recent_annotation_ids["maxentscan_swa"] - #print(info) - # STANDARD MES mes_annotation = functions.find_between(info, "MES=", '(;|$)') @@ -78,6 +74,7 @@ def save_to_db(self, info, variant_id, conn): conn.insert_variant_transcript_annotation(variant_id, transcript, mes_annotation_id, '|'.join([mes_ref.strip(), mes_alt.strip()])) + # MES SWA mes_swa_annotation = functions.find_between(info, "MES_SWA=", '(;|$)') if mes_swa_annotation != '' and mes_swa_annotation is not None: @@ -104,11 +101,6 @@ def save_to_db(self, info, variant_id, conn): def annotate_maxentscan(self, input_vcf_path, output_vcf_path): - - #if os.environ.get('WEBAPP_ENV') == 'githubtest': # use docker container installation - # command = functions.get_docker_instructions(os.environ.get("NGSBITS_CONTAINER_ID")) - # command.append("VcfAnnotateHexplorer") - #else: # use local installation command = [os.path.join(paths.ngs_bits_path, "VcfAnnotateMaxEntScan")] command = command + ["-swa", "-in", input_vcf_path, "-out", output_vcf_path, "-ref", paths.ref_genome_path, "-gff", paths.ensembl_transcript_path] returncode, stderr, stdout = functions.execute_command(command, 'VcfAnnotateMaxEntScan') @@ -117,9 +109,3 @@ def annotate_maxentscan(self, input_vcf_path, 
output_vcf_path): - #command = [paths.ngs_bits_path + "VcfAnnotateHexplorer", "-in", input_vcf_path, "-out", output_vcf_path, "-ref", paths.ref_genome_path] - #returncode, stderr, stdout = functions.execute_command(command, process_name = "hexplorer") - #return returncode, stderr, stdout - - - diff --git a/src/annotation_service/annotation_jobs/spliceai_job.py b/src/annotation_service/annotation_jobs/spliceai_job.py index 4dda3b4e..a9e7b1a5 100644 --- a/src/annotation_service/annotation_jobs/spliceai_job.py +++ b/src/annotation_service/annotation_jobs/spliceai_job.py @@ -2,10 +2,7 @@ from ._job import Job import common.paths as paths import common.functions as functions -import tempfile -import uuid import os -from os.path import exists ## run SpliecAI on the variants which are not contained in the precomputed file @@ -19,6 +16,7 @@ def __init__(self, annotation_data): self.annotation_data = annotation_data self.generated_paths = [] + def do_execution(self, *args, **kwargs): result = True job_config = kwargs['job_config'] @@ -81,7 +79,6 @@ def annotate_from_vcf(self, config_file_path, input_vcf, output_vcf): return returncode, err_msg, vcf_errors - def save_to_db(self, info, variant_id, conn): recent_annotation_ids = conn.get_recent_annotation_type_ids() @@ -104,7 +101,6 @@ def write_vcf_annoate_config(self): return config_file_path - #','.join([str(max([float(x) for x in x.split('|')[2:6] if x != '.'])) for x in value.replace(',', '&').split('&')]) def get_spliceai_max_delta(self, spliceai_raw): spliceai_parts = spliceai_raw.replace(',', '&').split('&') @@ -159,7 +155,7 @@ def annotate_spliceai_algorithm(self, input_vcf_path, output_vcf_path): # prepare input data input_vcf_zipped_path = input_vcf_path + ".gz" - # gbzip and index the input file as this is required for spliceai... + # bgzip and index the input file as this is required for spliceai... returncode, stderr, stdout = functions.execute_command([os.path.join(paths.htslib_path, 'bgzip'), '-f', '-k', input_vcf_path], 'bgzip') if returncode != 0: return returncode, "SpliceAI bgzip error:" + stderr, stdout @@ -169,6 +165,7 @@ def annotate_spliceai_algorithm(self, input_vcf_path, output_vcf_path): return returncode, "SpliceAI tabix error: " + stderr, stdout # execute spliceai + # -M: masked scores! 
command = ['spliceai', '-I', input_vcf_zipped_path, '-O', output_vcf_path, '-R', paths.ref_genome_path, '-A', paths.ref_genome_name.lower(), '-M', '1'] returncode, stderr, stdout = functions.execute_command(command, 'SpliceAI') diff --git a/src/annotation_service/annotation_jobs/task_force_protein_domain_job.py b/src/annotation_service/annotation_jobs/task_force_protein_domain_job.py index 0c513812..ac1f1366 100644 --- a/src/annotation_service/annotation_jobs/task_force_protein_domain_job.py +++ b/src/annotation_service/annotation_jobs/task_force_protein_domain_job.py @@ -14,6 +14,7 @@ def __init__(self, annotation_data): self.annotation_data = annotation_data self.generated_paths = [] + def do_execution(self, *args, **kwargs): result = True job_config = kwargs['job_config'] @@ -35,7 +36,7 @@ def execute(self, conn): if status_code != 0: self.status = "error" self.err_msg = err_msg - return # abort execution + return # abort # update state self.status = "success" @@ -53,5 +54,3 @@ def save_to_db(self, variant, conn): return status_code, err_msg - - diff --git a/src/annotation_service/annotation_jobs/vep_job.py b/src/annotation_service/annotation_jobs/vep_job.py index a3e1322e..d40ad911 100644 --- a/src/annotation_service/annotation_jobs/vep_job.py +++ b/src/annotation_service/annotation_jobs/vep_job.py @@ -4,7 +4,6 @@ import common.functions as functions import re import os -import urllib.parse from common.db_IO import Connection from ..pubmed_parser import fetch @@ -63,7 +62,6 @@ def save_to_db(self, info, variant_id, conn: Connection): transcript_specific_annotation_type_ids = conn.get_recent_annotation_type_ids(only_transcript_specific = True) pfam_annotation_id = transcript_specific_annotation_type_ids["pfam_domains"] - # !!!! format of annotations from vep need to be equal: 0Feature,1HGVSc,2HGVSp,3Consequence,4IMPACT,5EXON,6INTRON,7HGNC_ID,8SYMBOL,9DOMAIN,...additional info csq_info = functions.find_between(info, "CSQ=", '(;|$)') if csq_info == '' or csq_info is None: @@ -92,13 +90,11 @@ def save_to_db(self, info, variant_id, conn: Connection): if pmids != '': literature_entries = fetch(pmids) # defined in pubmed_parser.py for paper in literature_entries: #[pmid, article_title, authors, journal, year] - #print(paper[0]) conn.insert_variant_literature(variant_id, paper[0], paper[1], paper[2], paper[3], paper[4], "vep") return status_code, err_msg - #"/mnt/storage2/GRCh38/share/data/genomes/GRCh38.fa" def _annotate_vep(self, input_vcf, output_vcf): fields_oi_base = "Feature,HGVSc,HGVSp,Consequence,IMPACT,EXON,INTRON,HGNC_ID,SYMBOL,DOMAINS" command = [os.path.join(paths.vep_path, "vep"), diff --git a/src/annotation_service/annotation_queue.py b/src/annotation_service/annotation_queue.py index 55518c13..a74d0070 100644 --- a/src/annotation_service/annotation_queue.py +++ b/src/annotation_service/annotation_queue.py @@ -37,6 +37,7 @@ def __init__(self, annotation_data): automatic_classification_job(annotation_data) # must be called last ] + def execute(self, conn): err_msg = "" status = "success" @@ -50,8 +51,6 @@ def execute(self, conn): return status, err_msg - - def collect_error_msgs(self, msg1, msg2): res = msg1 if msg2 not in msg1: diff --git a/src/annotation_service/main.py b/src/annotation_service/main.py index 58741df1..7eea3be2 100644 --- a/src/annotation_service/main.py +++ b/src/annotation_service/main.py @@ -3,16 +3,11 @@ sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) from common.db_IO import Connection import common.functions as functions -import tempfile 
import traceback from urllib.error import HTTPError -from os.path import exists from .annotation_queue import Annotation_Queue from .annotation_data import Annotation_Data -import random -from mysql.connector import Error, InternalError - -import os +from mysql.connector import InternalError ## configuration @@ -90,11 +85,9 @@ def process_one_request(annotation_queue_id, job_config = get_default_job_config #if random.randint(1,10) > 5: # raise HTTPError(url = "srv18", code=429, msg="Too many requests", hdrs = {}, fp = None) - # initialize the connection conn = Connection(roles=["annotation"]) - # get the variant_id from the annotation queue id & check that the annotation_queue_id is valid annotation_queue_entry = conn.get_annotation_queue_entry(annotation_queue_id) if annotation_queue_entry is None: @@ -102,7 +95,6 @@ def process_one_request(annotation_queue_id, job_config = get_default_job_config return status, "Annotation queue entry not found" variant_id = annotation_queue_entry[1] - # check the variant type variant = conn.get_variant(variant_id, include_annotations = False, include_consensus = False, include_user_classifications = False, include_heredicare_classifications = False, include_automatic_classification = False, include_clinvar = False, include_assays = False, include_literature = False, include_external_ids = False) # 0id,1chr,2pos,3ref,4alt if variant.variant_type in ['sv']: @@ -142,15 +134,14 @@ def process_one_request(annotation_queue_id, job_config = get_default_job_config # cleanup after http error before retry print("An HTTP exception occured: " + str(e)) print(traceback.format_exc()) - status = "retry" conn.update_annotation_queue(annotation_queue_id, status=status, error_msg=str(e)) - runtime_error = str(e) - #raise e #HTTPError(url = e.url, code = e.code, msg = "A HTTP error occured", hdrs = e.hdrs, fp = e.fp) except InternalError as e: # deadlock: code 1213 + print("An exception occured: " + str(e)) + print(traceback.format_exc()) status = "retry" conn.update_annotation_queue(annotation_queue_id, status=status, error_msg=str(e)) runtime_error = "Attempting retry because of database error: " + str(e) + ' ' + traceback.format_exc() @@ -162,17 +153,13 @@ def process_one_request(annotation_queue_id, job_config = get_default_job_config status = "error" runtime_error = str(e) - + # cleanup functions.rm(vcf_path) - - conn.close() - return status, runtime_error - # sequentially processes all pending requests #pending_requests = conn.get_pending_requests() <- call this to get the pending requests! # not used atm!!! 
@@ -181,6 +168,3 @@ def process_all_pending_requests(pending_requests): for pending_request in pending_requests: process_one_request(pending_request[0]) - - - diff --git a/src/annotation_service/pubmed_parser.py b/src/annotation_service/pubmed_parser.py index 98098cba..2a10ff4e 100644 --- a/src/annotation_service/pubmed_parser.py +++ b/src/annotation_service/pubmed_parser.py @@ -32,7 +32,6 @@ def parse_record(rec, doc_section, article_section, title_section, journal_secti pmid = rec[doc_section]['PMID'] rec = rec[doc_section][article_section] - #print(rec) year_section = rec[date_section] if not book: diff --git a/src/common/clinvar_interface.py b/src/common/clinvar_interface.py index 305f3447..1f78718d 100644 --- a/src/common/clinvar_interface.py +++ b/src/common/clinvar_interface.py @@ -4,14 +4,10 @@ import sys sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import common.functions as functions -from common.xml_validator import xml_validator from common.singleton import Singleton from . import paths -from datetime import datetime, timedelta -import re import json -import urllib import jsonschema @@ -27,8 +23,6 @@ def __init__(self): } #print("NEW INSTANCE") - #os.environ.get('CLINVAR_API_KEY') - def get_url(self, endpoint): return "/".join([self.base_url, self.endpoints[endpoint]]) @@ -128,7 +122,6 @@ def get_clinvar_submission_json(self, variant, selected_gene, clinvar_accession clinvar_submission.append(clinvar_submission_properties) data['germlineSubmission'] = clinvar_submission - #print(data) return data @@ -158,7 +151,6 @@ def get_postable_consensus_classification(self, variant, selected_gene, clinvar_ "data": {"content": data} }] } - print(postable_data) return postable_data def post_consensus_classification(self, variant, selected_gene, clinvar_accession): @@ -192,7 +184,6 @@ def post_consensus_classification(self, variant, selected_gene, clinvar_accessio submission_id = resp.json()['id'] return submission_id, status, message - # returns None if there was an ERROR diff --git a/src/common/db_IO.py b/src/common/db_IO.py index f4f0acae..cfff799e 100644 --- a/src/common/db_IO.py +++ b/src/common/db_IO.py @@ -1,17 +1,12 @@ -from logging import raiseExceptions from os import path import sys sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) -import mysql.connector -from mysql.connector import Error import common.functions as functions import common.models as models -from operator import itemgetter -import datetime -import re +import mysql.connector +from mysql.connector import Error from functools import cmp_to_key import os -import html # html.escape(s) def get_db_connection(roles): @@ -67,12 +62,16 @@ def set_connection_encoding(self): self.cursor.execute("SET NAMES 'utf8'") self.cursor.execute("SET CHARACTER SET utf8") self.cursor.execute('SET character_set_connection=utf8;') + + + def close(self): + self.conn.close() + self.cursor.close() # This function removes ALL occurances of duplicated items def remove_duplicates(self, table, unique_column): command = "DELETE FROM " + table + " WHERE " + unique_column + " IN (SELECT * FROM (SELECT " + unique_column + " FROM " + table + " GROUP BY " + unique_column + " HAVING (COUNT(*) > 1)) AS A)" - #command = "DELETE FROM " + table + " WHERE " + unique_column + " IN (SELECT * FROM (SELECT %s FROM %s GROUP BY %s HAVING (COUNT(*) > 1)) AS A)" self.cursor.execute(command) self.conn.commit() @@ -88,14 +87,6 @@ def get_gene_id_by_hgnc_id(self, hgnc_id): return None return result[0][0] # subset the 
result as fetching only one column still returns a tuple! - def close(self): - self.conn.close() - self.cursor.close() - - #def get_pending_requests(self): - # self.cursor.execute("SELECT id,variant_id,user_id FROM annotation_queue WHERE status = 'pending'") - # pending_variant_ids = self.cursor.fetchall() - # return pending_variant_ids def get_annotation_queue(self, status = []): placeholders = self.get_placeholders(len(status)) @@ -105,7 +96,6 @@ def get_annotation_queue(self, status = []): return result - def update_annotation_queue(self, annotation_queue_id, status, error_msg): error_msg = error_msg.replace("\n", " ") #print("UPDATE annotation_queue SET status = " + status + ", finished_at = " + time.strftime('%Y-%m-%d %H:%M:%S') + ", error_message = " + error_msg + " WHERE id = " + str(row_id)) @@ -156,11 +146,6 @@ def get_pfam_description_by_pfam_acc(self, pfam_acc): def insert_variant_consequence(self, variant_id, transcript_name, hgvs_c, hgvs_p, consequence, impact, exon_nr, intron_nr, hgnc_id, symbol, consequence_source): columns_with_info = "variant_id, transcript_name, consequence, impact, source" actual_information = (variant_id, transcript_name, consequence, impact, consequence_source) - #if pfam_acc != '': - # pfam_acc, domain_description = self.get_pfam_description_by_pfam_acc(pfam_acc) - # if domain_description is not None and pfam_acc is not None and domain_description != 'removed': - # columns_with_info = columns_with_info + ", pfam_accession, pfam_description" - # actual_information = actual_information + (pfam_acc, domain_description) if hgvs_c != '': columns_with_info = columns_with_info + ", hgvs_c" actual_information = actual_information + (hgvs_c,) @@ -177,12 +162,6 @@ def insert_variant_consequence(self, variant_id, transcript_name, hgvs_c, hgvs_p hgnc_id = functions.trim_hgnc(hgnc_id) columns_with_info = columns_with_info + ", hgnc_id" actual_information = actual_information + (hgnc_id, ) - #gene_id = self.get_gene_id_by_hgnc_id(hgnc_id) - #if gene_id is not None: - # columns_with_info = columns_with_info + ", gene_id" - # actual_information = actual_information + (gene_id,) - #else: - # print("WARNING: there was no row in the gene table for hgnc_id " + str(hgnc_id) + ". geneid will be empty even though hgncid was given. 
Error occured during insertion of variant consequence: " + str(variant_id) + ", " + str(transcript_name) + ", " + str(hgvs_c) + ", " +str(hgvs_p) + ", " +str(consequence) + ", " + str(impact) + ", " + str(exon_nr) + ", " + str(intron_nr) + ", " + str(hgnc_id) + ", " + str(symbol) + ", " + str(consequence_source)) elif symbol != '': gene_id = self.get_gene_id_by_symbol(symbol) if gene_id is not None: @@ -194,10 +173,6 @@ def insert_variant_consequence(self, variant_id, transcript_name, hgvs_c, hgvs_p placeholders = "%s, "*len(actual_information) placeholders = placeholders[:len(placeholders)-2] command = "INSERT INTO variant_consequence (" + columns_with_info + ") VALUES (" + placeholders + ")" - #command = "INSERT INTO variant_consequence (" + columns_with_info + ") \ - # SELECT " + placeholders + " FROM DUAL WHERE NOT EXISTS (SELECT * FROM variant_consequence \ - # WHERE " + columns_with_info.replace(', ', '=%s AND ') + '=%s ' + " LIMIT 1)" - #actual_information = actual_information * 2 self.cursor.execute(command, actual_information) self.conn.commit() @@ -251,10 +226,6 @@ def insert_gene_alias(self, hgnc_id, symbol): def insert_variant_annotation(self, variant_id, annotation_type_id, value, supplementary_document = None): # supplementary documents are not supported yet! see: https://stackoverflow.com/questions/10729824/how-to-insert-blob-and-clob-files-in-mysql - #command = "INSERT INTO variant_annotation (`variant_id`, `annotation_type_id`, `value`) \ - # SELECT %s, %s, %s FROM DUAL WHERE NOT EXISTS (SELECT * FROM variant_annotation \ - # WHERE `variant_id`=%s AND `annotation_type_id`=%s AND `value`=%s LIMIT 1)" - #self.cursor.execute(command, (variant_id, annotation_type_id, value, variant_id, annotation_type_id, value)) command = "INSERT INTO variant_annotation (`variant_id`, `annotation_type_id`, `value`) VALUES (%s, %s, %s) ON DUPLICATE KEY UPDATE `value`=%s" self.cursor.execute(command, (variant_id, annotation_type_id, value, value)) self.conn.commit() @@ -283,10 +254,7 @@ def insert_sv_variant(self, chrom, start, end, sv_type, imprecise): # insert data into variant table to generate the variant_id variant_id = self.insert_variant(chrom, pos, ref, alt, None,None,None,None,None, 'sv', sv_variant_id) - #command = "INSERT INTO variant (chr, pos, ref, alt, orig_chr, orig_pos, orig_ref, orig_alt, variant_type, sv_variant_id) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, 'sv', %s)" - #self.cursor.execute(command, (chrom, pos, ref, alt, None,None,None,None,None, 'sv', sv_variant_id)) - #self.conn.commit() - #variant_id = self.get_variant_id_by_sv_variant_id(sv_variant_id) + return variant_id, sv_variant_id @@ -332,30 +300,14 @@ def get_overlapping_genes(self, chrom, start, end): # function for structural va return result def insert_external_variant_id(self, variant_id, external_id, annotation_type_id): - #command = "DELETE FROM variant_ids WHERE external_id = %s AND annotation_type_id = %s and variant_id != %s" - #self.cursor.execute(command, (external_id, annotation_type_id, variant_id)) - #self.conn.commit() command = "INSERT INTO variant_ids (variant_id, external_id, annotation_type_id) \ SELECT %s, %s, %s FROM DUAL WHERE NOT EXISTS (SELECT * FROM variant_ids \ WHERE `variant_id`=%s AND `external_id`=%s AND `annotation_type_id`=%s LIMIT 1)" self.cursor.execute(command, (variant_id, external_id, annotation_type_id, variant_id, external_id, annotation_type_id)) self.conn.commit() - - #def update_external_variant_id(self, variant_id, external_id, annotation_type_id): - # command = "UPDATE 
variant_ids SET external_id = %s WHERE variant_id = %s AND annotation_type_id = %s" - # self.cursor.execute(command, (external_id, variant_id, annotation_type_id)) - # self.conn.commit() -# - #def insert_update_external_variant_id(self, variant_id, external_id, annotation_type_id): - # previous_external_variant_id = self.get_external_ids_from_variant_id(variant_id, annotation_type_id=annotation_type_id) - # #print(previous_external_variant_id) - # if (len(previous_external_variant_id) == 1): # do update - # self.update_external_variant_id(variant_id, external_id, annotation_type_id) - # else: # save new - # self.insert_external_variant_id(variant_id, external_id, annotation_type_id) + def insert_annotation_request(self, variant_id, user_id): # this inserts only if there is not an annotation request for this variant which is still pending - #command = "INSERT INTO annotation_queue (variant_id, status, user_id) VALUES (%s, %s, %s)" command = "INSERT INTO annotation_queue (`variant_id`, `user_id`) \ SELECT %s, %s FROM DUAL WHERE NOT EXISTS (SELECT * FROM annotation_queue \ WHERE `variant_id`=%s AND `status`='pending' LIMIT 1)" @@ -394,11 +346,6 @@ def insert_clinvar_submission(self, clinvar_variant_annotation_id, interpretatio self.conn.commit() def get_clinvar_variant_annotation_id_by_variant_id(self, variant_id): - #command = "SELECT a.id,a.variant_id,a.version_date \ - # FROM clinvar_variant_annotation a \ - # INNER JOIN ( \ - # SELECT variant_id, max(version_date) AS version_date FROM clinvar_variant_annotation GROUP BY variant_id \ - # ) b ON a.variant_id = b.variant_id AND a.variant_id = " + functions.enquote(variant_id) + " AND a.version_date = b.version_date" command = "SELECT id FROM clinvar_variant_annotation WHERE variant_id=%s" self.cursor.execute(command, (variant_id, )) result = self.cursor.fetchone() @@ -494,7 +441,7 @@ def insert_pfam_id_mapping(self, accession_id, description): command = "INSERT INTO pfam_id_mapping (accession_id, description) VALUES (%s, %s)" self.cursor.execute(command, (accession_id, description)) self.conn.commit() - + def insert_pfam_legacy(self, old_accession_id, new_accession_id): #remove version numbers first @@ -527,7 +474,6 @@ def get_task_force_protein_domains(self, chromosome, variant_start, variant_end) return result def insert_variant_literature(self, variant_id, pmid, title, authors, journal, year, source): - #command = "INSERT INTO variant_literature (variant_id, pmid, title, authors, journal_publisher, year) VALUES (%s, %s, %s, %s, %s, %s)" command = "INSERT INTO variant_literature (variant_id, pmid, title, authors, journal_publisher, year, source) \ SELECT %s, %s, %s, %s, %s, %s, %s FROM DUAL WHERE NOT EXISTS (SELECT * FROM variant_literature \ WHERE `variant_id`=%s AND `pmid`=%s LIMIT 1)" @@ -676,80 +622,6 @@ def is_hgnc(self, string): return False return True - #def get_variant_more_info(self, variant_id, user_id = None): - # command = "SELECT * FROM variant WHERE id = %s" - # command = self.annotate_genes(command) - # command = self.annotate_consensus_classification(command) - # actual_information = (variant_id, ) - # if user_id is not None: - # command = self.annotate_specific_user_classification(command) - # actual_information += (user_id, ) - # self.cursor.execute(command, actual_information) - # result = self.cursor.fetchone() - # return result - - ## these functions add additional columns to the variant table - #def annotate_genes(self, command): - # prefix = """ - # SELECT id, chr, pos, ref, alt, group_concat(gene_id SEPARATOR '; ') 
as gene_id, group_concat(symbol SEPARATOR '; ') as symbol FROM ( - # SELECT * FROM ( - # """ - # postfix = """ - # ) a LEFT JOIN ( - # SELECT DISTINCT variant_id, gene_id FROM variant_consequence WHERE gene_id IS NOT NULL) b ON a.id=b.variant_id - # ) c LEFT JOIN ( - # SELECT id AS gene_id_2, symbol FROM gene WHERE id - # ) d ON c.gene_id=d.gene_id_2 - # GROUP BY id, chr, pos, ref, alt - # """ - # return prefix + command + postfix - - #def annotate_specific_user_classification(self, command): - # prefix = """ - # SELECT g.*, h.user_classification FROM ( - # """ - # postfix = """ - # ) g LEFT JOIN ( - # SELECT user_classification.variant_id, user_classification.classification as user_classification FROM user_classification - # LEFT JOIN user_classification x ON x.variant_id = user_classification.variant_id AND x.date > user_classification.date - # WHERE x.variant_id IS NULL AND user_classification.user_id=%s - # ORDER BY user_classification.variant_id) h ON g.id = h.variant_id ORDER BY chr, pos, ref, alt - # """ - # return prefix + command + postfix - - #def annotate_consensus_classification(self, command): - # prefix = """ - # SELECT e.*, f.classification FROM ( - # """ - # postfix = """ - # ) e LEFT JOIN ( - # SELECT variant_id, classification FROM consensus_classification WHERE is_recent=1) f ON e.id = f.variant_id - # """ - # return prefix + command + postfix - - #### DEPRECATED! - ## this function returns a list of variant tuples (can have length more than one if there are multiple mane select transcripts for this variant) - #def annotate_preferred_transcripts(self, variant): - # result = [] - # consequences = self.get_variant_consequences(variant_id = variant[0]) - # if consequences is not None: - # consequences = self.order_consequences(consequences) - # best_consequence = consequences[0] - # - # if best_consequence[14] == 1: # if the best one is a mane select transcript scan also the following and add them as well - # for consequence in consequences: - # if consequence[14] == 1: # append one variant entry for each mane select transcript (in case of multiple genes, usually a low number) - # result.append(variant + consequence) - # else: - # break # we can do this because the list is sorted - # else: # in case the best consequence is no mane select transcrip - # result.append(variant + best_consequence) -# - # else: - # result.append(variant + (None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)) - # return result - - def get_variants_page_merged(self, page, page_size, sort_by, include_hidden, user_id, ranges = None, genes = None, consensus = None, user = None, automatic_splicing = None, automatic_protein = None, @@ -794,7 +666,6 @@ def get_variants_page_merged(self, page, page_size, sort_by, include_hidden, use if len(transcripts) == 0: gene_id = self.get_gene_id_by_symbol(source) transcripts = self.get_preferred_transcripts(gene_id, return_all = False) - #print("Transcripts: " + str(transcripts)) for transcript in transcripts: chrom = transcript.chrom start_pos = self.cdna_pos_to_genomic_pos(transcript.id, start, transcript.orientation, start_modifier, beyond_cds_operation_start) @@ -853,7 +724,6 @@ def get_variants_page_merged(self, page, page_size, sort_by, include_hidden, use actual_information += (annotation_type_id, value) postfix = self.add_constraints_to_command(postfix, new_constraints) if genes is not None and len(genes) > 0: - #genes = [self.get_gene(self.convert_to_gene_id(x))[1] for x in genes] hgnc_ids = 
set() for gene in genes: current_gene_id = self.convert_to_gene_id(gene) @@ -878,7 +748,6 @@ def get_variants_page_merged(self, page, page_size, sort_by, include_hidden, use parts = variant_string.split('-') parts[0] = 'chr' + parts[0] if not parts[0].startswith('chr') else parts[0] list_of_constraints.append(["(chr = %s AND pos = %s AND ref = %s AND alt = %s)", "(chrom = %s AND start = %s AND end = %s AND sv_type LIKE %s)"]) - #list_of_constraints.append("(SELECT id FROM variant WHERE chr = %s AND pos = %s AND ref = %s AND alt = %s UNION SELECT variant.id FROM variant WHERE sv_variant_id IN (SELECT id FROM sv_variant WHERE chrom = %s AND start = %s AND end = %s AND sv_type LIKE %s))") list_of_information.append([[parts[0], parts[1], parts[2], parts[3]], [parts[0], parts[1], parts[2], functions.enpercent(parts[3])]]) restrictions1 = " OR ".join([x[0] for x in list_of_constraints]) restrictions2 = " OR ".join([x[1] for x in list_of_constraints]) @@ -904,7 +773,6 @@ def get_variants_page_merged(self, page, page_size, sort_by, include_hidden, use new_constraints_inner = new_constraints_inner + "SELECT variant_id FROM consensus_classification WHERE classification IN " + placeholders + " AND is_recent = 1" actual_information += tuple(consensus_without_dash) new_constraints = "variant.id IN (" + new_constraints_inner + ")" - #postfix = self.add_constraints_to_command(postfix, new_constraints) constraints_complete = new_constraints if include_heredicare_consensus and len(consensus_without_dash) > 0: heredicare_consensus = [] @@ -1122,7 +990,6 @@ def cdna_pos_to_genomic_pos(self, transcript_id, cdna_pos, orientation, modifier else: command = self.add_constraints_to_command(command, "cdna_start <= %s AND cdna_end >= %s") actual_information += (cdna_pos, cdna_pos) - #command = "SELECT start, cdna_start FROM exon WHERE transcript_id = %s AND cdna_start <= %s AND cdna_end >= %s" self.cursor.execute(command, actual_information) result = self.cursor.fetchone() if result is None: @@ -1192,8 +1059,6 @@ def get_variant_ids_from_gene_and_hgvs(self, gene, hgvs_c, source = 'ensembl'): sortable_dict = {} for c in current_batch: sortable_dict[c[0]] = c - #current_batch = self.order_consequences(current_batch) - # THIS SHOULD BE REPLACED WITH SOMETHING LIKE THIS: transcripts, current_batch = functions.sort_transcript_dict(sortable_dict) best_consequence = current_batch[0] @@ -1212,61 +1077,15 @@ def get_variant_ids_from_gene_and_hgvs(self, gene, hgvs_c, source = 'ensembl'): return matching_variant_ids - ## DEPRECATED - #def order_consequences(self, consequences): - # keyfunc = cmp_to_key(mycmp = self.sort_consequences) - # - # consequences.sort(key = keyfunc) # sort by preferred transcript - # return consequences - - ## DEPRECATED - #def sort_consequences(self, a, b): - # # sort by ensembl/refseq - # if a[9] == 'ensembl' and b[9] == 'refseq': - # return -1 - # elif a[9] == 'refseq' and b[9] == 'ensembl': - # return 1 - # elif a[9] == b[9]: - # - # # sort by mane select - # if a[14] is None or b[14] is None: - # return 1 - # elif a[14] == 1 and b[14] == 0: - # return -1 - # elif a[14] == 0 and b[14] == 1: - # return 1 - # elif a[14] == b[14]: - # - # # sort by biotype - # if a[18] == 'protein coding' and b[18] != 'protein coding': - # return -1 - # elif a[18] != 'protein coding' and b[18] == 'protein coding': - # return 1 - # elif (a[18] != 'protein coding' and b[18] != 'protein coding') or (a[18] == 'protein coding' and b[18] == 'protein coding'): - # - # # sort by length - # if a[12] > b[12]: - # return -1 - # 
elif a[12] < b[12]: - # return 1 - # else: - # return 0 - - - - - - """ - def get_mane_select_for_gene(self, gene, source): - gene_id = self.convert_to_gene_id(gene) - command = "SELECT DISTINCT transcript_name FROM variant_consequence WHERE transcript_name IN (SELECT name FROM transcript WHERE is_mane_select=1) AND gene_id=%s AND source=%s" - self.cursor.execute(command, (gene_id, source)) - result = self.cursor.fetchone() - if result is not None: - return result[0] - return None - """ + #def get_mane_select_for_gene(self, gene, source): + # gene_id = self.convert_to_gene_id(gene) + # command = "SELECT DISTINCT transcript_name FROM variant_consequence WHERE transcript_name IN (SELECT name FROM transcript WHERE is_mane_select=1) AND gene_id=%s AND source=%s" + # self.cursor.execute(command, (gene_id, source)) + # result = self.cursor.fetchone() + # if result is not None: + # return result[0] + # return None def get_mane_select_for_gene(self, gene_id): command = "SELECT name FROM transcript WHERE gene_id = %s AND is_mane_select=1" @@ -1322,10 +1141,7 @@ def get_variant_consequences(self, variant_id): ON gene.hgnc_id = y.hgnc_id \ ) x \ ON transcript.name = x.transcript_name" - #import time - #start_time = time.time() self.cursor.execute(command, (variant_id, )) - #print("--- consequences: %s seconds ---" % (time.time() - start_time)) result = self.cursor.fetchall() #result = sorted(result, key=lambda x: functions.convert_none_infinite(x[12]), reverse=True) # sort table by transcript length @@ -1669,7 +1485,6 @@ def insert_scheme_criterium_applied(self, classification_id, classification_crit def get_scheme_criteria_applied(self, classification_id, where="user"): if where == "user": - # command = "SELECT * FROM user_classification_criteria_applied WHERE user_classification_id=%s" table_oi = "user_classification_criteria_applied" prefix = "user" elif where == "consensus": @@ -1768,28 +1583,6 @@ def update_user_classification(self, user_classification_id, classification, com self.cursor.execute(command, (str(classification), comment, date, str(scheme_class), user_classification_id)) self.conn.commit() - #def delete_variant(self, variant_id): - # status = "deleted" - # message = "Deleted variant " + str(variant_id) - # consensus_classification = self.get_consensus_classification(variant_id) - # if consensus_classification is None: - # consensus_classification = [] - # if len(consensus_classification) > 0: # do not delete if the variant has a consensus classification - # status = "skipped" - # message = "Did not delete variant because it has consensus classifications" - # return status, message - # user_classifications = self.get_user_classifications(variant_id) - # if user_classifications is None: - # user_classifications = [] - # if len(user_classifications) > 0: # do not delete if the variant has a user classification - # status = "skipped" - # message = "Did not delete variant because it has user classifications" - # return status, message - # command = "DELETE FROM variant WHERE id = %s" - # self.cursor.execute(command, (variant_id,)) - # self.conn.commit() - # return status, message - @@ -1820,13 +1613,6 @@ def get_import_request(self, import_queue_id): import_request = self.convert_raw_import_request(import_request_raw) return import_request - #def get_import_request_overview(self): - # command = "SELECT id FROM import_queue ORDER BY requested_at DESC" - # self.cursor.execute(command) - # import_queue_ids = self.cursor.fetchall() - # import_requests = 
[self.get_import_request(import_queue_id[0]) for import_queue_id in import_queue_ids] - # return import_requests - def get_import_requests_page(self, page, page_size): # get one page of import requests determined by offset & pagesize command = "SELECT id FROM import_queue ORDER BY requested_at DESC LIMIT %s, %s" @@ -2103,23 +1889,6 @@ def update_variant_annotation(self, variant_id, annotation_type_id, value): # us self.cursor.execute(command, (value, variant_id, annotation_type_id)) self.conn.commit() - """ - def get_import_request(self, import_queue_id = '', date = ''): - command = '' - if import_queue_id != '': - command = 'SELECT * FROM import_queue WHERE id = %s' - information = (import_queue_id, ) - if date != '': - date_parts = date.split('-') - information = (date_parts[0] + '-' + date_parts[1] + '-' + date_parts[2] + ' ' + date_parts[3] + ':' + date_parts[4] + ':' + date_parts[5], ) - command = 'SELECT * FROM import_queue WHERE requested_at = %s' - - if command != '': - self.cursor.execute(command, information) - res = self.cursor.fetchone() - return res - return None - """ def get_heredicare_center_classifications(self, heredicare_annotation_id): command = 'SELECT id, heredicare_ZID, (SELECT name FROM heredicare_ZID WHERE heredicare_ZID.ZID = heredicare_center_classification.heredicare_ZID) as center_name, classification, comment FROM heredicare_center_classification WHERE variant_heredicare_annotation_id = %s' @@ -2127,13 +1896,9 @@ def get_heredicare_center_classifications(self, heredicare_annotation_id): result = self.cursor.fetchall() if len(result) == 0: return None - #result = sorted(result, key=lambda x: functions.convert_none_infinite(x[5]), reverse=True) return result def insert_user(self, username, first_name, last_name, affiliation, api_roles): - #command = "INSERT INTO user (username, first_name, last_name, affiliation) \ - # SELECT %s FROM DUAL WHERE NOT EXISTS (SELECT * FROM user \ - # WHERE `username`=%s LIMIT 1)" command = "INSERT INTO user (username, first_name, last_name, affiliation, api_roles) VALUES (%s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE first_name=%s, last_name=%s, affiliation=%s, api_roles=%s" self.cursor.execute(command, (username, first_name, last_name, affiliation, api_roles, first_name, last_name, affiliation, api_roles)) self.conn.commit() @@ -2277,20 +2042,6 @@ def check_list_permission(self, user_id, list_id): result['edit'] = True return result - #### DELETE LATER! 
- #def check_user_list_ownership(self, user_id, list_id, requests_write=False): - # inner_command = "SELECT * FROM user_variant_lists WHERE (user_id = %s OR public_read = 1)" - # if requests_write: - # inner_command += " AND public_edit = 1" - # inner_command += " AND id = %s" - # command = "SELECT EXISTS (" + inner_command + ")" - # self.cursor.execute(command, (user_id, list_id)) - # result = self.cursor.fetchone() - # result = result[0] - # if result == 1: - # return True - # else: - # return False # this is used in tests def get_latest_list_id(self): @@ -2343,14 +2094,12 @@ def add_lists(self, first_list_id, second_list_id, target_list_id): first_list_variant_ids = self.get_variant_ids_from_list(first_list_id) second_list_variant_ids = self.get_variant_ids_from_list(second_list_id) variant_ids_for_target_list = list(set(first_list_variant_ids) | set(second_list_variant_ids)) - #print(variant_ids_for_target_list) self.clear_list(target_list_id) for variant_id in variant_ids_for_target_list: self.add_variant_to_list(target_list_id, variant_id) def clear_list(self, list_id): - #"DELETE FROM " + db_table + " WHERE classification_id = %s AND pmid = %s" command = "DELETE FROM list_variants WHERE list_id = %s" self.cursor.execute(command, (list_id, )) self.conn.commit() @@ -2387,7 +2136,6 @@ def get_one_sv_variant(self, variant_id): def get_variant_id(self, chr, pos, ref, alt): - #command = "SELECT id FROM variant WHERE chr = " + functions.enquote(chr) + " AND pos = " + str(pos) + " AND ref = " + functions.enquote(ref) + " AND alt = " + functions.enquote(alt) chr, chr_valid = functions.curate_chromosome(chr) if not chr_valid: return None @@ -2772,7 +2520,6 @@ def get_variant(self, variant_id, intron = consequence[6] ) consequences.append(new_consequence) - assays = None if include_assays: @@ -2799,7 +2546,6 @@ def get_variant(self, variant_id, assay_metadata_dict[assay_metadata_type.title] = assay_metadata new_assay = models.Assay(id = int(assay_id), assay_type_id = assay_type_id, type_title=assay_type_name, metadata = assay_metadata_dict, date = date, link = link) assays.append(new_assay) - literature = None if include_literature: @@ -2940,14 +2686,7 @@ def get_assay_metadata_types(self, assay_type_id, format = "dict"): result[assay_metadata_type.title] = assay_metadata_type return result - #def get_assay_type_id_dict(self): - # command = "SELECT id, title FROM assay_type" - # self.cursor.execute(command) - # res = self.cursor.fetchall() - # d = {} - # for elem in res: - # d[elem[1]] = elem[0] - # return d + def get_assay_id(self, assay_title): command = "SELECT id FROM assay_type WHERE title = %s" self.cursor.execute(command, (assay_title, )) @@ -2969,16 +2708,6 @@ def get_assay_types(self) -> dict: result[assay_type_id] = {"title": assay_title, "metadata_types": assay_metadata_types} return result - # DEPRECATED: delete later - #def valid_assay_type_id(self, assay_type_id): - # command = "SELECT EXISTS (SELECT id FROM assay_type WHERE id = %s)" - # self.cursor.execute(command, (assay_type_id, )) - # result = self.cursor.fetchone()[0] # get the first element as result is always a tuple - # if result == 0: - # return False - # return True - - def insert_assay_metadata(self, assay_id, assay_metadata_type_id, value): if value is not None: command = "INSERT INTO assay_metadata (assay_id, assay_metadata_type_id, value) VALUES (%s, %s, %s)" @@ -2993,8 +2722,6 @@ def convert_assay_metadata_type(self, assay_metadata_type_raw): return assay_metadata_type - - def get_last_insert_id(self): command = 
"SELECT LAST_INSERT_ID()" self.cursor.execute(command) @@ -3176,12 +2903,6 @@ def get_heredivar_clinvar_submission(self, variant_id): result = self.cursor.fetchone() return result - #def update_heredivar_clinvar_submission_accession_id(self, accession_id): - # command = "UPDATE publish_clinvar_queue SET accession_id = %s" - # self.cursor.execute(command, (accession_id, )) - # self.conn.commit() - - def get_current_annotation_staus_all_variants(self): command = """ SELECT a1.variant_id, a1.user_id, a1.requested, a1.status, a1.finished_at, a1.error_message @@ -3402,81 +3123,6 @@ def get_preferred_transcripts(self, gene_id, return_all=False): return result - #result = [] - #command = "SELECT name, biotype, length, is_gencode_basic, is_mane_select, is_mane_plus_clinical, is_ensembl_canonical FROM transcript WHERE gene_id = %s" - #self.cursor.execute(command, (gene_id, )) - #result_raw = self.cursor.fetchall() - #transcripts = [] - #for elem in result_raw: - # if elem[0].startswith("ENST"): - # source = "ensembl" if elem[0].startswith("ENST") else "refseq" - # new_elem = {"name": elem[0], - # "biotype": elem[1], - # "length": elem[2], - # "is_gencode_basic": elem[3], - # "is_mane_select": elem[4], - # "is_mane_plus_clinical": elem[5], - # "is_ensembl_canonical": elem[6], - # "source": source - # } - # transcripts.append(new_elem) - # - #if len(transcripts) > 0: - # transcripts = self.order_transcripts(transcripts) - # - # if not return_all: - # result.append(transcripts.pop(0)) # always append the first one - # - # for transcript in transcripts: # scan for all mane select transcripts - # if transcript["is_mane_select"]: - # result.append(transcript) - # else: - # break # we can do this because the list is sorted - # else: - # result = transcripts - #else: # the variant does not have any consequences - # return None - #return result - - #def order_transcripts(self, consequences): - # keyfunc = cmp_to_key(mycmp = self.sort_transcripts) - # consequences.sort(key = keyfunc) # sort by preferred transcript - # return consequences - # - #def sort_transcripts(self, a, b): - # # sort by ensembl/refseq - # if a["source"] == 'ensembl' and b["source"] == 'refseq': - # return -1 - # elif a["source"] == 'refseq' and b["source"] == 'ensembl': - # return 1 - # elif a["source"] == b["source"]: -# - # # sort by mane select - # if a["is_mane_select"] is None or b["is_mane_select"] is None: - # return 1 - # elif a["is_mane_select"] and not b["is_mane_select"]: - # return -1 - # elif not a["is_mane_select"] and b["is_mane_select"]: - # return 1 - # elif a["is_mane_select"] == b["is_mane_select"]: -# - # # sort by biotype - # if a["biotype"] == 'protein coding' and b["biotype"] != 'protein coding': - # return -1 - # elif a["biotype"] != 'protein coding' and b["biotype"] == 'protein coding': - # return 1 - # elif (a["biotype"] != 'protein coding' and b["biotype"] != 'protein coding') or (a["biotype"] == 'protein coding' and b["biotype"] == 'protein coding'): -# - # # sort by length - # if a["length"] > b["length"]: - # return -1 - # elif a["length"] < b["length"]: - # return 1 - # else: - # return 0 - - - def insert_criterium_scheme(self, name, version, display_name, scheme_type, reference): @@ -3656,19 +3302,6 @@ def get_automatic_classification_criteria_applied(self, automatic_classification return result - ##### DELETE LATER! 
- #def get_automatic_classification_ids(self): - # command = "SELECT id FROM automatic_classification" - # self.cursor.execute(command) - # result = self.cursor.fetchall() - # return [x[0] for x in result] - # - ##### DELETE LATER! - #def update_automatic_classification(self, automatic_classification_id, classification_splicing, classification_protein): - # command = "UPDATE automatic_classification SET classification_splicing = %s, classification_protein = %s WHERE id = %s" - # self.cursor.execute(command, (classification_splicing, classification_protein, automatic_classification_id)) - # self.conn.commit() - def insert_coldspot(self, chrom, start, end, source): command = "INSERT INTO coldspots (chrom, start, end, source) VALUES (%s, %s, %s, %s)" self.cursor.execute(command, (chrom, start, end, source)) @@ -3767,16 +3400,6 @@ def get_most_recent_publish_heredicare_queue_id(self, vid, variant_id): result = self.cursor.fetchone() return result[0] - #def get_most_recent_publish_heredicare_queue_entries(self, variant_id): - # command = """ - # SELECT id, status, requested_at, finished_at, message, vid, variant_id, submission_id, consensus_classification_id FROM publish_heredicare_queue - # WHERE variant_id = %s AND id >= (SELECT MAX(id) FROM publish_heredicare_queue WHERE variant_id = %s AND status != "skipped") ORDER BY vid - # """ - # self.cursor.execute(command, (variant_id, variant_id)) - # result = self.cursor.fetchall() - # if len(result) == 0: - # return None - # return result def get_most_recent_publish_queue_ids_heredicare(self, variant_id): command = "SELECT DISTINCT publish_queue_id FROM publish_heredicare_queue WHERE variant_id = %s AND id >= (SELECT MAX(id) FROM publish_heredicare_queue WHERE variant_id = %s AND status != 'skipped')" @@ -3797,14 +3420,6 @@ def get_heredicare_queue_entries(self, publish_queue_ids: list, variant_id): self.cursor.execute(command, actual_information) result = self.cursor.fetchall() return result - - #def has_skipped_heredicare_publishes_before_finished_one(self, variant_id, last_finished_requested_at): - # command = "SELECT COUNT(id) FROM publish_heredicare_queue WHERE variant_id = %s AND status = 'skipped' AND requested_at > %s" - # self.cursor.execute(command, (variant_id, last_finished_requested_at)) - # result = self.cursor.fetchone() - # if result[0] > 0: - # return True - # return False def insert_publish_request(self, user_id: int, upload_heredicare: bool, upload_clinvar: bool, variant_ids: list): @@ -3829,16 +3444,6 @@ def close_publish_request(self, publish_queue_id, status, message): self.cursor.execute(command, (status, message, publish_queue_id)) self.conn.commit() - #def get_publish_request_overview(self): - # command = """ - # SELECT id, (SELECT first_name FROM user WHERE user.id=publish_queue.user_id)first_name, (SELECT last_name FROM user WHERE user.id=publish_queue.user_id)last_name, - # requested_at, status, finished_at, message, (SELECT DISTINCT COUNT(publish_clinvar_queue.variant_id) FROM publish_clinvar_queue WHERE publish_clinvar_queue.publish_queue_id=publish_queue.id) clinvar_subs, - # (SELECT DISTINCT COUNT(publish_heredicare_queue.variant_id) FROM publish_heredicare_queue WHERE publish_heredicare_queue.publish_queue_id=publish_queue.id) heredicare_subs - # FROM publish_queue - # """ - # self.cursor.execute(command) - # result = self.cursor.fetchall() - # return result def get_publish_requests_page(self, page, page_size): # get one page of import requests determined by offset & pagesize @@ -3906,16 +3511,6 @@ def 
update_publish_clinvar_queue_status(self, publish_clinvar_queue_id, status, self.conn.commit() - #def get_most_recent_publish_clinvar_queue_entry(self, variant_id): - # command = """ - # SELECT id, publish_queue_id, requested_at, status, message, submission_id, accession_id, last_updated, celery_task_id, consensus_classification_id FROM publish_clinvar_queue - # WHERE id = (SELECT MAX(id) FROM publish_clinvar_queue WHERE variant_id = %s and status != 'skipped') - # """ - # self.cursor.execute(command, (variant_id, )) - # result = self.cursor.fetchone() - # return result - - def get_most_recent_publish_queue_ids_clinvar(self, variant_id): command = "SELECT DISTINCT publish_queue_id FROM publish_clinvar_queue WHERE variant_id = %s AND id >= (SELECT MAX(id) FROM publish_clinvar_queue WHERE variant_id = %s AND status != 'skipped' AND status != 'deleted')" self.cursor.execute(command, (variant_id, variant_id)) @@ -3945,16 +3540,6 @@ def get_unique_publish_clinvar_queue_status(self): result = self.cursor.fetchall() return [x[0] for x in result] - # DEPRECATED: delete later - #def check_publish_queue_id(self, publish_queue_id): - # command = "SELECT id FROM publish_queue WHERE id = %s" - # self.cursor.execute(command, (publish_queue_id, )) - # result = self.cursor.fetchone() - # if result is None: - # return False - # return True - - def get_publish_request(self, publish_queue_id): command = "SELECT id, user_id, requested_at, finished_at, status, message, upload_clinvar, upload_heredicare, variant_ids FROM publish_queue WHERE id = %s" @@ -4176,4 +3761,5 @@ def invalidate_download_queue(self, download_queue_id): command = "UPDATE download_queue SET is_valid = 0 WHERE id = %s" self.cursor.execute(command, (download_queue_id, )) self.conn.commit() - \ No newline at end of file + + diff --git a/src/common/functions.py b/src/common/functions.py index 61ca9c5a..6c62fdbf 100644 --- a/src/common/functions.py +++ b/src/common/functions.py @@ -1,5 +1,4 @@ import os -import collections import datetime import re import sys @@ -17,11 +16,10 @@ import werkzeug - - def is_secure_filename(filename): return werkzeug.utils.secure_filename(filename) == filename + def prettyprint_json(json_obj, func = print): pretty_json = json.dumps(json_obj, indent=2) if func is not None: @@ -33,12 +31,14 @@ def prettyprint_json(json_obj, func = print): def basedir(): return os.getcwd() + def load_webapp_env(): webapp_env = os.environ.get('WEBAPP_ENV', None) if webapp_env is None: raise ValueError("No WEBAPP_ENV environment variable set.") return webapp_env + # converts one line from the variant table to a vcf record def variant_to_vcf(chr, pos, ref, alt, path, reference_genome="GRCh38"): #CHROM POS ID REF ALT QUAL FILTER INFO @@ -58,49 +58,13 @@ def variant_to_vcf(chr, pos, ref, alt, path, reference_genome="GRCh38"): file.close() return True + def cnv_to_bed(chrom, start, end, path): with open(path, "w") as file: line = '\t'.join([chrom, str(start), str(end)]) file.write(line + '\n') return True -#def read_vcf_info(path): -# file = open(path, "r") -# entries = [] -# info_headers = [] -# for line in file: -# if line.strip() == '': -# continue -# if line.startswith('##INFO'): -# info_headers.append(line.strip()) -# continue -# if not line.startswith('#'): -# l = line.split('\t')[7] -# entries.append(l.strip()) -# file.close() -# return info_headers, entries - - -#Record = collections.namedtuple('Record', [ -# 'CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER' -#]) - - -# doesnt collect FORMAT/INFO fields -#def 
read_vcf_variant(path): -# all_records = [] -# for line in open(path, "r"): -# if not line.startswith("#"): -# prep_line = line.strip().split("\t")#[0:upper_bound] -# rec = Record(prep_line[0], prep_line[1], prep_line[2], prep_line[3], prep_line[4], prep_line[5], prep_line[6]) -# all_records.append(rec) -# return all_records -# #variant = functions.read_vcf_variant(tmp_file_path)[0] # accessing only the first element of the returned list is save because we process only one variant at a time -# #new_chr = variant.CHROM -# #new_pos = variant.POS -# #new_ref = variant.REF -# #new_alt = variant.ALT - def get_refseq_chom_to_chrnum(): # taken from: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_assembly_report.txt @@ -111,6 +75,7 @@ def get_refseq_chom_to_chrnum(): "NC_000021.9": "chr21", "NC_000022.11": "chr22", "NC_000023.11": "chrX", "NC_000024.10": "chrY", "NC_012920.1": "chrMT"} return refseq_dict + def write_vcf_header(info_columns, output_func = print, tail = "", reference_genome="GRCh38"): output_func("##fileformat=VCFv4.2" + tail) output_func("##fileDate=" + get_today() + tail) @@ -130,7 +95,6 @@ def trim_chr(chr): def validate_chr(chr, max = 22): chr = trim_chr(chr) - if not chr in ['X', 'Y', 'M', 'MT'] and not chr in [str(i) for i in range(1,max+1)]: return False if chr == "M": @@ -145,36 +109,39 @@ def collect_info(old_info, new_info_name, new_value, sep = ';'): values_to_join.append(str(old_info)) if new_value is not None and new_value != '': values_to_join.append(new_info_name + str(new_value)) - return sep.join(values_to_join) - def trim_hgnc(hgnc_id): hgnc_id = hgnc_id.upper() if hgnc_id.startswith('HGNC:'): return hgnc_id[5:] else: return hgnc_id - + + def remove_version_num(identifier, char = '.'): if char in identifier: return identifier[:identifier.find(char)] else: return identifier + def is_dna(strg, search=re.compile(r'[^ACGTacgt-]').search): return not bool(search(strg)) + def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) + def convert_none_infinite(x): if x is None: return -float('inf') else: return x + def execute_command(command, process_name, use_prefix_error_log = True, stdout=subprocess.PIPE): completed_process = subprocess.Popen(command, stdout=stdout, stderr=subprocess.PIPE) command_output, std_err = completed_process.communicate()#[1].strip().decode("utf-8") # catch errors and warnings and convert to str @@ -192,6 +159,7 @@ def execute_command(command, process_name, use_prefix_error_log = True, stdout=s err_msg = err_msg + std_err return completed_process.returncode, err_msg, command_output + def grep(regex, filepath): command = ["grep", "-e", regex, filepath] status_code, err_msg, result = execute_command(command, "grep") @@ -241,10 +209,12 @@ def preprocess_variant(infile, do_liftover=False): return final_returncode, err_msg, command_output + def bgzip(path): returncode, err_msg, command_output = execute_command([os.path.join(paths.htslib_path, 'bgzip'), '-f', '-k', path], process_name="bgzip") return returncode, err_msg, command_output + def curate_chromosome(chrom): if chrom is None: return None, False @@ -271,6 +241,7 @@ def curate_position(pos): else: return pos, is_valid + def curate_sequence(seq, allowed = "ACGT-"): if seq is None: return None, False @@ -284,6 +255,7 @@ def curate_sequence(seq, allowed = "ACGT-"): is_valid = False return seq, is_valid + def filename_allowed(filename, allowed_extensions = {'vcf', 'txt'}): return '.' 
in filename and \ filename.rsplit('.', 1)[1].lower() in allowed_extensions @@ -319,6 +291,7 @@ def check_vcf(path, ref_genome = 'GRCh38'): return returncode, err_msg, vcf_errors + def left_align_vcf(infile, outfile, ref_genome = 'GRCh38'): genome_path = '' if ref_genome == 'GRCh37': @@ -332,40 +305,6 @@ def left_align_vcf(infile, outfile, ref_genome = 'GRCh38'): return returncode, err_msg, command_output -## DEPRECATED -#def hgvsc_to_vcf(hgvs, reference = None): -# #tmp_file_path = tempfile.gettempdir() + "/hgvs_to_vcf" -# tmp_file_path = get_random_temp_file("_hgvs2vcf") -# tmp_file = open(tmp_file_path + ".tsv", "w") -# tmp_file.write("#reference hgvs_c\n") -# if reference is None: -# reference, hgvs = split_hgvs(hgvs) -# tmp_file.write(reference + "\t" + hgvs + "\n") -# tmp_file.close() -# -# command = [os.path.join(paths.ngs_bits_path, "HgvsToVcf")] -# command.extend(['-in', tmp_file_path + '.tsv', '-ref', paths.ref_genome_path, '-out', tmp_file_path + '.vcf']) -# returncode, err_msg, command_output = execute_command(command, "HgvsToVcf", use_prefix_error_log=False) -# -# chr = None -# pos = None -# ref = None -# alt = None -# tmp_file = open(tmp_file_path + '.vcf', "r") -# for line in tmp_file: # this assumes a single-entry vcf -# if line.strip() == '' or line.startswith('#'): -# continue -# parts = line.split('\t') -# chr = parts[0] -# pos = parts[1] -# ref = parts[3] -# alt = parts[4] -# -# -# rm(tmp_file_path + ".tsv") -# rm(tmp_file_path + ".vcf") -# return chr, pos, ref, alt, err_msg - def hgvsc_to_vcf(hgvs_strings, references = None): #tmp_file_path = tempfile.gettempdir() + "/hgvs_to_vcf" @@ -430,7 +369,6 @@ def split_hgvs(hgvs): hgvs = hgvs[double_point_pos+1:].strip() return reference, hgvs return None, hgvs - def find_between(s, prefix, postfix): @@ -462,6 +400,7 @@ def get_refseq_to_ensembl_transcript_dict(reverse = False): parsing_table.close() return result + def get_transcript_to_gene_dict(): ensembl_to_refseq = get_refseq_to_ensembl_transcript_dict(reverse = True) # ccds included! 
gene_to_ensembl_file = open(paths.gene_to_ensembl_transcript_path, 'r') @@ -531,14 +470,17 @@ def complement(seq, missing_data = "NA"): seq = seq.upper() return seq + def get_base64_encoding(path): with open(path, "rb") as pdf_file: encoded_string = base64.b64encode(pdf_file.read()) return encoded_string + def buffer_to_base64(buffer): return base64.b64encode(buffer.getvalue()) + # not used atm #def base64_to_file(base64_string, path): # file_64_decode = decode_base64(base64_string) @@ -546,9 +488,11 @@ def buffer_to_base64(buffer): # file_result.write(file_64_decode) # file_result.close() + def decode_base64(base64_string): return base64.b64decode(base64_string) + def encode_vcf(text): result = decode_html(text) result = text.replace(' ', '_') \ @@ -571,6 +515,7 @@ def encode_vcf(text): .replace('~1Y', '=') return result + def decode_vcf(text): result = text.replace('_', ' ') \ .replace('%3B', ';') \ @@ -582,11 +527,13 @@ def decode_vcf(text): .replace('%1Y', '=') return result + def encode_html(text): # this escapes special characters for the use in html text result = text.replace('>', '>') \ .replace('<', '<') return result + def decode_html(text): result = text.replace('>', '>') \ .replace('<', '<') @@ -604,6 +551,7 @@ def process_multiple(list_of_objects, sep = '~26', do_prefix = True) -> str: new_info = sep.join(infos) return new_info + def list_of_objects_to_dict(list_of_objects, key_func = lambda a : a, val_func = lambda a : a): result = {} for object in list_of_objects: @@ -627,18 +575,22 @@ def add_args_to_url(url, new_params): def get_today(): return datetime.datetime.today().strftime('%Y-%m-%d') + def get_now(): return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + def reformat_date(date_str, input_pattern, output_pattern): datetime_obj = datetime.datetime.strptime(date_str, input_pattern) return datetime_obj.strftime(output_pattern) + def days_between(d1, d2): d1 = datetime.datetime.strptime(d1, "%Y-%m-%d") d2 = datetime.datetime.strptime(d2, "%Y-%m-%d") return abs((d2 - d1).days) + def buffer_to_file_system(buffer, path): with open(path, 'w') as f: for line in buffer: @@ -653,6 +605,7 @@ def is_snv(one_var): else: return True + def read_dotenv(): webapp_env = os.environ.get('WEBAPP_ENV', None) if webapp_env is None: @@ -666,15 +619,18 @@ def read_dotenv(): load_dotenv(dotenv_path) + def enquote(string): string = str(string).strip("'") # remove quotes if the input string is already quoted! return "'" + string + "'" + def enbrace(string): #string = str(string).strip("(").strip(")") string = "(" + string + ")" return string + def enpercent(string): string = str(string).strip('%') return '%' + string + '%' @@ -684,14 +640,17 @@ def get_random_temp_file(fileending, filename_ext = "", folder = tempfile.gettem filename = collect_info(str(uuid.uuid4()), "", filename_ext, sep = '_') return os.path.join(folder, filename + "." 
+ str(fileending.strip('.'))).strip('.') + def rm(path): if os.path.exists(path): os.remove(path) + def cleanup_files(*args): for path in args: rm(path) + def remove_oldest_file(folder, maxfiles=10): if os.path.exists(folder): list_of_files = os.listdir(folder) @@ -705,6 +664,7 @@ def remove_oldest_file(folder, maxfiles=10): def mkdir_recursive(dirpath): pathlib.Path(dirpath).mkdir(parents=True, exist_ok=True) + def str2datetime(datetime_str, fmt): if datetime_str is None: return None @@ -712,13 +672,13 @@ def str2datetime(datetime_str, fmt): return datetime.datetime.strptime(datetime_str, fmt) - def order_classes( classes): keyfunc = cmp_to_key(mycmp = sort_classes) classes = [str(x) for x in classes] classes.sort(key = keyfunc) # sort by preferred transcript return classes - + + def sort_classes(a, b): # sort by ensembl/refseq class_sequence = ['1', '2', '3-', '3', '3+', '4M', '4', '5', 'R'] @@ -732,6 +692,7 @@ def sort_classes(a, b): return -1 return 0 + def reverse_seq(seq): seq = seq.upper().replace('A', 't').replace('T', 'a').replace('G', 'c').replace('C', 'g') return seq.upper() @@ -840,6 +801,7 @@ def extend_dict(dictionary, key, new_value): dictionary[key] = [new_value] return dictionary + # format: chr:start-stop def get_sequence(chrom: str, start: int, end: int): # chrom, start and end is the region of interest @@ -859,6 +821,7 @@ def get_sequence(chrom: str, start: int, end: int): sequence, region = extract_sequence(stdout) return sequence, region + # fasta_str should only contain a single fasta entry def extract_sequence(fasta_str): lines = fasta_str.split('\n') @@ -874,6 +837,7 @@ def extract_sequence(fasta_str): sequence += line return sequence, region + def get_sv_variant_sequence(chrom, start, end, sv_type): start = int(start) end = int(end) @@ -901,8 +865,6 @@ def get_sv_variant_sequence(chrom, start, end, sv_type): return ref, alt, pos - - def get_preferred_genes(): return set(["ATM", "BARD1", "BRCA1", "BRCA2", "BRIP1", "CDH1", "CHEK2", "PALB2", "PTEN", "RAD51C", "RAD51D", "STK11", "TP53"]) @@ -912,11 +874,13 @@ def none_to_empty_list(obj): return [] return obj + def none2default(obj, default): if obj is None: return default return obj + def percent_to_decimal(input) -> float: if input is None: return None @@ -925,7 +889,6 @@ def percent_to_decimal(input) -> float: return input - def three_to_one_letter(s): s = s.lower() if s == "ala": return "A" @@ -953,6 +916,7 @@ def three_to_one_letter(s): if s == "ter": return "X" return "-" + def one_to_three_letter(s): s = s.upper() if s == "A": return "Ala" @@ -980,6 +944,3 @@ def one_to_three_letter(s): if s == "X": return "Ter" return "-" - - - diff --git a/src/common/heredicare_interface.py b/src/common/heredicare_interface.py index 29ed5834..6d1debef 100644 --- a/src/common/heredicare_interface.py +++ b/src/common/heredicare_interface.py @@ -38,6 +38,9 @@ class Heredicare(metaclass=Singleton): bearer = {} bearer_timestamp = {} + max_tries = 3 + backoff_mult = 20 + def __init__(self): self.base_url = "https://hazel.imise.uni-leipzig.de/pids2" # /project self.projects = { @@ -85,19 +88,41 @@ def get_bearer(self, project_type): # project_type is either "upload" or "downlo message = "" bearer = None status = "success" - url = self.get_url(project_type, "bearer") + auth = self.get_auth(project_type) - data = {"grant_type":"client_credentials"} if any([x is None for x in auth]): # bearer is none message = "ERROR: missing credentials for HerediCare API!" 
status = "error" else: - resp = requests.post(url, auth=auth, data=data) - if resp.status_code != 200: # bearer is None - message = "ERROR: HerediCare API client credentials endpoint returned an HTTP " + str(resp.status_code) + " error: " + self.extract_error_message(resp.text) - status = "error" - else: - bearer = resp.json()["access_token"] + retry = True + current_try = 0 + + while retry and (current_try <= self.max_tries): + url = self.get_url(project_type, "bearer") + data = {"grant_type":"client_credentials"} + + time.sleep(current_try * self.backoff_mult) + resp = requests.post(url, auth=auth, data=data) + + if resp.status_code in [401, 503]: # unauthorized, service unavailable --> retry these + message = "ERROR: HerediCare API client credentials endpoint returned an HTTP " + str(resp.status_code) + status = "error" + current_try += 1 + elif resp.status_code == 555: + message = "ERROR: HerediCare API client credentials endpoint returned an HTTP 555 error. Reason: " + urllib.parse.unquote(resp.headers.get("Error-Reason", "not provided")) + status = "error" + retry = False + elif resp.status_code != 200: + message = "ERROR: HerediCare API client credentials endpoint returned an HTTP " + str(resp.status_code) + " error: " + self.extract_error_message(resp.text) + status = "error" + retry = False + else: # success + status = "success" + message = "" + retry = False + bearer = resp.json()["access_token"] + #print(resp.text) + return bearer, status, message def extract_error_message(self, error_text): @@ -125,7 +150,6 @@ def introspect_token(self, project_type): now = datetime.now() status = "success" message = "" - #print("TOKEN: " + str(self.bearer)) bearer, timestamp = self.get_saved_bearer(project_type) if bearer is None: status, message = self.update_token(now, project_type) @@ -264,23 +288,11 @@ def get_new_submission_id(self): submission_id = None project_type = "upload" - status, message = self.introspect_token(project_type) # checks validity of the token and updates it if neccessary - if status == 'error': - return submission_id, status, message - url = self.get_url(project_type, "submissionid") - bearer, timestamp = self.get_saved_bearer(project_type) - header = {"Authorization": "Bearer " + bearer} + status, message, all_items = self.iterate_pagination(url, project_type) - resp = requests.get(url, headers=header) - if resp.status_code == 401: # unauthorized - message = "ERROR: HerediCare API get submission id endpoint returned an HTTP 401, unauthorized error. Attempting retry." 
- status = "retry" - elif resp.status_code != 200: - message = "ERROR: HerediCare API getsubmission id endpoint endpoint returned an HTTP " + str(resp.status_code) + " error: " + self.extract_error_message(resp.text) - status = "error" - else: # success - submission_id = resp.json()["items"][0]["submission_id"] + if status == "success": + submission_id = all_items[0]["submission_id"] return submission_id, status, message @@ -290,23 +302,11 @@ def get_new_record_id(self): record_id = None project_type = "upload" - status, message = self.introspect_token(project_type) # checks validity of the token and updates it if neccessary - if status == 'error': - return record_id, status, message - url = self.get_url(project_type, "recordid") - bearer, timestamp = self.get_saved_bearer(project_type) - header = {"Authorization": "Bearer " + bearer} + status, message, all_items = self.iterate_pagination(url, project_type) - resp = requests.get(url, headers=header) - if resp.status_code == 401: # unauthorized - message = "ERROR: HerediCare API get submission id endpoint returned an HTTP 401, unauthorized error. Attempting retry." - status = "retry" - elif resp.status_code != 200: - message = "ERROR: HerediCare API getsubmission id endpoint endpoint returned an HTTP " + str(resp.status_code) + " error: " + self.extract_error_message(resp.text) - status = "error" - else: # success - record_id = resp.json()["items"][0]["record_id"] + if status == "success": # success + record_id = all_items[0]["record_id"] return record_id, status, message @@ -320,26 +320,17 @@ def get_submission_status(self, submission_id): status = "pending" return finished_at, status, message - status, message = self.introspect_token(project_type) # checks validity of the token and updates it if neccessary - if status == 'error': - return finished_at, status, message - url = self.get_url(project_type, "submission_status", [str(submission_id)]) - bearer, timestamp = self.get_saved_bearer(project_type) - header = {"Authorization": "Bearer " + bearer} + status, message, all_items = self.iterate_pagination(url, project_type) - resp = requests.get(url, headers=header) - if resp.status_code != 200: - message = "ERROR: HerediCare API get submission id endpoint endpoint returned an HTTP " + str(resp.status_code) + " error: " + self.extract_error_message(resp.text) + if status == "error": status = "retry" - else: # success - resp = resp.json(strict=False) - items = resp["items"] + if status == "success": # success - if len(items) == 0: # submission id was generated but no data was posted yet + if len(all_items) == 0: # submission id was generated but no data was posted yet status = "pending" else: - info = items[0] + info = all_items[0] process_result = info["process_result"] process_date = info["process_date"] @@ -616,9 +607,6 @@ def get_data(self, variant, vid, options): submission_id = None post_regexes, status, message = self.get_post_regexes() - #if os.environ.get('WEBAPP_ENV', '') == 'dev': - # with open('/mnt/storage2/users/ahdoebm1/HerediVar/src/common/heredicare_interface_debug/post_regexes.json', "w") as f: - # functions.prettyprint_json(post_regexes, f.write) if status == "error": return data, vid, submission_id, status, message @@ -707,7 +695,6 @@ def _post_data(self, data): status = "success" message = "" retry = False - #print(resp.text) return status, message @@ -724,119 +711,3 @@ def post(self, variant, vid, options): return vid, submission_id, status, message - - - -#if __name__ == "__main__": -# functions.read_dotenv() -# 
heredicare_interface = Heredicare() -# -# variant_id_oi = "55" -# -# from common.db_IO import Connection -# conn = Connection(roles = ["db_admin"]) -# variant = conn.get_variant(variant_id_oi) -# conn.close() -# -# #submission_id, status, message = heredicare_interface.upload_consensus_classification(variant) -# #print(submission_id) -# #print(status) -# #print(message) -# -# #finished_at, status, message = heredicare_interface.get_submission_status(submission_id) -# #print(finished_at) -# #print(status) -# #print(message) -# -# -# #submission_id, status, message = heredicare_interface.get_new_submission_id() -# #print(submission_id) -# #print(status) -# #print(message) -# #finished_at, status, message = heredicare_interface.get_submission_status(submission_id) -# #print(finished_at) -# #print(status) -# #print(message) -# -# # 122: first success! -# finished_at, status, message = heredicare_interface.get_submission_status(122) -# print(finished_at) -# print(status) -# print(message) -# -# # -# heredicare_interface.get_data(variant, vid = "8882909", options = {"post_consensus_classification": True}) - -#if __name__ == "__main__": -# functions.read_dotenv() -# -# vids, status, message = heredicare_interface.get_vid_list() -# -# for vid_raw in vids: -# vid = vid_raw['record_id'] -# variant, status, message = heredicare_interface.get_variant(vid) -# -# if variant["PATH_TF"] != "-1": -# print(variant) -# break - - - - -""" -{ -GENERELLE INFORMATIONEN ÜBER DIE VARIANTE: - 'VID': '19439917' --> variant id - 'REV_STAT': '2' --> 2: kein review, 3: review erfolgt, 1: neu angelegt? - 'QUELLE': '2' --> 1: manuell, 2: upload - 'GEN': 'MSH2' --> das gen - 'REFSEQ': 'NM_000251.3' --> Transkript - 'ART': '-1' --> 1-6 sind klar, was bedeutet -1? - 'CHROM': '2' --> chromosom - 'POS_HG19': '47630514' --> hg19 position - 'REF_HG19': 'G' --> hg19 reference base - 'ALT_HG19': 'C' --> hg19 alternative base - 'POS_HG38': '47403375' --> hg38 position - 'REF_HG38': 'G' --> hg38 reference base - 'ALT_HG38': 'C' --> hg38 alternative base - 'KONS': '-1' --> consequence, value 1-10 - 'KONS_VCF': 'missense_variant' --> consequence?? was ist der Unterschied zu KONS? - 'CHGVS': 'c.184G>C' --> c.hgvs - 'PHGVS': 'Gly62Arg' --> p.hgvs - - 'CBIC': None --> ?? - 'PBIC': None --> ?? - 'CGCHBOC': None --> ?? - 'PGCHBOC': None --> ?? - -TF CONSENSUS KLASSIFIKATION: - 'PATH_TF': '-1' --> Klassifikation der task-force: Fragen: wofür werden die Werte 1-3 verwendet?, Was ist der Wert 20 (Artefakt)?, Was ist der Unterschied zwischen den Werten -1, 21 und 4? - 'BEMERK': None --> Das Kommentar der task-force zur Klassifikation - 'VUSTF_DATUM': None --> Datum der task-force Klassifikation - 'VUSTF_01': None --> annotation - 'VUSTF_02': None --> annotation - 'VUSTF_03': None --> annotation - 'VUSTF_04': None --> annotation - 'VUSTF_05': None --> annotation - 'VUSTF_06': None --> annotation - 'VUSTF_07': None --> annotation - 'VUSTF_08': None --> annotation - 'VUSTF_09': None --> annotation - 'VUSTF_10': None --> annotation - 'VUSTF_11': None --> annotation - 'VUSTF_12': None --> annotation - 'VUSTF_13': None --> annotation - 'VUSTF_14': None --> annotation - 'VUSTF_15': None --> annotation - literatur -> wichtig? - 'VUSTF_16': None --> annotation - evidenzlevel literatur -> wichtig? - 'VUSTF_17': None --> annotation - kommentar -> wichtig? - 'VUSTF_18': None --> annotation - 'VUSTF_BEMERK': None --> wo liegt hier der Unterschied zu BEMERK? - 'ERFDAT': '23.09.2023' --> Datum an dem die Variante hochgeladen wurde? 
- 'VISIBLE': '1' --> Ignoriere Varianten mit 0? - 'VID_REMAP': None --> Hat was mit VISIBLE zu tun. Wenn sie nicht visible ist dann ist es ein duplikat und wurde remapped auf die vid die dann hier steht? - 'N_PAT': '1' --> Anzahl der Fälle mit dieser Variante? - 'N_FAM': '1' --> Anzahl der Familien mit dieser Variante? -} - -""" \ No newline at end of file diff --git a/src/common/paths.py b/src/common/paths.py index 2c5a1df5..8658285c 100644 --- a/src/common/paths.py +++ b/src/common/paths.py @@ -30,8 +30,6 @@ def joinpaths(path, *paths): logs_dir = joinpaths(workdir, 'logs') downloads_dir = joinpaths(workdir, 'downloads') download_variant_list_dir = joinpaths(downloads_dir, 'variant_lists') - # classified variants folders are calculated in tasks.py - #report_dir = joinpaths(workdir, "") #'downloads/consensus_classification_reports/' # webapp logs path webapp_log_dir = joinpaths(logs_dir, "webapp") @@ -67,7 +65,6 @@ def joinpaths(path, *paths): #spliceai_path = joinpaths(datadir, "SpliceAI/spliceai_scores_2022_02_09_GRCh38.vcf.gz") #spliceai_path = joinpaths(datadir, "SpliceAI/spliceai_test.vcf.gz") cadd_snvs_path = joinpaths(datadir, "CADD/CADD_SNVs_1.6_GRCh38.vcf.gz") - #cadd_snvs_path = joinpaths(datadir, "CADD/CADD_SNVs_1.6_test.vcf.gz") cadd_indels_path = joinpaths(datadir, "CADD/CADD_InDels_1.6_GRCh38.vcf.gz") clinvar_path = joinpaths(datadir, "ClinVar/clinvar_converted_GRCh38.vcf.gz") submission_summary_path = joinpaths(datadir, "ClinVar/submission_summary_preprocessed.txt.gz") @@ -116,7 +113,6 @@ def joinpaths(path, *paths): resources_dir = joinpaths(workdir, 'resources') logs_dir = joinpaths(workdir, 'logs') classified_variants_dir = joinpaths(workdir, 'classified_variants') - #report_dir = joinpaths(workdir, "") #'downloads/consensus_classification_reports/' # webapp logs path webapp_log_dir = joinpaths(logs_dir, "test") @@ -147,7 +143,7 @@ def joinpaths(path, *paths): #metadata gnomad_path = datadir + "gnomAD.vcf.gz" gnomad_m_path = datadir + "gnomAD_mito.vcf.gz" - phylop_file_path = "/mnt/users/ahdoebm1/HerediVar/data/dbs/phyloP/hg38.phyloP100way.bw"#"https://download.imgag.de/public/dbs/phyloP/hg38_phyloP100way_vertebrate.bw" + phylop_file_path = "/mnt/users/ahdoebm1/HerediVar/data/dbs/phyloP/hg38.phyloP100way.bw" dbsnp_path = datadir + "dbSNP.vcf.gz" revel_path = datadir + "revel.vcf.gz" spliceai_path = datadir + "SpliceAI.vcf.gz" @@ -168,60 +164,10 @@ def joinpaths(path, *paths): # clinvar submission clinvar_submission_schema = joinpaths(resources_dir, "clinvar_submission_schemas/clinvar_submission_schema_18_10_23.json") -elif webapp_env == 'githubtest': - """ configuration for the testing environment on github actions """ - - workdir = "/home/runner/work/HerediVar/HerediVar/" - datadir = joinpaths(workdir, "src/annotation_service/tests/data/testdbs/") - toolsdir = joinpaths(workdir, "tools") - resources_dir = joinpaths(workdir, 'resources') - logs_dir = joinpaths(workdir, 'logs') - classified_variants_dir = joinpaths(workdir, 'classified_variants') - #datadir = "/data/" - - # tools - # vep not used atm - ngs_bits_path = "" # added to path variable - htslib_path = "" # added to path variable - - - # data - #ref_genome_path = "https://download.imgag.de/public/genomes/GRCh38.fa" - ref_genome_path = workdir + "GRCh38.fa" # used for spliceai - ref_genome_path_grch37 = workdir + "GRCh37.fa" - chainfile_path = workdir + "hg19ToHg38.fixed.over.chain.gz" - - - #metadata - gnomad_path = datadir + "gnomAD.vcf.gz" - gnomad_m_path = datadir + "gnomAD_mito.vcf.gz" - phylop_file_path = 
workdir + "hg38.phyloP100way.bw" - dbsnp_path = datadir + "dbSNP.vcf.gz" - revel_path = datadir + "revel.vcf.gz" - spliceai_path = datadir + "SpliceAI.vcf.gz" - cadd_snvs_path = datadir + "CADD.vcf.gz" - cadd_indels_path = datadir + "CADD_InDels.vcf.gz" - clinvar_path = datadir + "ClinVar.vcf.gz" - submission_summary_path = datadir + "ClinVar.txt.gz" - BRCA_exchange_path = datadir + "BRCA_exchange.vcf.gz" - FLOSSIES_path = datadir + "FLOSSIES.vcf.gz" - cancerhotspots_path = datadir + "cancerhotspots.vcf.gz" - #arup_brca_path = datadir + "ARUP_BRCA.vcf.gz" - tp53_db = datadir + "TP53_database.vcf.gz" - hci_priors = datadir + "HCI_priors.vcf.gz" - - # IGV data - igv_data_path = joinpaths(workdir, "src/frontend_celery/webapp/static/packages/igv/data") - - # clinvar submission - clinvar_submission_schema = joinpaths(resources_dir, "clinvar_submission_schemas/clinvar_submission_schema_18_10_23.json") - - elif webapp_env == 'prod': """ configuration for the production environment """ - # general paths workdir = "/mnt/storage1/HerediVar" datadir = joinpaths(workdir, "data/dbs") @@ -231,15 +177,11 @@ def joinpaths(path, *paths): logs_dir = joinpaths(workdir, 'logs') downloads_dir = joinpaths(workdir, 'downloads') download_variant_list_dir = joinpaths(downloads_dir, 'variant_lists') - #classified_variants_dir = joinpaths(workdir, 'classified_variants') - #report_dir = joinpaths(workdir, "") #'downloads/consensus_classification_reports/' # webapp logs path webapp_log_dir = joinpaths(logs_dir, "webapp") webapp_log = joinpaths(webapp_log_dir, "webapp.log") - - #tools vep_path = joinpaths(toolsdir, "ensembl-vep") vep_cache_dir = joinpaths(vep_path, "data/cache") @@ -250,17 +192,14 @@ def joinpaths(path, *paths): automatic_classification_path = joinpaths(toolsdir, "herediclass") automatic_classification_config_path = os.path.join(automatic_classification_path, "config_production.yaml") - # data ref_genome_dir = joinpaths(workdir, "data/genomes") ref_genome_path = joinpaths(ref_genome_dir, "GRCh38.fa") ref_genome_path_grch37 = joinpaths(ref_genome_dir, "GRCh37.fa") chainfile_path = joinpaths(ref_genome_dir, "hg19ToHg38.fixed.over.chain.gz") ensembl_transcript_path = joinpaths(datadir, "ensembl/Homo_sapiens.GRCh38.110.gff3") - #refseq_transcript_path = joinpaths(datadir, "RefSeq/refseq_transcripts_110.gff.gz") refseq_transcript_path = joinpaths(datadir, "RefSeq/refseq_transcripts_110.gff") refseq_transcript_4_consequence_path = joinpaths(datadir, "RefSeq/refseq_transcripts_110.4consequence.gff") - #metadata gnomad_path = joinpaths(datadir, "gnomAD/gnomAD_genome_GRCh38.vcf.gz") @@ -270,7 +209,6 @@ def joinpaths(path, *paths): revel_path = joinpaths(datadir, "REVEL/revel_grch38_all_chromosomes.vcf.gz") spliceai_snv_path = joinpaths(datadir, "SpliceAI/spliceai_scores.masked.snv.hg38.vcf.gz") spliceai_indel_path = joinpaths(datadir, "SpliceAI/spliceai_scores.masked.indel.hg38.vcf.gz") - #spliceai_path = joinpaths(datadir, "SpliceAI/spliceai_scores_2022_02_09_GRCh38.vcf.gz") cadd_snvs_path = joinpaths(datadir, "CADD/CADD_SNVs_1.6_GRCh38.vcf.gz") cadd_indels_path = joinpaths(datadir, "CADD/CADD_InDels_1.6_GRCh38.vcf.gz") clinvar_path = joinpaths(datadir, "ClinVar/clinvar_converted_GRCh38.vcf.gz") diff --git a/src/frontend_celery/config.py b/src/frontend_celery/config.py index 6398ba3a..4b868aae 100644 --- a/src/frontend_celery/config.py +++ b/src/frontend_celery/config.py @@ -116,14 +116,6 @@ class DevConfig(Config): #CLIENTSECRET = os.environ.get('CLIENT_SECRET', 'NRLzlQfotGy9W8hkuYFm3T48Bjnti15k') -class 
GithubtestConfig(Config): - #HOST = "127.0.0.1" # localhost - TESTING = True - DEBUG = True - TLS = False - - - class LocaltestConfig(Config): #HOST = "SRV018.img.med.uni-tuebingen.de" TESTING = True diff --git a/src/frontend_celery/main.py b/src/frontend_celery/main.py index dbf407db..cbc54f7f 100644 --- a/src/frontend_celery/main.py +++ b/src/frontend_celery/main.py @@ -1,14 +1,6 @@ -import os from webapp import create_app - - - app = create_app() - - - - if __name__ == '__main__': app.run(host=app.config['HOST'], port=app.config['PORT']) \ No newline at end of file diff --git a/src/frontend_celery/webapp/api/api_routes.py b/src/frontend_celery/webapp/api/api_routes.py index a975872a..d5ed8f0a 100644 --- a/src/frontend_celery/webapp/api/api_routes.py +++ b/src/frontend_celery/webapp/api/api_routes.py @@ -23,7 +23,7 @@ @require_api_token_permission(["read_only"]) def consensus_classification(): - conn = get_connection() #Connection(roles = ["read_only"]) + conn = get_connection() variant_id = request.args.get('variant_id') if variant_id is None: @@ -98,8 +98,6 @@ def check(): return jsonify(result) - - @api_blueprint.route('/api/v1.0/post/variant', methods = ['POST']) @require_api_token_permission(["user"]) def insert_variant(): @@ -110,5 +108,3 @@ def insert_variant(): return jsonify(create_result) - - diff --git a/src/frontend_celery/webapp/download/download_routes.py b/src/frontend_celery/webapp/download/download_routes.py index 20de00af..9ab0770b 100644 --- a/src/frontend_celery/webapp/download/download_routes.py +++ b/src/frontend_celery/webapp/download/download_routes.py @@ -9,7 +9,6 @@ import common.paths as paths from ..utils import * from . import download_functions, download_tasks -import werkzeug download_blueprint = Blueprint( @@ -53,35 +52,6 @@ def variant(): ) -## listens on get parameter: list_id -#@download_blueprint.route('/download/vcf/variant_list') -#@require_permission(['read_resources']) -#def variant_list(): -# conn = get_connection() -# -# list_id = request.args.get('list_id') -# require_valid(list_id, "user_variant_lists", conn) -# -# # check that the logged in user is the owner of this list -# require_list_permission(list_id, ['read'], conn) -# variant_ids_oi = conn.get_variant_ids_from_list(list_id) -# -# force_url = url_for("download.variant_list", list_id = list_id, force = True) -# redirect_url = url_for("user.my_lists", view = list_id) -# download_file_name = "list_" + str(list_id) + ".vcf" -# -# vcf_file_buffer, status, vcf_errors, err_msg = download_functions.get_vcf(variant_ids_oi, conn) -# -# if status == "redirect": -# flash({"message": "Error during VCF Check: " + vcf_errors + " with error message: " + err_msg + ". Download it anyway", -# "link": force_url}, "alert-danger") -# current_app.logger.error(get_preferred_username() + " tried to download a vcf which contains errors: " + vcf_errors + ". 
For variant list " + str(list_id)) -# return redirect(redirect_url) -# -# current_app.logger.info(get_preferred_username() + " downloaded vcf of variant list: " + str(list_id)) -# -# return send_file(vcf_file_buffer, as_attachment=True, download_name=download_file_name, mimetype="text/vcf") - # listens on get parameter: list_id @download_blueprint.route('/download/vcf/variant_list') @require_permission(['read_resources']) @@ -176,41 +146,6 @@ def generate_variant_list_vcf_status(): }) - -#@download_blueprint.route('/download/test') -#@require_permission(["admin_resources"]) -#def download_test(): -# import time -# def test_large_download(): -# for i in range(50): -# yield str(i).encode() -# print(i) -# time.sleep(1) -# return Response( -# stream_with_context(download_functions.test_large_download()), -# content_type = "text/event-stream", -# headers={'Content-Disposition': 'attachment; filename=test.txt', 'X-Accel-Buffering': 'no'} -# ) - -#@download_blueprint.route('/download/test_vcf') -#@require_permission(["admin_resources"]) -#def download_test_vcf(): -# conn = get_connection() -# -# list_id = request.args.get('list_id') -# require_valid(list_id, "user_variant_lists", conn) -# -# # check that the logged in user is the owner of this list -# require_list_permission(list_id, ['read'], conn) -# variant_ids_oi = conn.get_variant_ids_from_list(list_id) -# -# return Response( -# stream_with_context(download_functions.get_vcf_stream(variant_ids_oi, conn)), -# content_type = "text/event-stream", -# headers={'Content-Disposition': 'attachment; filename=test.txt', 'X-Accel-Buffering': 'no'} -# ) - - # listens on get parameter: raw @download_blueprint.route('/download/vcf/classified') @require_permission(['read_resources']) diff --git a/src/frontend_celery/webapp/download/download_tasks.py b/src/frontend_celery/webapp/download/download_tasks.py index 1676e41d..323ce0a8 100644 --- a/src/frontend_celery/webapp/download/download_tasks.py +++ b/src/frontend_celery/webapp/download/download_tasks.py @@ -7,7 +7,6 @@ from common import paths from common.db_IO import Connection from celery.exceptions import Ignore -import time from . 
import download_functions # errors @@ -72,7 +71,7 @@ def generate_list_vcf(self, list_id, user_roles, download_queue_id, filename): status = "error" message = "There was a runtime error: " + str(e) + ' ' + traceback.format_exc() - print(status) + #print(status) #print(message) if status != "retry": diff --git a/src/frontend_celery/webapp/errorhandlers/errorhandlers.py b/src/frontend_celery/webapp/errorhandlers/errorhandlers.py index 19f6f846..2d7bfd60 100644 --- a/src/frontend_celery/webapp/errorhandlers/errorhandlers.py +++ b/src/frontend_celery/webapp/errorhandlers/errorhandlers.py @@ -12,8 +12,6 @@ def forbidden(e): def internal_server_error(e): original = getattr(e, "original_exception", None) # original != None: unhandled exception - #current_app.logger.exception(e) - if original is None: # triggered if abort(500) is called original = e diff --git a/src/frontend_celery/webapp/static/js/utils.js b/src/frontend_celery/webapp/static/js/utils.js index 75259be5..d23086bf 100644 --- a/src/frontend_celery/webapp/static/js/utils.js +++ b/src/frontend_celery/webapp/static/js/utils.js @@ -540,8 +540,6 @@ function get_consensus_classification_color(classification) { /////////////////////////////////////////////////////////////// function create_trashcan() { - - var image = document.createElementNS("http://www.w3.org/2000/svg", "svg"); image.setAttribute("id", "delete-from-list-button") image.setAttribute("width", 17) @@ -551,7 +549,6 @@ function create_trashcan() { image.classList.add("bi-trash3") image.setAttribute("viewBox", "0 0 16 16") - var path = document.createElementNS("http://www.w3.org/2000/svg", "path"); path.setAttribute("d", "M6.5 1h3a.5.5 0 0 1 .5.5v1H6v-1a.5.5 0 0 1 .5-.5ZM11 2.5v-1A1.5 1.5 0 0 0 9.5 0h-3A1.5 1.5 0 0 0 5 1.5v1H2.506a.58.58 0 0 0-.01 0H1.5a.5.5 0 0 0 0 1h.538l.853 10.66A2 2 0 0 0 4.885 16h6.23a2 2 0 0 0 1.994-1.84l.853-10.66h.538a.5.5 0 0 0 0-1h-.995a.59.59 0 0 0-.01 0H11Zm1.958 1-.846 10.58a1 1 0 0 1-.997.92h-6.23a1 1 0 0 1-.997-.92L3.042 3.5h9.916Zm-7.487 1a.5.5 0 0 1 .528.47l.5 8.5a.5.5 0 0 1-.998.06L5 5.03a.5.5 0 0 1 .47-.53Zm5.058 0a.5.5 0 0 1 .47.53l-.5 8.5a.5.5 0 1 1-.998-.06l.5-8.5a.5.5 0 0 1 .528-.47ZM8 4.5a.5.5 0 0 1 .5.5v8.5a.5.5 0 0 1-1 0V5a.5.5 0 0 1 .5-.5Z") image.appendChild(path) @@ -559,6 +556,25 @@ function create_trashcan() { return image } + +function create_exclamation_mark() { + var image = document.createElementNS("http://www.w3.org/2000/svg", "svg"); + image.setAttribute("width", 16) + image.setAttribute("height", 16) + image.setAttribute("fill", "red") + image.classList.add("bi") + image.classList.add("bi-exclamation-triangle-fill") + image.setAttribute("viewBox", "0 0 16 16") + + var path = document.createElementNS("http://www.w3.org/2000/svg", "path"); + path.setAttribute("d", "M8.982 1.566a1.13 1.13 0 0 0-1.96 0L.165 13.233c-.457.778.091 1.767.98 1.767h13.713c.889 0 1.438-.99.98-1.767L8.982 1.566zM8 5c.535 0 .954.462.9.995l-.35 3.507a.552.552 0 0 1-1.1 0L7.1 5.995A.905.905 0 0 1 8 5zm.002 6a1 1 0 1 1 0 2 1 1 0 0 1 0-2z") + image.appendChild(path) + + return image +} + + + function create_xlg(parent, tooltip = "") { var td = document.createElement("td") td.classList.add('text_align_center') @@ -674,7 +690,7 @@ function remove_tooltip(element) { // utility for showing/updating the current status -function show_status(color_class, tooltip_text, inner_text, pill_holder_id, pill_id) { +function show_status(color_class, tooltip_text, inner_text, pill_holder_id, pill_id, prepend_html=null) { $('#' + pill_id).tooltip('hide') // hide tooltip in 
case it is shown - prevents persisting tooltips on update var pill_holder = document.getElementById(pill_holder_id) pill_holder.innerHTML = "" // delete previous pill @@ -685,6 +701,29 @@ function show_status(color_class, tooltip_text, inner_text, pill_holder_id, pill status_pill.setAttribute('data-bs-toggle', "tooltip") status_pill.setAttribute('title', tooltip_text) status_pill.id=pill_id - status_pill.innerText = inner_text + if (prepend_html != null) { + status_pill.classList.add("d-flex") + status_pill.classList.add("align-items-center") + prepend_html.classList.add("ssr") + status_pill.appendChild(prepend_html) + } + status_pill.appendChild(get_div(inner_text, [])) pill_holder.appendChild(status_pill) // add new pill +} + + +function get_div(text_content, classes, default_text = "") { + var res = document.createElement("div") + + classes.forEach(el => { + res.classList.add(el) + }); + + if (text_content == null) { + res.textContent = default_text + } else { + res.textContent = text_content + } + + return res } \ No newline at end of file diff --git a/src/frontend_celery/webapp/static/js/variant_addition.js b/src/frontend_celery/webapp/static/js/variant_addition.js index a3df1a5d..ad563dab 100644 --- a/src/frontend_celery/webapp/static/js/variant_addition.js +++ b/src/frontend_celery/webapp/static/js/variant_addition.js @@ -5,7 +5,8 @@ const delete_classification_url = flask_data.dataset.deleteClassificationUrl const annotation_status_url = flask_data.dataset.annotationStatusUrl const variant_id = flask_data.dataset.variantId const run_annotation_service_url = flask_data.dataset.runAnnotationServiceUrl - +const heredicare_upload_status_url = flask_data.dataset.heredicareUploadStatusUrl +const clinvar_upload_status_url = flask_data.dataset.clinvarUploadStatusUrl $(document).ready(function(){ @@ -74,6 +75,8 @@ $(document).ready(function(){ }) update_annotation_status(annotation_status_url); + update_heredicare_upload_status(heredicare_upload_status_url) + update_clinvar_upload_status(clinvar_upload_status_url) }) @@ -159,19 +162,189 @@ function show_annotation_status(color_class, tooltip_text, inner_text) { show_status(color_class, tooltip_text, inner_text, pill_holder_id, pill_id) } -//// utility for showing the current annotation status -//function show_annotation_status(color_class, tooltip_text, inner_text) { -// $('#annotation_status_pill').tooltip('hide') -// document.getElementById('annotation_status_pill_holder').innerHTML = "" -// var status_pill = document.createElement('span') -// status_pill.classList.add('badge') -// status_pill.classList.add('rounded-pill') -// status_pill.classList.add(color_class) -// status_pill.setAttribute('data-bs-toggle', "tooltip") -// status_pill.setAttribute('title', tooltip_text) -// status_pill.innerText = inner_text -// annotation_status_pill_holder.appendChild(status_pill) -//} + +function update_clinvar_upload_status(url) { + // send GET request to status URL (defined by flask) + $.ajax({ + type: 'GET', + url: url, + data: {"variant_id": variant_id}, + success: function(data, status, request) { + console.log(data) + if (data === undefined) { + show_clinvar_upload_status("bg-secondary", data, "no ClinVar submission") + } else { + if (data['status'] == 'multiple stati') { + show_clinvar_upload_status("bg-warning", data, "ClinVar multiple stati") + } else if (data['status'] == "processed") { + show_clinvar_upload_status("bg-success", data, "ClinVar success") + } else if (data['status'] == "error") { + show_clinvar_upload_status("bg-danger", 
data, "ClinVar error") + } else if (data['status'] == 'unknown') { + show_clinvar_upload_status("bg-secondary", data, "no ClinVar submission") + + } else { + show_clinvar_upload_status("bg-secondary", data, "ClinVar" + data["status"]) + } + } + }, + error: function(xhr) { + print(xhr) + show_clinvar_upload_status("bg-danger", "", "ClinVar internal error") + } + }) +} + +function show_clinvar_upload_status(color_class, summary, inner_text) { + const pill_holder_id = "clinvar_status_pill_holder" + const pill_id = "clinvar_status_pill" + var prepend_html = null + + if (summary["needs_upload"] && !["progress", "processing", "submitted", "pending", "waiting", "requesting"].includes(summary["status"])) { + prepend_html = create_exclamation_mark() + add_tooltip(prepend_html, "The consensus classification needs to be uploaded to ClinVar!") + } + show_status(color_class, "", inner_text, pill_holder_id, pill_id, prepend_html) + const content = get_clinvar_upload_status_content(summary["queue_entries"], summary["status"], summary["insert_tasks_message"]) + add_popover(pill_holder_id, pill_id, content) +} + +function get_clinvar_upload_status_content(entries, status, overall_message) { + var content = document.createElement("div") + if (entries != null && !["waiting", "requested"].includes(status)) { + // add header + var head = get_div("", ["row", "gx-2", "border-bottom", "bg-light"]) + head.appendChild(get_div("HerediCaRe VID", ["col-3"])) + head.appendChild(get_div("Status", ["col-3", "text-center", "border-start"])) + head.appendChild(get_div("Message", ["col-6", "text-center", "border-start"])) + content.appendChild(head) + + // add content lines + entries.forEach(entry => { + var line = get_div("", ["row", "gx-2"]) + line.appendChild(get_div(entry[6], ["col-3"], "None")) + line.appendChild(get_div(entry[3], ["col-3", "text-center", "border-start"], "None")) + line.appendChild(get_div(entry[4], ["col-6", "text-center", "border-start"], "None")) + content.appendChild(line) + }); + } + + // add overall task message + if (overall_message != "" && overall_message != null) { + content.appendChild(get_div(overall_message, [])) + } + + return content +} + + +function update_heredicare_upload_status(url) { + // send GET request to status URL (defined by flask) + $.ajax({ + type: 'GET', + url: url, + data: {"variant_id": variant_id}, + success: function(data, status, request) { + console.log(data) + if (data === undefined) { + show_heredicare_upload_status("bg-secondary", data, "no HerediCaRe submission") + } else { + if (data['status'] == 'multiple stati') { + show_heredicare_upload_status("bg-warning", data, "HerediCaRe multiple stati") + } else if (data['status'] == "success") { + show_heredicare_upload_status("bg-success", data, "HerediCaRe success") + } else if (data['status'] == "error") { + show_heredicare_upload_status("bg-danger", data, "HerediCaRe error") + } else if (data['status'] == "api_error") { + show_heredicare_upload_status("bg-danger", data, "HerediCaRe api error") + } else if (data['status'] == 'unknown') { + show_heredicare_upload_status("bg-secondary", data, "no HerediCaRe submission") + + } else { + show_heredicare_upload_status("bg-secondary", data, "HerediCaRe" + data["status"]) + } + } + }, + error: function(xhr) { + print(xhr) + show_heredicare_upload_status("bg-danger", "", "HerediCaRe internal error") + } + }) +} + + + + +function show_heredicare_upload_status(color_class, summary, inner_text) { + const pill_holder_id = "heredicare_status_pill_holder" + const pill_id = 
"heredicare_status_pill" + var prepend_html = null + + if (summary["needs_upload"] && ["error", "api_error", "success", "requested", "multiple stati"].includes(summary["status"])) { + prepend_html = create_exclamation_mark() + add_tooltip(prepend_html, "The consensus classification needs to be uploaded to HerediCaRe!") + } + show_status(color_class, "", inner_text, pill_holder_id, pill_id, prepend_html) + const content = get_heredicare_upload_status_content(summary["queue_entries"], summary["status"], summary["insert_tasks_message"]) + add_popover(pill_holder_id, pill_id, content) +} + + +function get_heredicare_upload_status_content(entries, status, overall_message) { + var content = document.createElement("div") + if (entries != null && !["waiting", "requested"].includes(status)) { + // add header + var head = get_div("", ["row", "gx-2", "border-bottom", "bg-light"]) + head.appendChild(get_div("HerediCaRe VID", ["col-3"])) + head.appendChild(get_div("Status", ["col-3", "text-center", "border-start"])) + head.appendChild(get_div("Message", ["col-6", "text-center", "border-start"])) + content.appendChild(head) + + // add content lines + entries.forEach(entry => { + var line = get_div("", ["row", "gx-2"]) + line.appendChild(get_div(entry[5], ["col-3"], "None")) + line.appendChild(get_div(entry[1], ["col-3", "text-center", "border-start"], "None")) + line.appendChild(get_div(entry[4], ["col-6", "text-center", "border-start"], "None")) + content.appendChild(line) + }); + } + + // add overall task message + if (overall_message != "" && overall_message != null) { + content.appendChild(get_div(overall_message, [])) + } + + return content +} + + +function add_popover(parent_id, trigger_id, content) { + var parent = document.getElementById(parent_id) + var trigger = document.getElementById(trigger_id) + + var popover = document.createElement("div"); + popover.classList.add("popover_collapse") + popover.classList.add("collapse") + popover.classList.add("collapse-horizontal") + + var content_holder = document.createElement("div") + content_holder.classList.add("card") + content_holder.classList.add("card-body") + content_holder.classList.add("width_very_large") + content_holder.appendChild(content) + + popover.appendChild(content_holder) + parent.appendChild(popover) + + trigger.classList.add("popover_collapse_toggle") + trigger.setAttribute("tabindex", "0") + trigger.setAttribute("role", "button") + trigger.setAttribute("data-bs-toggle", "collapse") + trigger.setAttribute("data-bs-target", ".popover_collapse_toggle:hover + .popover_collapse") + trigger.setAttribute("aria-expanded", "false") +} + ///////////////////////////////////////////////// diff --git a/src/frontend_celery/webapp/tasks.py b/src/frontend_celery/webapp/tasks.py index cd187dcd..5d7eb1a0 100644 --- a/src/frontend_celery/webapp/tasks.py +++ b/src/frontend_celery/webapp/tasks.py @@ -147,8 +147,6 @@ def get_vid_sets(conn: Connection, do_filter = True): annotation_type_id = conn.get_most_recent_annotation_type_id("heredicare_vid") all_vids_heredivar = conn.get_all_external_ids_from_annotation_type(annotation_type_id) - #intersection, heredivar_exclusive_vids, heredicare_exclusive_vids = compare_v_id_lists(all_vids_heredicare, vids_heredivar, filtered_vids_heredicare) - filtered_vids_heredicare = set(filtered_vids_heredicare) all_vids_heredivar = set(all_vids_heredivar) all_vids_heredicare = set(all_vids_heredicare) @@ -507,13 +505,8 @@ def map_hg38(variant, user_id, conn:Connection, insert_variant = True, perform_a #transcripts = 
conn.get_mane_select_for_gene(gene_id) if transcripts is not None: - #print(transcripts) - #print(variant["CHGVS"]) - chrom, pos, ref, alt, err_msg = functions.hgvsc_to_vcf([variant["CHGVS"]]*len(transcripts), transcripts) # convert to vcf - #print(err_msg) - if 'unequal' in err_msg: if err_msg not in message: message = functions.collect_info(message, "hgvs_msg=", err_msg, sep = " ~~ ") @@ -577,8 +570,8 @@ def validate_and_insert_variant(chrom, pos, ref, alt, genome_build, conn: Connec message = "" was_successful = True variant_id = None - # validate request + # validate request chrom, chrom_is_valid = functions.curate_chromosome(chrom) ref, ref_is_valid = functions.curate_sequence(ref, allowed_sequence_letters) alt, alt_is_valid = functions.curate_sequence(alt, allowed_sequence_letters) @@ -607,15 +600,15 @@ def validate_and_insert_variant(chrom, pos, ref, alt, genome_build, conn: Connec was_successful = False return was_successful, message, variant_id - - + # variant information valid tmp_file_path = functions.get_random_temp_file("vcf") functions.variant_to_vcf(chrom, pos, ref, alt, tmp_file_path, genome_build) + # lift if necessary + leftalign + check reference base do_liftover = genome_build == 'GRCh37' returncode, err_msg, command_output = functions.preprocess_variant(tmp_file_path, do_liftover = do_liftover) - + # assess problems with the variant itself if returncode != 0: message = err_msg was_successful = False @@ -674,7 +667,6 @@ def validate_and_insert_variant(chrom, pos, ref, alt, genome_build, conn: Connec if not insert_variant: message += "HG38 variant would be: " + '-'.join([str(new_chr), str(new_pos), str(new_ref), str(new_alt)]) - functions.rm(tmp_file_path) functions.rm(tmp_file_path + ".lifted.unmap") return was_successful, message, variant_id @@ -712,7 +704,6 @@ def validate_and_insert_cnv(chrom: str, start: int, end: int, sv_type: str, impr was_successful = False return was_successful, message, variant_id - do_liftover = genome_build == 'GRCh37' if do_liftover: tmp_file_path = functions.get_random_temp_file("bed") @@ -764,7 +755,6 @@ def validate_and_insert_cnv(chrom: str, start: int, end: int, sv_type: str, impr start = new_start end = new_end - is_duplicate = conn.check_sv_duplicate(chrom, start, end, sv_type) if not is_duplicate and insert_variant: @@ -846,7 +836,9 @@ def annotate_variant(self, annotation_queue_id, job_config): - +################################################## +############## other random tasks ################ +################################################## def send_mail(subject, sender, recipient, text_body): diff --git a/src/frontend_celery/webapp/templates/index.html b/src/frontend_celery/webapp/templates/index.html index 6d1420f3..361cccb3 100644 --- a/src/frontend_celery/webapp/templates/index.html +++ b/src/frontend_celery/webapp/templates/index.html @@ -93,6 +93,18 @@

Overview

Changelog

+
v 1.14.1 (30.08.2024)
+
+ General changes: +
    +
  • Improved stability of HerediCaRe uploads
  • +
+ Bugfixes: +
    +
  • Fixed several issues with the display and refreshing of the HerediCaRe and ClinVar upload status
  • +
+
+
v 1.14 (23.08.2024)
General changes: diff --git a/src/frontend_celery/webapp/templates/variant/variant.html b/src/frontend_celery/webapp/templates/variant/variant.html index addb5431..967c100b 100644 --- a/src/frontend_celery/webapp/templates/variant/variant.html +++ b/src/frontend_celery/webapp/templates/variant/variant.html @@ -20,105 +20,16 @@
- {% set pill_color = "bg-secondary" %} - {% if clinvar_queue_entry_summary["status"] == "multiple stati" %} - {% set pill_color = "bg-warning" %} - {% endif %} - {% if clinvar_queue_entry_summary["status"] in ["processed"] %} - {% set pill_color = "bg-success" %} - {% endif %} - {% if clinvar_queue_entry_summary["status"] in ["error"] %} - {% set pill_color = "bg-danger" %} - {% endif %} -
- {% set pill_color = "bg-secondary" %} - {% if heredicare_queue_entry_summary["status"] == "multiple stati" %} - {% set pill_color = "bg-warning" %} - {% endif %} - {% if heredicare_queue_entry_summary["status"] == "success" %} - {% set pill_color = "bg-success" %} - {% endif %} - {% if heredicare_queue_entry_summary["status"] == "error" or heredicare_queue_entry_summary["status"] == "api_error" %} - {% set pill_color = "bg-danger" %} - {% endif %} -
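Note on the two removed Jinja blocks above: the status pill colour is no longer rendered server side; after this patch it is derived client side from the JSON summary served by the new upload-status endpoints. As a minimal illustrative sketch (not part of the patch), assuming the summary dict produced by summarize_clinvar_status()/summarize_heredicare_status(), the mapping amounts to:

    def pill_color(summary: dict) -> str:
        # summary is the dict built by summarize_clinvar_status() / summarize_heredicare_status():
        # {"status": ..., "needs_upload": ..., "insert_tasks_message": ..., "queue_entries": ...}
        status = summary["status"]
        if status == "multiple stati":
            return "bg-warning"
        if status in ("success", "processed"):   # HerediCaRe reports "success", ClinVar "processed"
            return "bg-success"
        if status in ("error", "api_error"):
            return "bg-danger"
        return "bg-secondary"                    # e.g. "unknown", "waiting", "skipped"

The if/else chains in update_clinvar_upload_status() and update_heredicare_upload_status() in variant_addition.js implement the same mapping in the browser.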
@@ -304,6 +215,8 @@

IGV

data-annotation-status-url='{{ url_for("variant.annotation_status") }}' data-variant-id="{{ variant.id }}" data-run-annotation-service-url='{{ url_for("variant.start_annotation_service") }}' + data-heredicare-upload-status-url='{{ url_for("variant.get_heredicare_upload_status") }}' + data-clinvar-upload-status-url='{{ url_for("variant.get_clinvar_upload_status") }}' /> {% endblock %} @@ -312,22 +225,3 @@

IGV

{% block additional_scripts %} {% endblock %} - \ No newline at end of file diff --git a/src/frontend_celery/webapp/upload/upload_functions.py b/src/frontend_celery/webapp/upload/upload_functions.py index 1769e403..cdf0ae8a 100644 --- a/src/frontend_celery/webapp/upload/upload_functions.py +++ b/src/frontend_celery/webapp/upload/upload_functions.py @@ -3,7 +3,6 @@ import sys sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))) from common import functions -from common.heredicare_interface import Heredicare from common.db_IO import Connection sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) from utils import * diff --git a/src/frontend_celery/webapp/upload/upload_routes.py b/src/frontend_celery/webapp/upload/upload_routes.py index 814b8a26..695fceff 100644 --- a/src/frontend_celery/webapp/upload/upload_routes.py +++ b/src/frontend_celery/webapp/upload/upload_routes.py @@ -143,4 +143,3 @@ def edit_clinvar_submissions(variant_id): return redirect(url_for('variant.display', variant_id = variant_id)) return render_template('upload/edit_clinvar_submissions.html', variant = variant, clinvar_submissions = clinvar_submissions) - diff --git a/src/frontend_celery/webapp/user/user_functions.py b/src/frontend_celery/webapp/user/user_functions.py deleted file mode 100644 index a4d8a57b..00000000 --- a/src/frontend_celery/webapp/user/user_functions.py +++ /dev/null @@ -1,20 +0,0 @@ -from flask import render_template, request, url_for, flash, redirect, Blueprint, current_app, session, jsonify -from os import path -import sys -sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))) -from common.db_IO import Connection -import common.functions as functions -from werkzeug.exceptions import abort - -from datetime import datetime -from ..utils import * -from flask_paginate import Pagination -import annotation_service.main as annotation_service -import frontend_celery.webapp.tasks as tasks -import random - - - - - - diff --git a/src/frontend_celery/webapp/user/user_routes.py b/src/frontend_celery/webapp/user/user_routes.py index 14d1d765..9956c394 100644 --- a/src/frontend_celery/webapp/user/user_routes.py +++ b/src/frontend_celery/webapp/user/user_routes.py @@ -1,18 +1,14 @@ from flask import render_template, request, url_for, flash, redirect, Blueprint, current_app, session, jsonify +from flask_paginate import Pagination from os import path import sys sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))) from common.db_IO import Connection -from werkzeug.exceptions import abort import common.functions as functions -from datetime import datetime from ..utils import * -from flask_paginate import Pagination import annotation_service.main as annotation_service import frontend_celery.webapp.tasks as tasks -import random - -from . 
import user_functions +from werkzeug.exceptions import abort user_blueprint = Blueprint( 'user', @@ -425,7 +421,6 @@ def get_vis_page(request_args, import_queue_id, static_information, conn: Connec vids = extract_vids_vids(request_args) imported_variants, total = conn.get_imported_variants_page(comments, stati, vids, import_queue_id, page, page_size) - return imported_variants, total, page, page_size @@ -466,9 +461,6 @@ def variant_import_summary_data(import_queue_id): return jsonify({'import_request': import_request}) - - - # shows asll variant publish requests in server sided pagination # for both, clinvar and heredicare @user_blueprint.route('/variant_publish_history') diff --git a/src/frontend_celery/webapp/user/user_tasks.py b/src/frontend_celery/webapp/user/user_tasks.py deleted file mode 100644 index dd6fa1fc..00000000 --- a/src/frontend_celery/webapp/user/user_tasks.py +++ /dev/null @@ -1,17 +0,0 @@ -from os import path -import sys -sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))) -from common.db_IO import Connection -from common import functions -from common.heredicare_interface import Heredicare -sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) -from webapp import celery, utils - -# errors: -from mysql.connector import Error, InternalError -from urllib.error import HTTPError -from celery.exceptions import Ignore -from werkzeug.exceptions import abort -import traceback - - diff --git a/src/frontend_celery/webapp/utils/decorators.py b/src/frontend_celery/webapp/utils/decorators.py index f02ea66f..4eb9f585 100644 --- a/src/frontend_celery/webapp/utils/decorators.py +++ b/src/frontend_celery/webapp/utils/decorators.py @@ -15,23 +15,6 @@ import common.functions as functions from common.db_IO import Connection -## used for api endpoints to check the bearer token header -## similar to require login, but for api endpoints -#def accept_api_token(f): -# @wraps(f) -# def decorated_function(*args, **kwargs): -# -# authorization_header = parse_authorization_header(request.headers.get('Authorization')) -# -# conn = Connection() -# api_key_ok = conn.check_api_key(authorization_header['apikey'], authorization_header['username']) -# conn.close() -# -# if not api_key_ok: -# abort(403, "Invalid credentials") -# -# return f(*args, **kwargs) -# return decorated_function def require_api_token_permission(roles): def decorator(f): @@ -111,13 +94,6 @@ def decorated_function(*args, **kwargs): resp = requests.post(url, data=data, headers=header) resp.raise_for_status() resp = resp.json() - #print(resp['active']) - - #url = f'{issuer}/protocol/openid-connect/userinfo' - #test_resp = requests.get(url, headers = {'Authorization': f'Bearer {token.get("access_token")}'}) - #print(token) - #print(test_resp) - #print(test_resp.text) # if access token is not valid request a new one using the refresh token if not resp['active']: diff --git a/src/frontend_celery/webapp/utils/import_heredicare.py b/src/frontend_celery/webapp/utils/import_heredicare.py index c24e7fcf..c9595319 100644 --- a/src/frontend_celery/webapp/utils/import_heredicare.py +++ b/src/frontend_celery/webapp/utils/import_heredicare.py @@ -4,8 +4,6 @@ from common.db_IO import Connection import common.functions as functions import common.paths as paths -import json -import argparse sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) import upload.upload_functions as upload_functions import frontend_celery.webapp.tasks as tasks diff --git 
a/src/frontend_celery/webapp/test.py b/src/frontend_celery/webapp/utils/test_upload.py similarity index 100% rename from src/frontend_celery/webapp/test.py rename to src/frontend_celery/webapp/utils/test_upload.py diff --git a/src/frontend_celery/webapp/utils/upload_status_checker.py b/src/frontend_celery/webapp/utils/upload_status_checker.py index 35e97d9e..5bac9f21 100644 --- a/src/frontend_celery/webapp/utils/upload_status_checker.py +++ b/src/frontend_celery/webapp/utils/upload_status_checker.py @@ -75,7 +75,7 @@ def check_update_heredicare_status(variant_id, publish_queue_ids_oi: list, conn: publish_heredicare_queue_id = heredicare_queue_entry[0] status = heredicare_queue_entry[1] submission_id = heredicare_queue_entry[7] - if status in ['pending', 'progress', 'submitted'] and submission_id is not None: + if status in ['pending', 'progress', 'submitted', 'retry'] and submission_id is not None: finished_at, status, message = check_heredicare_status(submission_id) conn.update_publish_heredicare_queue_status(publish_heredicare_queue_id, status, message, finished_at = finished_at) got_update = True @@ -98,10 +98,6 @@ def check_update_heredicare_status(variant_id, publish_queue_ids_oi: list, conn: return heredicare_queue_entries -#def check_update_all_progressing_heredicare(conn: Connection): -# variant_ids = conn.get_variant_ids_by_publish_heredicare_status(stati = ['pending', 'progress', 'submitted']) -# for variant_id in variant_ids: -# heredicare_queue_entries = check_update_heredicare_status(variant_id, conn) def check_update_all(variant_ids: list, publish_queue_ids_oi: list, conn: Connection): for variant_id in variant_ids: diff --git a/src/frontend_celery/webapp/variant/variant_functions.py b/src/frontend_celery/webapp/variant/variant_functions.py index 8691eec2..6c0dc008 100644 --- a/src/frontend_celery/webapp/variant/variant_functions.py +++ b/src/frontend_celery/webapp/variant/variant_functions.py @@ -71,7 +71,6 @@ def handle_consensus_classification(variant, classification, comment, scheme_id, criteria[criterium_id]['strength_description'] = conn.get_classification_criterium_strength(criteria[criterium_id]['criterium_strength_id'])[3] evidence_b64 = functions.buffer_to_base64(io.BytesIO()) - #functions.base64_to_file(evidence_b64, '/mnt/users/ahdoebm1/HerediVar/src/frontend/downloads/consensus_classification_reports/testreport.pdf') conn.insert_consensus_classification(session['user']['user_id'], variant.id, classification, comment, evidence_document=evidence_b64, date = current_datetime, scheme_id = scheme_id, scheme_class = scheme_class) return conn.get_last_insert_id() # returns the consensus_classification_id @@ -314,7 +313,11 @@ def handle_selected_literature(previous_selected_literature, classification_id, #heredicare_queue_entries: ALL heredicare queue entries until error or success is hit #publish_queue_heredicare_queue_entries: ONLY entries of publish_queue def summarize_heredicare_status(heredicare_queue_entries, publish_queue, mrcc): - summary = {"status": "unknown", "max_requested_at": "unknown", "insert_tasks_message": ""} + summary = {"status": "unknown", "max_requested_at": "unknown", "insert_tasks_message": "", "needs_upload": False, "queue_entries": heredicare_queue_entries} + + if mrcc is not None: + if mrcc.needs_heredicare_upload: + summary["needs_upload"] = True if publish_queue is not None: # fresh upload - preferred prefer_publish_queue_status = True @@ -365,8 +368,11 @@ def summarize_heredicare_status(heredicare_queue_entries, publish_queue, mrcc): def 
summarize_clinvar_status(clinvar_queue_entries, publish_queue, mrcc): + summary = {"status": "unknown", "insert_tasks_message": "", "needs_upload": False, "queue_entries": clinvar_queue_entries} - summary = {"status": "unknown", "insert_tasks_message": ""} + if mrcc is not None: + if mrcc.needs_clinvar_upload: + summary["needs_upload"] = True if publish_queue is not None: # fresh upload - preferred prefer_publish_queue_status = True @@ -402,29 +408,6 @@ def summarize_clinvar_status(clinvar_queue_entries, publish_queue, mrcc): if all_skipped: summary["status"] = "skipped" - #if publish_queue is not None: - # if clinvar_queue_entries is not None: - # all_skipped = True - # for clinvar_queue_entry in clinvar_queue_entries: - # current_status = clinvar_queue_entry[3] - # if current_status == 'skipped': - # continue - # all_skipped = False - # if summary["status"] == "unknown": - # summary["status"] = current_status - # elif summary["status"] != current_status: - # summary["status"] = "multiple stati" - # if all_skipped: - # summary["status"] = "skipped" - # else: - # if publish_queue.status == 'error': - # summary["status"] = "error" - # summary["insert_tasks_message"] = publish_queue.insert_tasks_message - # if publish_queue.insert_tasks_status == 'pending': - # summary["status"] = "waiting" - # elif publish_queue.insert_tasks_status == 'progress': - # summary["status"] = "requesting" - return summary @@ -463,7 +446,6 @@ def create_variant_from_request(request_obj, user, conn): result["flash_message"] = 'All fields are required!' result["flash_class"] = 'alert-danger flash_id:missing_data_vcf' result["status"] = "error" - #flash('All fields are required!', 'alert-danger flash_id:missing_data_vcf') else: was_successful, message, variant_id = tasks.validate_and_insert_variant(chrom, pos, ref, alt, genome_build, conn = conn, user_id = user.id) new_variant = conn.get_variant(variant_id, include_annotations=False, include_consensus = False, include_user_classifications = False, include_heredicare_classifications = False, include_automatic_classification=False, include_clinvar = False, include_consequences = False, include_assays = False, include_literature = False, include_external_ids=False) @@ -472,22 +454,17 @@ def create_variant_from_request(request_obj, user, conn): result["flash_class"] = 'alert-danger flash_id:variant_already_in_database' result["flash_link"] = url_for("variant.display", variant_id = new_variant.id) result["status"] = "skipped" - #flash({"message": "Variant not imported: " + new_variant.get_string_repr() + " already in database! View the variant", - # "link": url_for("variant.display", variant_id = new_variant.id)}, "alert-danger flash_id:variant_already_in_database") elif was_successful: result["flash_message"] = "Successfully inserted variant: " + new_variant.get_string_repr() result["flash_class"] = 'alert-success flash_id:successful_variant_from_vcf' result["flash_link"] = url_for("variant.display", variant_id = new_variant.id) result["status"] = "success" - #flash({"message": "Successfully inserted variant: " + new_variant.get_string_repr() + ". 
View your variant", - # "link": url_for("variant.display", variant_id = new_variant.id)}, "alert-success flash_id:successful_variant_from_vcf") current_app.logger.info(str(user.id) + " successfully created a new variant from vcf which resulted in this vcf-style variant: " + ' '.join([str(new_variant.chrom), str(new_variant.pos), new_variant.ref, new_variant.alt, "GRCh38"])) do_redirect = True else: # import had an error result["flash_message"] = message result["flash_class"] = 'alert-danger flash_id:variant_from_vcf_error' result["status"] = "error" - #flash(message, 'alert-danger flash_id:variant_from_vcf_error') if create_variant_from == 'hgvsc': reference_transcript = request_obj.form.get('transcript') @@ -497,13 +474,11 @@ def create_variant_from_request(request_obj, user, conn): result["flash_message"] = 'All fields are required!' result["flash_class"] = 'alert-danger flash_id:missing_data_hgvs' result["status"] = "error" - #flash('All fields are required!', 'alert-danger flash_id:missing_data_hgvs') else: chrom, pos, ref, alt, possible_errors = functions.hgvsc_to_vcf(reference_transcript + ':' + hgvsc) if possible_errors != '': flash_message = possible_errors flash_class = 'alert-danger flash_id:variant_from_hgvs_error' - #flash(possible_errors, "alert-danger flash_id:variant_from_hgvs_error") else: was_successful, message, variant_id = tasks.validate_and_insert_variant(chrom, pos, ref, alt, 'GRCh38', conn = conn, user_id = session['user']['user_id']) new_variant = conn.get_variant(variant_id, include_annotations=False, include_consensus = False, include_user_classifications = False, include_heredicare_classifications = False, include_clinvar = False, include_consequences = False, include_assays = False, include_literature = False) @@ -512,22 +487,17 @@ def create_variant_from_request(request_obj, user, conn): result["flash_class"] = 'alert-danger flash_id:variant_already_in_database' result["flash_link"] = url_for("variant.display", variant_id = new_variant.id) result["status"] = "skipped" - #flash({"message": "Variant not imported: " + new_variant.get_string_repr() + " already in database! View your variant", - # "link": url_for("variant.display", variant_id = new_variant.id)}, "alert-danger flash_id:variant_already_in_database") elif was_successful: result["flash_message"] = "Successfully inserted variant: " + new_variant.get_string_repr() result["flash_class"] = 'alert-success flash_id:successful_variant_from_hgvs' result["flash_link"] = url_for("variant.display", variant_id = new_variant.id) result["status"] = "success" - #flash({"message": "Successfully inserted variant: " + new_variant.get_string_repr() + ". 
View your variant", - # "link": url_for("variant.display", variant_id = new_variant.id)}, "alert-success flash_id:successful_variant_from_hgvs") current_app.logger.info(str(user.id) + " successfully created a new variant from hgvs: " + hgvsc + "Which resulted in this vcf-style variant: " + ' '.join([str(new_variant.chrom), str(new_variant.pos), new_variant.ref, new_variant.alt, "GRCh38"])) do_redirect = True else: result["flash_message"] = message result["flash_class"] = "alert-danger flash_id:variant_from_hgvs_error" result["status"] = "error" - #flash(message, 'alert-danger flash_id:variant_from_hgvs_error') if create_variant_from == 'vcf_file' and current_app.config["vcf_file_import_active"]: genome_build = request_obj.form.get('genome') @@ -535,7 +505,6 @@ def create_variant_from_request(request_obj, user, conn): result["flash_message"] = 'You must specify the genome build and select a vcf file.' result["flash_class"] = "alert-danger" result["status"] = "error" - #flash('You must specify the genome build and select a vcf file.', 'alert-danger') else: file = request_obj.files['file'] filename = file.filename @@ -544,7 +513,6 @@ def create_variant_from_request(request_obj, user, conn): result["flash_message"] = 'No valid file selected.' result["flash_class"] = "alert-danger" result["status"] = "error" - #flash('No valid file selected.', 'alert-danger') else: filepath = functions.get_random_temp_file(fileending = "tsv", filename_ext = "import_vcf") with open(filepath, "w") as f: # file is deleted in task + we have to write to disk because filehandle can not be json serialized and thus, can not be given to a celery task diff --git a/src/frontend_celery/webapp/variant/variant_routes.py b/src/frontend_celery/webapp/variant/variant_routes.py index 362f4939..8b5758bf 100644 --- a/src/frontend_celery/webapp/variant/variant_routes.py +++ b/src/frontend_celery/webapp/variant/variant_routes.py @@ -123,7 +123,6 @@ def create_sv(): @variant_blueprint.route('/display/', methods=['GET']) @variant_blueprint.route('/display/chr=&pos=&ref=&alt=', methods=['GET']) # alternative url using vcf information -# example: http:#srv018.img.med.uni-tuebingen.de:5000/display/chr=chr2&pos=214767531&ref=C&alt=T is the same as: http:#srv018.img.med.uni-tuebingen.de:5000/display/17 @require_permission(['read_resources']) def display(variant_id=None, chr=None, pos=None, ref=None, alt=None): conn = get_connection() @@ -137,35 +136,50 @@ def display(variant_id=None, chr=None, pos=None, ref=None, alt=None): # get available lists for user lists = conn.get_lists_for_user(user_id = session['user']['user_id'], variant_id=variant_id) - - # get the variant and all its annotations + # get variant variant = conn.get_variant(variant_id) - mrcc = variant.get_recent_consensus_classification() + + return render_template('variant/variant.html', + lists = lists, + variant = variant, + is_classification_report = False + ) + + +@variant_blueprint.route('/get_clinvar_upload_status', methods=['GET']) +@require_permission(['read_resources']) +def get_clinvar_upload_status(): + conn = get_connection() + + variant_id = request.args.get('variant_id') + require_valid(variant_id, "variant", conn) - # get current status of clinvar submission most_recent_publish_queue = conn.get_most_recent_publish_queue(variant_id = variant_id, upload_clinvar = True) publish_queue_ids_oi = conn.get_most_recent_publish_queue_ids_clinvar(variant_id) clinvar_queue_entries = check_update_clinvar_status(variant_id, publish_queue_ids_oi, conn) + + mrcc = 
conn.get_variant(variant_id, include_annotations=False, include_automatic_classification=False, include_clinvar = False, include_consequences=False).get_recent_consensus_classification() clinvar_queue_entry_summary = variant_functions.summarize_clinvar_status(clinvar_queue_entries, most_recent_publish_queue, mrcc) - - # get current status of heredicare submission + return clinvar_queue_entry_summary + + +@variant_blueprint.route('/get_heredicare_upload_status', methods=['GET']) +@require_permission(['read_resources']) +def get_heredicare_upload_status(): + conn = get_connection() + + variant_id = request.args.get('variant_id') + require_valid(variant_id, "variant", conn) + most_recent_publish_queue = conn.get_most_recent_publish_queue(variant_id = variant_id, upload_heredicare = True) publish_queue_ids_oi = conn.get_most_recent_publish_queue_ids_heredicare(variant_id) heredicare_queue_entries = check_update_heredicare_status(variant_id, publish_queue_ids_oi, conn) - #most_recent_heredicare_queue_entries = conn.get_heredicare_queue_entries([most_recent_publish_queue.id], variant_id) + + mrcc = conn.get_variant(variant_id, include_annotations=False, include_automatic_classification=False, include_clinvar = False, include_consequences=False).get_recent_consensus_classification() heredicare_queue_entry_summary = variant_functions.summarize_heredicare_status(heredicare_queue_entries, most_recent_publish_queue, mrcc) - - - return render_template('variant/variant.html', - lists = lists, - variant = variant, - is_classification_report = False, - clinvar_queue_entries = clinvar_queue_entries, - clinvar_queue_entry_summary = clinvar_queue_entry_summary, - heredicare_queue_entries = heredicare_queue_entries, - heredicare_queue_entry_summary = heredicare_queue_entry_summary - ) + + return heredicare_queue_entry_summary @variant_blueprint.route('/start_annotation_service', methods=['POST']) @@ -328,9 +342,8 @@ def consensus_classify(variant_id): variant = conn.get_variant(variant_id) classification_schemas = conn.get_classification_schemas() - #classification_schemas = {schema_id: classification_schemas[schema_id] for schema_id in classification_schemas} # remove no-scheme classification as this can not be submitted to clinvar - # this is also used to preselect from previous user classify submissions + # previous_classifications is also used to preselect from previous user classify submissions # -1 is the imaginary user id for the consensus classifications previous_classifications = {-1: variant.get_recent_consensus_classification_all_schemes(convert_to_dict = True)}
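For reference, a hedged usage sketch of the two new upload-status routes added above. This is illustrative only: BASE_URL and the absence of a blueprint URL prefix are assumptions, and the requests.Session must already carry a login with the 'read_resources' permission.

    import requests

    BASE_URL = "https://heredivar.example.org"  # hypothetical host, blueprint prefix assumed empty

    def fetch_upload_status(session: requests.Session, kind: str, variant_id: int) -> dict:
        # kind is "clinvar" or "heredicare"; both endpoints read variant_id from the query string
        resp = session.get(f"{BASE_URL}/get_{kind}_upload_status", params={"variant_id": variant_id})
        resp.raise_for_status()
        # the response body is the summary dict from summarize_clinvar_status()/summarize_heredicare_status(),
        # i.e. the keys "status", "needs_upload", "insert_tasks_message" and "queue_entries"
        return resp.json()

    # usage sketch (the session must already hold a valid login cookie):
    # with requests.Session() as s:
    #     print(fetch_upload_status(s, "heredicare", 17)["status"])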