Skip to content

Commit

Permalink
update external ids & search for external ids
Browse files Browse the repository at this point in the history
  • Loading branch information
MarvinDo committed Nov 3, 2023
1 parent b3e58be commit 1b0d1a1
Show file tree
Hide file tree
Showing 11 changed files with 253 additions and 105 deletions.
38 changes: 38 additions & 0 deletions resources/backups/database_dumper/update_database_2.sql
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,41 @@ ADD CONSTRAINT `FK_variant_heredicare_annotation_variant`

ALTER TABLE `HerediVar_ahdoebm1`.`annotation_queue`
CHANGE COLUMN `error_message` `error_message` TEXT NULL DEFAULT '' ;


-- Add the annotation_type_id column to variant_ids, positioned directly after id_source.
-- NOTE(review): the column is NOT NULL with no DEFAULT, so pre-existing rows presumably
-- receive the implicit default (0) -- confirm that every row is assigned a valid
-- annotation_type id before a foreign key is created on this column.
ALTER TABLE `HerediVar_ahdoebm1`.`variant_ids`
ADD COLUMN `annotation_type_id` INT UNSIGNED NOT NULL AFTER `id_source`;


INSERT INTO `HerediVar_ahdoebm1`.`annotation_type` (`title`, `display_title`, `description`, `value_type`, `version`, `version_date`, `group_name`, `is_transcript_specific`) VALUES ('heredicare_vid', 'HerediCare VID', 'The VID from HerediCare.The version_date is inaccurate. They are always up to date when reimporting from heredicare.', 'int', '-', '2023-01-01', 'ID', '0');

-- Backfill annotation_type_id for rows whose id_source is 'heredicare' with the id of
-- the 'heredicare_vid' annotation type. The terminating semicolon is required so that
-- this statement does not run into the following ALTER TABLE when executed as a script.
UPDATE variant_ids SET annotation_type_id = (SELECT id FROM annotation_type WHERE title = 'heredicare_vid') WHERE id_source = 'heredicare';


ALTER TABLE `HerediVar_ahdoebm1`.`variant_ids`
ADD INDEX `FK_variant_ids_annotation_type_idx` (`annotation_type_id` ASC);
;
ALTER TABLE `HerediVar_ahdoebm1`.`variant_ids`
ADD CONSTRAINT `FK_variant_ids_annotation_type`
FOREIGN KEY (`annotation_type_id`)
REFERENCES `HerediVar_ahdoebm1`.`annotation_type` (`id`)
ON DELETE NO ACTION
ON UPDATE NO ACTION;


ALTER TABLE `HerediVar_ahdoebm1`.`variant_ids`
DROP COLUMN `id_source`,
DROP INDEX `variant_id_external_id_id_source_key` ,
ADD UNIQUE INDEX `variant_id_external_id_id_source_key` (`variant_id` ASC, `external_id` ASC);
;

ALTER TABLE `HerediVar_ahdoebm1`.`variant_ids`
DROP INDEX `variant_id_external_id_id_source_key`;
ALTER TABLE `HerediVar_ahdoebm1`.`variant_ids`
ADD UNIQUE INDEX `unique_variant_ids` (`variant_id` ASC, `external_id` ASC, `annotation_type_id` ASC);
;
-- Move the 'rsid' annotation type into the 'ID' group.
-- Uses a single-quoted string literal (a double-quoted "ID" is parsed as an identifier
-- when the ANSI_QUOTES SQL mode is enabled) and terminates the statement so it does not
-- run into the following INSERT when executed as a script.
UPDATE annotation_type SET group_name = 'ID' WHERE title = 'rsid';



INSERT INTO `HerediVar_ahdoebm1`.`annotation_type` (`title`, `display_title`, `description`, `value_type`, `version`, `version_date`, `group_name`, `is_transcript_specific`) VALUES ('clinvar', 'ClinVar variation ID', 'The Variation ID from ClinVar', 'int', '-', '2023-02-26', 'None', '0');
13 changes: 12 additions & 1 deletion src/annotation_service/annotation_jobs/_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,15 @@ def insert_annotation(self, variant_id, info, info_name, annotation_type_id, con
if value_modifier_function is not None:
value = value_modifier_function(value)

conn.insert_variant_annotation(variant_id, annotation_type_id, value)
conn.insert_variant_annotation(variant_id, annotation_type_id, value)

def insert_external_id(self, variant_id, info, info_name, annotation_type_id, conn, value_modifier_function = None):
    """Extract an external id from ``info`` and store it for ``variant_id``.

    The raw value is looked up with ``functions.find_between(info, info_name, '(;|$)')``
    (presumably the text following ``info_name`` up to the next ``;`` or the end of
    the string -- confirm against ``find_between``). If that lookup yields ``None``
    or an empty string, nothing is written.

    Args:
        variant_id: Id of the variant the external id belongs to.
        info: The string to search for the value.
        info_name: Start marker handed to ``find_between`` (e.g. ``"dbSNP_RS="``).
        annotation_type_id: Annotation type id stored alongside the external id.
        conn: Connection object providing ``insert_external_variant_id``.
        value_modifier_function: Optional callable applied to the extracted value
            before it is stored.
    """
    raw_id = functions.find_between(info, info_name, '(;|$)')

    # Only persist when the lookup actually produced a non-empty value.
    if raw_id is not None and raw_id != '':
        if value_modifier_function is None:
            external_id = raw_id
        else:
            external_id = value_modifier_function(raw_id)

        conn.insert_external_variant_id(
            variant_id = variant_id,
            annotation_type_id = annotation_type_id,
            external_id = external_id
        )
54 changes: 28 additions & 26 deletions src/annotation_service/annotation_jobs/annotate_from_vcf_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,46 +45,47 @@ def execute(self, inpath, annotated_inpath, **kwargs):


def save_to_db(self, info, variant_id, conn):
self.insert_annotation(variant_id, info, "dbSNP_RS=", 3, conn)
recent_annotation_ids = conn.get_recent_annotation_type_ids()
self.insert_external_id(variant_id, info, "dbSNP_RS=", recent_annotation_ids['rsid'], conn)

self.insert_annotation(variant_id, info, "REVEL=", 6, conn)
self.insert_annotation(variant_id, info, "REVEL=", recent_annotation_ids['revel'], conn)

self.insert_annotation(variant_id, info, "CADD=", 5, conn)
self.insert_annotation(variant_id, info, "CADD=", recent_annotation_ids['cadd_scaled'], conn)

self.insert_annotation(variant_id, info, "GnomAD_AC=", 11, conn)
self.insert_annotation(variant_id, info, "GnomAD_AF=", 12, conn)
self.insert_annotation(variant_id, info, "GnomAD_hom=", 13, conn)
self.insert_annotation(variant_id, info, "GnomAD_hemi=", 14, conn)
self.insert_annotation(variant_id, info, "GnomAD_het=", 15, conn)
self.insert_annotation(variant_id, info, "GnomAD_popmax=", 16, conn, value_modifier_function = lambda value : value.upper())
self.insert_annotation(variant_id, info, "GnomAD_AF_popmax=", 51, conn)
self.insert_annotation(variant_id, info, "GnomADm_AC_hom=", 17, conn)
self.insert_annotation(variant_id, info, "GnomAD_AC=", recent_annotation_ids['gnomad_ac'], conn)
self.insert_annotation(variant_id, info, "GnomAD_AF=", recent_annotation_ids['gnomad_af'], conn)
self.insert_annotation(variant_id, info, "GnomAD_hom=", recent_annotation_ids['gnomad_hom'], conn)
self.insert_annotation(variant_id, info, "GnomAD_hemi=", recent_annotation_ids['gnomad_hemi'], conn)
self.insert_annotation(variant_id, info, "GnomAD_het=", recent_annotation_ids['gnomad_het'], conn)
self.insert_annotation(variant_id, info, "GnomAD_popmax=", recent_annotation_ids['gnomad_popmax'], conn, value_modifier_function = lambda value : value.upper())
self.insert_annotation(variant_id, info, "GnomAD_AF_popmax=", recent_annotation_ids['gnomad_popmax_AF'], conn)
self.insert_annotation(variant_id, info, "GnomADm_AC_hom=", recent_annotation_ids['gnomadm_ac_hom'], conn)

self.insert_annotation(variant_id, info, "BRCA_exchange_clin_sig_short=", 18, conn, value_modifier_function = lambda value : value.replace('_', ' ').replace(',', ';'))
self.insert_annotation(variant_id, info, "BRCA_exchange_clin_sig_short=", recent_annotation_ids['brca_exchange_clinical_significance'], conn, value_modifier_function = lambda value : value.replace('_', ' ').replace(',', ';'))

self.insert_annotation(variant_id, info, "FLOSSIES_num_afr=", 19, conn)
self.insert_annotation(variant_id, info, "FLOSSIES_num_eur=", 20, conn)
self.insert_annotation(variant_id, info, "FLOSSIES_num_afr=", recent_annotation_ids['flossies_num_afr'], conn)
self.insert_annotation(variant_id, info, "FLOSSIES_num_eur=", recent_annotation_ids['flossies_num_eur'], conn)

self.insert_annotation(variant_id, info, "cancerhotspots_cancertypes=", 22, conn)
self.insert_annotation(variant_id, info, "cancerhotspots_AC=", 23, conn)
self.insert_annotation(variant_id, info, "cancerhotspots_AF=", 24, conn)
self.insert_annotation(variant_id, info, "cancerhotspots_cancertypes=", recent_annotation_ids['cancerhotspots_cancertypes'], conn)
self.insert_annotation(variant_id, info, "cancerhotspots_AC=", recent_annotation_ids['cancerhotspots_ac'], conn)
self.insert_annotation(variant_id, info, "cancerhotspots_AF=", recent_annotation_ids['cancerhotspots_af'], conn)

self.insert_annotation(variant_id, info, "ARUP_classification=", 21, conn)
self.insert_annotation(variant_id, info, "ARUP_classification=", recent_annotation_ids['arup_classification'], conn)

self.insert_annotation(variant_id, info, "HCI_prior=", 52, conn)
self.insert_annotation(variant_id, info, "HCI_prior=", recent_annotation_ids['hci_prior'], conn)

self.insert_annotation(variant_id, info, "BayesDEL_noAF=", 55, conn)
self.insert_annotation(variant_id, info, "BayesDEL_noAF=", recent_annotation_ids['bayesdel'], conn)

# spliceai is saved to the database in the dedicated spliceai job (which must be called after this job anyway)
#self.insert_annotation(variant_id, info, 'SpliceAI=', 7, conn, value_modifier_function= lambda value : ','.join(['|'.join(x.split('|')[1:]) for x in value.split(',')]) )
#self.insert_annotation(variant_id, info, 'SpliceAI=', 8, conn, value_modifier_function= lambda value : ','.join([str(max([float(x) for x in x.split('|')[2:6]])) for x in value.split(',')]) )

self.insert_annotation(variant_id, info, "tp53db_class=", 27, conn)
self.insert_annotation(variant_id, info, "tp53db_bayes_del=", 30, conn)
self.insert_annotation(variant_id, info, "tp53db_DNE_LOF_class=", 29, conn)
self.insert_annotation(variant_id, info, "tp53db_DNE_class=", 31, conn)
self.insert_annotation(variant_id, info, "tp53db_domain_function=", 32, conn)
self.insert_annotation(variant_id, info, "tp53db_transactivation_class=", 33, conn)
self.insert_annotation(variant_id, info, "tp53db_class=", recent_annotation_ids['tp53db_class'], conn)
self.insert_annotation(variant_id, info, "tp53db_bayes_del=", recent_annotation_ids['tp53db_bayes_del'], conn)
self.insert_annotation(variant_id, info, "tp53db_DNE_LOF_class=", recent_annotation_ids['tp53db_DNE_LOF_class'], conn)
self.insert_annotation(variant_id, info, "tp53db_DNE_class=", recent_annotation_ids['tp53db_DNE_class'], conn)
self.insert_annotation(variant_id, info, "tp53db_domain_function=", recent_annotation_ids['tp53db_domain_function'], conn)
self.insert_annotation(variant_id, info, "tp53db_transactivation_class=", recent_annotation_ids['tp53db_transactivation_class'], conn)
pmids = functions.find_between(info, 'tp53db_pubmed=', '(;|$)')
if pmids is not None and pmids != '':
if self.job_config['insert_literature']:
Expand All @@ -101,6 +102,7 @@ def save_to_db(self, info, variant_id, conn):
clinvar_submissions = clinvar_submissions.split('&')
clv_revstat = functions.find_between(info, 'ClinVar_revstat=', '(;|$)')
clv_varid = functions.find_between(info, 'ClinVar_varid=', '(;|$)')
self.insert_external_id(variant_id, info, "ClinVar_varid=", recent_annotation_ids['clinvar'], conn)
clv_inpret = functions.find_between(info, 'ClinVar_inpret=', '(;|$)')

if clv_revstat is not None and clv_inpret is not None and clv_varid is not None:
Expand Down
3 changes: 2 additions & 1 deletion src/annotation_service/annotation_jobs/heredicare_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def save_to_db(self, info, variant_id, conn):

conn.clear_heredicare_annotation(variant_id)

vids = conn.get_external_ids_from_variant_id(variant_id, id_source="heredicare") # the vids are imported from the import variants admin page
heredicare_vid_annotation_type_id = conn.get_most_recent_annotation_type_id('heredicare_vid')
vids = conn.get_external_ids_from_variant_id(variant_id, annotation_type_id=heredicare_vid_annotation_type_id) # the vids are imported from the import variants admin page

#print(vids)

Expand Down
Loading

0 comments on commit 1b0d1a1

Please sign in to comment.