diff --git a/eva_submission/eload_ingestion.py b/eva_submission/eload_ingestion.py index b2d64010..50703891 100644 --- a/eva_submission/eload_ingestion.py +++ b/eva_submission/eload_ingestion.py @@ -10,6 +10,7 @@ from ebi_eva_common_pyutils.config import cfg from ebi_eva_common_pyutils.config_utils import get_mongo_uri_for_eva_profile, get_primary_mongo_creds_for_profile, \ get_accession_pg_creds_for_profile, get_count_service_creds_for_profile +from ebi_eva_common_pyutils.ena_utils import get_assembly_name_and_taxonomy_id from ebi_eva_common_pyutils.metadata_utils import resolve_variant_warehouse_db_name, insert_new_assembly_and_taxonomy, \ get_assembly_set_from_metadata from ebi_eva_common_pyutils.pg_utils import get_all_results_for_query, execute_query @@ -90,7 +91,8 @@ def ingest( if 'optional_remap_and_cluster' in tasks: target_assembly = self._get_target_assembly() - if target_assembly: + # EVA-3207: Temporary limitation while we sort out the remapping across species + if target_assembly and self._target_assembly_from_same_taxonomy(target_assembly): self.run_remap_and_cluster_workflow(target_assembly, resume=resume) if do_variant_load or annotation_only: @@ -399,6 +401,15 @@ def _get_target_assembly(self): self.warning(f'Could not find any current supported assembly for {self.taxonomy}, skipping clustering') return None + def _target_assembly_from_same_taxonomy(self, target_assembly): + # Find taxonomy of the target assembly + _, taxonomy_id_from_target = get_assembly_name_and_taxonomy_id(target_assembly) + if int(taxonomy_id_from_target) != int(self.taxonomy): + self.warning(f'Target assembly {target_assembly} is from a different taxonomy {taxonomy_id_from_target} ' + f'compared to the current project {self.taxonomy}. Therefore remapping will not be carried out!') + return False + return True + def create_extraction_properties(self, output_file_path, taxonomy): properties = self.properties_generator.get_remapping_extraction_properties( taxonomy=taxonomy, diff --git a/tests/test_eload_ingestion.py b/tests/test_eload_ingestion.py index a7a23d10..18172a8b 100644 --- a/tests/test_eload_ingestion.py +++ b/tests/test_eload_ingestion.py @@ -151,11 +151,13 @@ def test_ingest_all_tasks(self): patch('eva_submission.eload_utils.get_all_results_for_query') as m_get_alias_results, \ patch('eva_submission.eload_ingestion.get_vep_and_vep_cache_version') as m_get_vep_versions, \ patch('eva_submission.eload_utils.requests.post') as m_post, \ + patch('eva_submission.eload_ingestion.get_assembly_name_and_taxonomy_id') as m_get_tax, \ self._patch_mongo_database(): m_get_alias_results.return_value = [['alias']] m_get_vep_versions.return_value = (100, 100, 'homo_sapiens') m_post.return_value.text = self.get_mock_result_for_ena_date() m_get_results.side_effect = default_db_results_for_ingestion() + m_get_tax.return_value = ('name', '9090') self.eload.ingest(1) def test_ingest_metadata_load(self): @@ -370,8 +372,10 @@ def test_ingest_clustering(self): with self._patch_metadata_handle(), \ patch('eva_submission.eload_ingestion.get_all_results_for_query') as m_get_results, \ patch('eva_submission.eload_ingestion.command_utils.run_command_with_output', autospec=True) as m_run_command, \ + patch('eva_submission.eload_ingestion.get_assembly_name_and_taxonomy_id') as m_get_tax, \ self._patch_mongo_database(): m_get_results.side_effect = default_db_results_for_clustering() + m_get_tax.return_value = ('name', '9796') self.eload.ingest(tasks=['optional_remap_and_cluster']) assert self.eload.eload_cfg.query('ingestion', 'remap_and_cluster', 'target_assembly') == 'GCA_123' assert m_run_command.call_count == 1 @@ -394,6 +398,7 @@ def test_resume_when_step_fails(self): patch('eva_submission.eload_utils.get_all_results_for_query') as m_get_alias_results, \ patch('eva_submission.eload_ingestion.get_vep_and_vep_cache_version') as m_get_vep_versions, \ patch('eva_submission.eload_utils.requests.post') as m_post, \ + patch('eva_submission.eload_ingestion.get_assembly_name_and_taxonomy_id') as m_get_tax, \ self._patch_mongo_database(): m_get_alias_results.return_value = [['alias']] m_get_vep_versions.return_value = (100, 100, 'homo_sapiens') @@ -408,6 +413,7 @@ def test_resume_when_step_fails(self): None, # clustering None, # variant load ] + m_get_tax.return_value = ('name', '9090') with self.assertRaises(subprocess.CalledProcessError): self.eload.ingest() @@ -425,11 +431,13 @@ def test_resume_completed_job(self): patch('eva_submission.eload_utils.get_all_results_for_query') as m_get_alias_results, \ patch('eva_submission.eload_ingestion.get_vep_and_vep_cache_version') as m_get_vep_versions, \ patch('eva_submission.eload_utils.requests.post') as m_post, \ + patch('eva_submission.eload_ingestion.get_assembly_name_and_taxonomy_id') as m_get_tax, \ self._patch_mongo_database(): m_get_alias_results.return_value = [['alias']] m_get_vep_versions.return_value = (100, 100, 'homo_sapiens') m_post.return_value.text = self.get_mock_result_for_ena_date() m_get_results.side_effect = default_db_results_for_ingestion() + default_db_results_for_ingestion() + m_get_tax.return_value = ('name', '9796') # Resuming with no existing job execution is fine self.eload.ingest(resume=True)