diff --git a/src/annotation_service/annotation_jobs/heredicare_job.py b/src/annotation_service/annotation_jobs/heredicare_job.py index 94c81385..eab87dc7 100644 --- a/src/annotation_service/annotation_jobs/heredicare_job.py +++ b/src/annotation_service/annotation_jobs/heredicare_job.py @@ -66,12 +66,17 @@ def save_to_db(self, info, variant_id, conn): err_msg += str(message) conn.delete_external_id(vid, heredicare_vid_annotation_type_id, variant_id) conn.delete_unknown_heredicare_annotations() - else: + # we need to check the length of the heredicare variant + # if it is 0 the variant is unknown to heredicare + # this might happen when a variant was just submitted to heredicare and it is not processed yet. Then, the vid is already known by heredivar but heredicare doesnt have information about this variant + # In this case we simply skip the heredicare annotation + # It is however good have the vid in heredivar right after insert because then we do not need a reimport after every heredicare insert + elif len(heredicare_variant) > 0: #print(heredicare_variant) n_fam = heredicare_variant["N_FAM"] n_pat = heredicare_variant["N_PAT"] consensus_class = heredicare_variant["PATH_TF"] if heredicare_variant["PATH_TF"] != "-1" else None - comment = heredicare_variant.get("VUSTF_21", heredicare_variant["VUSTF_15"]) + comment = heredicare_variant.get("VUSTF_21", heredicare_variant["VUSTF_15"]) # use vustf21, but if it is missing fallback to vustf15 - fallback can be removed later once the production heredicare api has the vustf21 field comment = comment.strip() if comment is not None else None classification_date = heredicare_variant["VUSTF_DATUM"] if heredicare_variant["VUSTF_DATUM"] != '' else None lr_cooc = heredicare_variant["LR_COOC"] diff --git a/src/common/db_IO.py b/src/common/db_IO.py index b6788bd0..5628a3f4 100644 --- a/src/common/db_IO.py +++ b/src/common/db_IO.py @@ -3512,7 +3512,7 @@ def update_publish_heredicare_queue_celery_task_id(self, publish_heredicare_queu self.cursor.execute(command, (celery_task_id, publish_heredicare_queue_id)) self.conn.commit() - def update_publish_heredicare_queue_status(self, publish_heredicare_queue_id, status, message, finished_at = None, submission_id = None, consensus_classification_id = None): + def update_publish_heredicare_queue_status(self, publish_heredicare_queue_id, status, message, finished_at = None, submission_id = None, consensus_classification_id = None, vid = None): command = "UPDATE publish_heredicare_queue SET status = %s, message = %s" actual_information = (status, message) if finished_at is not None: @@ -3524,6 +3524,9 @@ def update_publish_heredicare_queue_status(self, publish_heredicare_queue_id, st if consensus_classification_id is not None: command += ", consensus_classification_id = %s" actual_information += (consensus_classification_id, ) + if vid is not None: + command += ", vid = %s" + actual_information += (vid, ) command += " WHERE id = %s" actual_information += (publish_heredicare_queue_id, ) self.cursor.execute(command, actual_information) diff --git a/src/common/heredicare_interface.py b/src/common/heredicare_interface.py index d101d165..ff5ae37d 100644 --- a/src/common/heredicare_interface.py +++ b/src/common/heredicare_interface.py @@ -136,28 +136,12 @@ def introspect_token(self, project_type): def get_vid_list(self): status = "success" message = "" - vids = [] project_type = "download" - status, message = self.introspect_token(project_type) # checks validity of the token and updates it if neccessary - if status == 'error': - return vids, status, message - url = self.get_url(project_type, "vid_list") - bearer, timestamp = self.get_saved_bearer(project_type) - header = {"Authorization": "Bearer " + bearer} - - resp = requests.get(url, headers=header) - if resp.status_code == 401: # unauthorized - message = "ERROR: HerediCare API get vid list endpoint returned an HTTP 401, unauthorized error. Attempting retry." - status = "retry" - elif resp.status_code != 200: # any other kind of error - message = "ERROR: HerediCare API get vid list endpoint returned an HTTP " + str(resp.status_code) + " error: " + + self.extract_error_message(resp.text) - status = "error" - else: # request was successful - vids = resp.json()['items'] + status, message, all_vids = self.iterate_pagination(url, project_type) - return vids, status, message + return all_vids, status, message def filter_vid_list(self, vids, min_date): all_vids = [] @@ -189,32 +173,7 @@ def get_variant(self, vid): project_type = "download" url = self.get_url(project_type, "variant", path_args = [str(vid)]) - has_next = True - all_items = [] - while has_next: - status, message = self.introspect_token(project_type) # checks validity of the token and updates it if neccessary - if status == 'error': - return [], status, message - bearer, timestamp = self.get_saved_bearer(project_type) - header = {"Authorization": "Bearer " + bearer} - resp = requests.get(url, headers=header) - if resp.status_code == 401: # unauthorized - message = "ERROR: HerediCare API get variant info endpoint returned an HTTP 401, unauthorized error. Attempting retry." - status = "retry" - break - elif resp.status_code != 200: # any other kind of error - message = "ERROR: HerediCare API get variant info endpoint returned an HTTP " + str(resp.status_code) + " error: " + + self.extract_error_message(resp.text) - status = "error" - break - else: # request was successful - resp = resp.json() - new_items = resp["items"] - all_items.extend(new_items) - has_next = resp["hasMore"] - for link in resp["links"]: - if link["rel"] == "next": - url = link["href"] - + status, message, all_items = self.iterate_pagination(url, project_type) variant = self.convert_heredicare_variant_raw(all_items) return variant, status, message @@ -228,20 +187,16 @@ def convert_heredicare_variant_raw(self, variant_items): variant[item_name] = item_value return variant - - def get_post_regexes(self): + def iterate_pagination(self, start_url, project_type, items_key = "items"): status = "success" message = "" - all_items = [] - project_type = "upload" - - url = self.get_url(project_type, "post_info") # first url + result = [] + url = start_url has_next = True - while has_next: status, message = self.introspect_token(project_type) # checks validity of the token and updates it if neccessary if status == 'error': - return [], status, message + break bearer, timestamp = self.get_saved_bearer(project_type) header = {"Authorization": "Bearer " + bearer} resp = requests.get(url, headers=header) @@ -255,12 +210,30 @@ def get_post_regexes(self): break else: # request was successful resp = resp.json() - new_items = resp["items"] - all_items.extend(new_items) + new_items = resp[items_key] + result.extend(new_items) has_next = resp["hasMore"] - for link in resp["links"]: - if link["rel"] == "next": - url = link["href"] + if has_next: + found_next = False + for link in resp["links"]: + if link["rel"] == "next": + url = link["href"] + found_next = True + if not found_next: + message = "ERROR: response 'hasMore' attribute is true but no 'next' url found." + status = "error" + has_next = False # prevent infinite loop in case the next url is missing for some reason + break + return status, message, result + + def get_post_regexes(self): + status = "success" + message = "" + all_items = [] + project_type = "upload" + + url = self.get_url(project_type, "post_info") # first url + status, message, all_items = self.iterate_pagination(url, project_type) #with open("/mnt/storage2/users/ahdoebm1/HerediVar/src/common/heredicare_interface_debug/post_fields.json", "w") as f: # functions.prettyprint_json(all_items, func = f.write) @@ -269,10 +242,6 @@ def get_post_regexes(self): for item in all_items: item_name = item["item_name"] regex = item["item_regex_format"] - #if item_name == "PATH_TF": - # regex = r"^(-1|[1-4]|1[1-5]|2[01]|32|34)$" - #elif item_name == "VUSTF_DATUM": - # regex = r"^(0[1-9]|[12]\d|3[01])\.(0[1-9]|1[012])\.20\d\d$" result[item_name] = regex return result, status, message diff --git a/src/frontend_celery/webapp/tasks.py b/src/frontend_celery/webapp/tasks.py index d313e357..f1f42b0d 100644 --- a/src/frontend_celery/webapp/tasks.py +++ b/src/frontend_celery/webapp/tasks.py @@ -42,7 +42,6 @@ def start_variant_import(user_id, user_roles, conn: Connection): # starts the ce @celery.task(bind=True, retry_backoff=5, max_retries=3, time_limit=600) def heredicare_variant_import(self, user_id, user_roles, min_date, import_queue_id): """Background task for fetching variants from HerediCare""" - #from frontend_celery.webapp.utils.variant_importer import import_variants self.update_state(state='PROGRESS') try: @@ -96,6 +95,7 @@ def import_variants(conn: Connection, user_id, user_roles, min_date, import_queu heredicare_interface = Heredicare() vids_heredicare, status, message = heredicare_interface.get_vid_list() + print(len(vids_heredicare)) if status == "success": vids_heredicare, all_vids_heredicare, status, message = heredicare_interface.filter_vid_list(vids_heredicare, min_date) #all_vids_heredicare, status, message = heredicare_interface.get_vid_list() @@ -109,6 +109,7 @@ def import_variants(conn: Connection, user_id, user_roles, min_date, import_queu print("Total HerediCare: " + str(len(all_vids_heredicare))) print("Filtered HerediCare: " + str(len(vids_heredicare))) + print("Total vids HerediVar: " + str(len(vids_heredivar))) print("Intersection of filtered heredicare and heredivar vids: " + str(len(intersection))) print("Deleted vids (unknown to heredicare but known to heredivar): " + str(len(heredivar_exclusive_vids))) @@ -922,8 +923,8 @@ def abort_annotation_task(annotation_queue_id, celery_task_id, conn:Connection): if annotation_queue_id is not None: celery.control.revoke(celery_task_id, terminate = True) - #row_id, status, error_msg - conn.update_annotation_queue(annotation_queue_id, "aborted", "") + #row_id, status, error_msg + conn.update_annotation_queue(annotation_queue_id, "aborted", "") def purge_celery(): diff --git a/src/frontend_celery/webapp/templates/index.html b/src/frontend_celery/webapp/templates/index.html index cb900190..ce5c471b 100644 --- a/src/frontend_celery/webapp/templates/index.html +++ b/src/frontend_celery/webapp/templates/index.html @@ -94,7 +94,7 @@