From 50071e6c0e9d1c09b38cb5ef99bcd49b912dfe37 Mon Sep 17 00:00:00 2001 From: April Shen Date: Thu, 29 Apr 2021 17:54:20 +0100 Subject: [PATCH 1/4] treat skipped structural variants as a success in accessioning --- nextflow/accession.nf | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/nextflow/accession.nf b/nextflow/accession.nf index 0161aa35..bf30cced 100644 --- a/nextflow/accession.nf +++ b/nextflow/accession.nf @@ -60,6 +60,7 @@ process create_properties { output: path "${vcf_file.getFileName()}_accessioning.properties" into accession_props val accessioned_filename into accessioned_filenames + val log_filename into log_filenames exec: props = new Properties() @@ -69,6 +70,7 @@ process create_properties { props.setProperty("parameters.vcf", vcf_file.toString()) vcf_filename = vcf_file.getFileName().toString() accessioned_filename = vcf_filename.take(vcf_filename.indexOf(".vcf")) + ".accessioned.vcf" + log_filename = "accessioning.${vcf_filename}" props.setProperty("parameters.outputVcf", "${params.public_dir}/${accessioned_filename}") // need to explicitly store in workDir so next process can pick it up @@ -89,19 +91,16 @@ process create_properties { * Accession VCFs */ process accession_vcf { - clusterOptions { - log_filename = accession_properties.getFileName().toString() - log_filename = log_filename.take(log_filename.indexOf('_accessioning.properties')) - return "-g /accession/instance-${params.instance_id} \ - -o $params.logs_dir/accessioning.${log_filename}.log \ - -e $params.logs_dir/accessioning.${log_filename}.err" - } + clusterOptions "-g /accession/instance-${params.instance_id} \ + -o $params.logs_dir/${log_filename}.log \ + -e $params.logs_dir/${log_filename}.err" memory '8 GB' input: path accession_properties from accession_props val accessioned_filename from accessioned_filenames + val log_filename from log_filenames output: path "${accessioned_filename}.tmp" into accession_done @@ -109,7 +108,11 @@ process accession_vcf { """ filename=\$(basename $accession_properties) filename=\${filename%.*} - java -Xmx7g -jar $params.jar.accession_pipeline --spring.config.name=\$filename + java -Xmx7g -jar $params.jar.accession_pipeline --spring.config.name=\$filename || \ + # If accessioning fails due to missing variants, but the only missing variants are structural variants, + # then we should treat this as a success from the perspective of the automation. + [[ \$(grep -o 'Skipped processing structural variant' ${params.logs_dir}/${log_filename}.log | wc -l) \ + == \$(grep -oP '\\d+(?= unaccessioned variants need to be checked)' ${params.logs_dir}/${log_filename}.log) ]] echo "done" > ${accessioned_filename}.tmp """ } From 664edef58d2b4026b3ac04dbaa08b5bccbf441a5 Mon Sep 17 00:00:00 2001 From: April Shen Date: Fri, 30 Apr 2021 09:43:09 +0100 Subject: [PATCH 2/4] use primary mongo host --- eva_submission/eload_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/eva_submission/eload_utils.py b/eva_submission/eload_utils.py index 16cc0d04..b3259bc8 100644 --- a/eva_submission/eload_utils.py +++ b/eva_submission/eload_utils.py @@ -68,7 +68,9 @@ def get_metadata_conn(): def get_mongo_creds(): properties = get_properties_from_xml_file(cfg['maven']['environment'], cfg['maven']['settings_file']) - mongo_host = split_hosts(properties['eva.mongo.host'])[0][0] + # Use the primary mongo host from configuration: + # https://github.com/EBIvariation/configuration/blob/master/eva-maven-settings.xml#L111 + mongo_host = split_hosts(properties['eva.mongo.host'])[1][0] mongo_user = properties['eva.mongo.user'] mongo_pass = properties['eva.mongo.passwd'] return mongo_host, mongo_user, mongo_pass From f92d5832418885fc50aa2357f0fd982e3d2a0164 Mon Sep 17 00:00:00 2001 From: April Shen Date: Fri, 30 Apr 2021 13:19:05 +0100 Subject: [PATCH 3/4] Update nextflow/accession.nf Co-authored-by: sundarvenkata-EBI --- nextflow/accession.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow/accession.nf b/nextflow/accession.nf index bf30cced..6fbdb315 100644 --- a/nextflow/accession.nf +++ b/nextflow/accession.nf @@ -108,7 +108,7 @@ process accession_vcf { """ filename=\$(basename $accession_properties) filename=\${filename%.*} - java -Xmx7g -jar $params.jar.accession_pipeline --spring.config.name=\$filename || \ + (java -Xmx7g -jar $params.jar.accession_pipeline --spring.config.name=\$filename) || \ # If accessioning fails due to missing variants, but the only missing variants are structural variants, # then we should treat this as a success from the perspective of the automation. [[ \$(grep -o 'Skipped processing structural variant' ${params.logs_dir}/${log_filename}.log | wc -l) \ From 37ee8b3ae48b62ee7c6c7fe46f8124c93fbf81d5 Mon Sep 17 00:00:00 2001 From: April Shen Date: Fri, 30 Apr 2021 13:29:51 +0100 Subject: [PATCH 4/4] add TODOs --- eva_submission/eload_utils.py | 1 + nextflow/accession.nf | 1 + 2 files changed, 2 insertions(+) diff --git a/eva_submission/eload_utils.py b/eva_submission/eload_utils.py index b3259bc8..6e3476c4 100644 --- a/eva_submission/eload_utils.py +++ b/eva_submission/eload_utils.py @@ -70,6 +70,7 @@ def get_mongo_creds(): properties = get_properties_from_xml_file(cfg['maven']['environment'], cfg['maven']['settings_file']) # Use the primary mongo host from configuration: # https://github.com/EBIvariation/configuration/blob/master/eva-maven-settings.xml#L111 + # TODO: revisit once accessioning/variant pipelines can support multiple hosts mongo_host = split_hosts(properties['eva.mongo.host'])[1][0] mongo_user = properties['eva.mongo.user'] mongo_pass = properties['eva.mongo.passwd'] diff --git a/nextflow/accession.nf b/nextflow/accession.nf index 6fbdb315..c036f31f 100644 --- a/nextflow/accession.nf +++ b/nextflow/accession.nf @@ -111,6 +111,7 @@ process accession_vcf { (java -Xmx7g -jar $params.jar.accession_pipeline --spring.config.name=\$filename) || \ # If accessioning fails due to missing variants, but the only missing variants are structural variants, # then we should treat this as a success from the perspective of the automation. + # TODO revert once accessioning pipeline properly registers structural variants [[ \$(grep -o 'Skipped processing structural variant' ${params.logs_dir}/${log_filename}.log | wc -l) \ == \$(grep -oP '\\d+(?= unaccessioned variants need to be checked)' ${params.logs_dir}/${log_filename}.log) ]] echo "done" > ${accessioned_filename}.tmp