From 72e2f96d4836c1a8cfcb8aca695b9d85c69e1547 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sun, 25 Feb 2024 11:06:40 +0000 Subject: [PATCH 1/5] Increase the per-CPU memory of BWAMEM2_MEM at every attempt We had a bunch of failures where retrying with larger resources didn't help. They initially needed 16-22 GB of memory (for 12 CPUs) but were only getting 15 GB. Every attempt they would get more memory (20 GB, 25 GB, etc) but the number of CPUs used was increasing at the same time and pushing the memory usage even faster. Since it's a relatively small number of failures, I don't want to increase the base request. Instead, I'm increasing the per-CPU amount of memory at every attempt so that those species would have succeeded at the second attempt. For the largest offender: - 22 GB required at the first attempt (12 CPUs): 15 GB given -> failure - 32 GB required at the second attempt (18 CPUs): 34 GB given -> success --- conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index cdffac4..ba4be0d 100644 --- a/conf/base.config +++ b/conf/base.config @@ -113,7 +113,7 @@ process { time = { check_max( 3.h * task.attempt * Math.ceil(positive_log(meta2.genome_size/100000, 10)) * Math.ceil(meta.read_count/1000000000) * 12 / log_increase_cpus(6, 6*task.attempt, meta.read_count/1000000000, 2), 'time' ) } // Base RAM usage is about 6 times the genome size. Each thread takes an additional 800 MB RAM // Memory usage of SAMTOOLS_VIEW is negligible. 
- memory = { check_max( 6.GB * Math.ceil(meta2.genome_size / 1000000000) + 800.MB * log_increase_cpus(6, 6*task.attempt, meta.read_count/1000000000, 2), 'memory' ) } + memory = { check_max( 6.GB * Math.ceil(meta2.genome_size / 1000000000) + 800.MB * task.attempt * log_increase_cpus(6, 6*task.attempt, meta.read_count/1000000000, 2), 'memory' ) } } withName: MINIMAP2_ALIGN { From 581e820f613a9692caa50985a9d007c6ed785f2f Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sun, 25 Feb 2024 11:32:20 +0000 Subject: [PATCH 2/5] Increase the memory of SORMADUP with the number of CPUs Contrary to my first, limited, assessment of the SORMADUP composite module, the memory usage does increase with the number of CPUs. We found this out after one SORMADUP job failed because of MEMLIMIT at every attempt. I reran the job with each of the CPU counts requested across attempts and recorded the memory usage (number of CPUs, peak memory): ``` 8 22,429 MB 14 27,303 MB 20 32,383 MB 26 37,089 MB 32 41,990 MB 38 46,931 MB ``` The regression line is y = 816*x + 15929 (x = number of CPUs, y = memory in MB). I rounded the slope up to 850 MB per CPU and decreased the base offset accordingly so that the base memory assignment remains the same (after all, it's only 1 failure out of many species). However, I made sure that the per-CPU factor increases at every attempt, so that this species would have succeeded at the second attempt. 
--- conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index ba4be0d..a340946 100644 --- a/conf/base.config +++ b/conf/base.config @@ -82,7 +82,7 @@ process { withName: 'SAMTOOLS_SORMADUP' { cpus = { log_increase_cpus(2, 6*task.attempt, 1, 2) } - memory = { check_max( 10.GB + 0.6.GB * Math.ceil( meta.read_count / 100000000 ) * task.attempt, 'memory' ) } + memory = { check_max( 4.GB + 850.MB * log_increase_cpus(2, 6*task.attempt, 1, 2) * task.attempt + 0.6.GB * Math.ceil( meta.read_count / 100000000 ), 'memory' ) } time = { check_max( 2.h * Math.ceil( meta.read_count / 100000000 ) * task.attempt / log_increase_cpus(2, 6*task.attempt, 1, 2), 'time' ) } } From c1aa919aa49e9ba2aa70251dd93f0d58c4439ae3 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 26 Feb 2024 16:47:38 +0000 Subject: [PATCH 3/5] Fix for Nextflow 24.01-edge: functions have to be defined in the main nextflow.config https://github.com/nextflow-io/nextflow/issues/4722 --- conf/base.config | 27 --------------------------- nextflow.config | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/conf/base.config b/conf/base.config index a340946..bfd327b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -4,33 +4,6 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Increasing the number of CPUs often gives diminishing returns, so we increase it - following a logarithm curve. Example: - - 0 < value <= 1: start + step - - 1 < value <= 2: start + 2*step - - 2 < value <= 4: start + 3*step - - 4 < value <= 8: start + 4*step - In order to support re-runs, the step increase may be multiplied by the attempt - number prior to calling this function. 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// Modified logarithm function that doesn't return negative numbers -def positive_log(value, base) { - if (value <= 1) { - return 0 - } else { - return Math.log(value)/Math.log(base) - } -} - -def log_increase_cpus(start, step, value, base) { - return check_max(start + step * (1 + Math.ceil(positive_log(value, base))), 'cpus') -} - - process { errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } diff --git a/nextflow.config b/nextflow.config index f9d5027..50cc99e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -222,3 +222,30 @@ def check_max(obj, type) { } } } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Increasing the number of CPUs often gives diminishing returns, so we increase it + following a logarithm curve. Example: + - 0 < value <= 1: start + step + - 1 < value <= 2: start + 2*step + - 2 < value <= 4: start + 3*step + - 4 < value <= 8: start + 4*step + In order to support re-runs, the step increase may be multiplied by the attempt + number prior to calling this function. 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Modified logarithm function that doesn't return negative numbers +def positive_log(value, base) { + if (value <= 1) { + return 0 + } else { + return Math.log(value)/Math.log(base) + } +} + +def log_increase_cpus(start, step, value, base) { + return check_max(start + step * (1 + Math.ceil(positive_log(value, base))), 'cpus') +} + From a3e6a4e1eee793dd2a53069d1316e5c8bbb1706f Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 26 Feb 2024 17:29:54 +0000 Subject: [PATCH 4/5] Bumped the version number --- CHANGELOG.md | 6 ++++++ nextflow.config | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 384497c..ede56e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[1.2.1](https://github.com/sanger-tol/readmapping/releases/tag/1.2.1)] - [2024-02-27] + +### Enhancements & fixes + +- Increased the memory requests for reruns of BWAMEM2_MEM and SAMTOOLS_SORMADUP. + ## [[1.2.0](https://github.com/sanger-tol/readmapping/releases/tag/1.2.0)] – Norwegian Ridgeback - [2023-12-19] ### Enhancements & fixes diff --git a/nextflow.config b/nextflow.config index 50cc99e..03de7e7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -183,7 +183,7 @@ manifest { description = 'Pipeline to map reads generated using different sequencing technologies against a genome assembly.' mainScript = 'main.nf' nextflowVersion = '!>=22.10.1' - version = '1.2.0' + version = '1.2.1' doi = '10.5281/zenodo.6563577' } From bba028e392b636f73cbbf293ab244566370726b6 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 28 Feb 2024 18:38:36 +0000 Subject: [PATCH 5/5] Couldn't miss the opportunity to release software on a Feb 29 ! 
--- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ede56e9..69f955c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [[1.2.1](https://github.com/sanger-tol/readmapping/releases/tag/1.2.1)] - [2024-02-27] +## [[1.2.1](https://github.com/sanger-tol/readmapping/releases/tag/1.2.1)] - [2024-02-29] ### Enhancements & fixes