From 7a3b773a6a50937ebd3b531cc62efada71223634 Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Tue, 15 Oct 2024 09:00:58 +0200 Subject: [PATCH 1/6] add feature for generating samplesheet for differentialabundance --- conf/modules.config | 14 ++++- conf/test.config | 4 ++ conf/test_nothing.config | 4 ++ nextflow_schema.json | 19 +++++++ .../generate_downstream_samplesheet/main.nf | 52 +++++++++++++++++++ workflows/taxprofiler.nf | 23 ++++---- 6 files changed, 105 insertions(+), 11 deletions(-) create mode 100644 subworkflows/local/generate_downstream_samplesheet/main.nf diff --git a/conf/modules.config b/conf/modules.config index 1b82c9ee..4ac91711 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -856,9 +856,14 @@ process { ].join(' ').trim() } publishDir = [ - path: { "${params.outdir}/taxpasta/" }, + [ path: { "${params.outdir}/taxpasta/" }, mode: params.publish_dir_mode, pattern: '*.{tsv,csv,arrow,parquet,biom}' + ], + [ path: { "${params.outdir}/downstream_samplesheets/differentialabundance/"}, + mode: params.publish_dir_mode, + pattern: '*.{tsv,csv,arrow,parquet,biom}' + ] ] } @@ -875,9 +880,14 @@ process { ].join(' ').trim() } publishDir = [ - path: { "${params.outdir}/taxpasta/" }, + [ path: { "${params.outdir}/taxpasta/" }, mode: params.publish_dir_mode, pattern: '*.{tsv,csv,arrow,parquet,biom}' + ], + [ path: { "${params.outdir}/downstream_samplesheets/differentialabundance/" }, + mode: params.publish_dir_mode, + pattern: '*.{tsv,csv,arrow,parquet,biom}' + ] ] } diff --git a/conf/test.config b/conf/test.config index 4e457585..f122b8e0 100644 --- a/conf/test.config +++ b/conf/test.config @@ -48,6 +48,10 @@ params { kraken2_save_reads = true centrifuge_save_reads = true run_profile_standardisation = true + + // Generate downstream samplesheets + generate_downstream_samplesheets = true + generate_pipeline_samplesheets = 'differentialabundance' } process { diff --git a/conf/test_nothing.config b/conf/test_nothing.config index 
e8b87bc7..59f8fab9 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -41,6 +41,10 @@ params { run_motus = false run_kmcp = false run_ganon = false + + // Generate downstream samplesheets + generate_downstream_samplesheets = true + generate_pipeline_samplesheets = 'differentialabundance' } process { diff --git a/nextflow_schema.json b/nextflow_schema.json index 3ada1a56..3abbe2cc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -712,6 +712,25 @@ }, "fa_icon": "fas fa-chart-line" }, + "generate_samplesheet_options": { + "title": "Downstream pipeline samplesheet generation options", + "type": "object", + "fa_icon": "fas fa-align-justify", + "description": "Options for generating input samplesheets for complementary downstream pipelines.", + "properties": { + "generate_downstream_samplesheets": { + "type": "boolean", + "description": "Turn on generation of samplesheets for downstream pipelines.", + "fa_icon": "fas fa-toggle-on" + }, + "generate_pipeline_samplesheets": { + "type": "string", + "enum": ["differentialabundance"], + "description": "Specify which pipeline to generate a samplesheet for.", + "fa_icon": "fas fa-toolbox" + } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", diff --git a/subworkflows/local/generate_downstream_samplesheet/main.nf b/subworkflows/local/generate_downstream_samplesheet/main.nf new file mode 100644 index 00000000..8623aa01 --- /dev/null +++ b/subworkflows/local/generate_downstream_samplesheet/main.nf @@ -0,0 +1,52 @@ +// +// Subworkflow with functionality specific to the nf-core/taxprofiler pipeline +// + +workflow SAMPLESHEET_DIFFERENTIALABUNDANCE { + take: + ch_taxpasta + + main: + format_sep = '\t' + + ch_taxpasta.map { it -> + def tool_name = it[0]['tool'] + def id = it[0]['id'] + def file_path = it[1] + def samplesheet_name = file(file_path).getName() + + ch_list_for_samplesheet = Channel + .fromPath(file_path) + .splitCsv(sep: format_sep) 
.map { row -> row.drop(1) } + .flatten() + + ch_colnames = Channel.of('sample') + + channelToSamplesheet(ch_colnames, ch_list_for_samplesheet, 'downstream_samplesheets/differentialabundance', samplesheet_name ) + } +} + +workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { + take: + ch_taxpasta + + mai: + def downstreampipeline_names = params.generate_pipeline_samplesheets.split(",") + + if ( downstreampipeline_names.contains('differentialabundance')) { + SAMPLESHEET_TAXPROFILER(ch_databases) + } +} + +def channelToSamplesheet(ch_header, ch_list_for_samplesheet, outdir_subdir, samplesheet_name) { + // Constructs the header string and then the strings of each row, and + // finally concatenates for saving. Originally designed by @mahesh-panchal + ch_header + .concat(ch_list_for_samplesheet) + .collectFile( + name: "${params.outdir}/${outdir_subdir}/${samplesheet_name}", + newLine: true, + sort: false + ) +} diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 93eb55dd..fd12bcb6 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -63,15 +63,16 @@ if ( [params.taxpasta_add_name, params.taxpasta_add_rank, params.taxpasta_add_li // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' -include { NONPAREIL } from '../subworkflows/local/nonpareil' -include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' -include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval' -include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_hostremoval' -include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering' -include { PROFILING } from '../subworkflows/local/profiling' -include { VISUALIZATION_KRONA } from '../subworkflows/local/visualization_krona' -include { STANDARDISATION_PROFILES } from '../subworkflows/local/standardisation_profiles' +include { 
SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' +include { NONPAREIL } from '../subworkflows/local/nonpareil' +include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' +include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval' +include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_hostremoval' +include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering' +include { PROFILING } from '../subworkflows/local/profiling' +include { VISUALIZATION_KRONA } from '../subworkflows/local/visualization_krona' +include { STANDARDISATION_PROFILES } from '../subworkflows/local/standardisation_profiles' +include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheet/main.nf' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -324,6 +325,10 @@ workflow TAXPROFILER { ch_versions = ch_versions.mix( STANDARDISATION_PROFILES.out.versions ) } + //if ( params.generate_downstream_samplesheets ) { + // GENERATE_DOWNSTREAM_SAMPLESHEETS ( STANDARDISATION_PROFILES.out.taxpasta) + // } + /* MODULE: MultiQC */ From 1d3198fb1b784db81467c0f1100db95ac4e9a32b Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Tue, 15 Oct 2024 09:14:01 +0200 Subject: [PATCH 2/6] correct syntax in subworkflow generate_downstream_samplesheet --- docs/output.md | 26 +++++++++++++++++++ nextflow.config | 4 +++ .../generate_downstream_samplesheet/main.nf | 2 +- workflows/taxprofiler.nf | 6 ++--- 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/docs/output.md b/docs/output.md index 7cf96395..9b9e45bf 100644 --- a/docs/output.md +++ b/docs/output.md @@ -744,3 +744,29 @@ For example, DIAMOND output does not have a dedicated section in the MultiQC HTM [Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant 
to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. + +### Downstream samplesheets + +The pipeline can also generate input files for the following downstream +pipelines: + +- [nf-core/differentialabundance](https://nf-co.re/differentialabundance) + +
+Output files + +- `downstream_samplesheets/` + + - `samplesheet.csv`: A samplesheet for each classifier. + - `matrix.csv`: The output from taxpasta. + +
+ +This pipeline only generates the `samplesheet` and `matrix` inputs. You will need to manually prepare the `contrast` table before running `nf-core/differentialabundance`. + +`samplesheet.csv` includes sample IDs from the `taxpasta` output for each classifier. You will need to specify the conditions (the groups you want to compare) as desired. + +:::warning +Any generated downstream samplesheet is provided as 'best effort' and are not guaranteed to work straight out of the box! +They may not be complete (e.g. some columns may need to be manually filled in). +::: diff --git a/nextflow.config b/nextflow.config index e8dd7723..b48c5858 100644 --- a/nextflow.config +++ b/nextflow.config @@ -196,6 +196,10 @@ params { taxpasta_add_ranklineage = false taxpasta_ignore_errors = false standardisation_motus_generatebiom = false + + // Generate downstream samplesheet + generate_downstream_samplesheets = false + generate_pipeline_samplesheets = null } // Load base.config by default for all pipelines diff --git a/subworkflows/local/generate_downstream_samplesheet/main.nf b/subworkflows/local/generate_downstream_samplesheet/main.nf index 8623aa01..c4eabe4c 100644 --- a/subworkflows/local/generate_downstream_samplesheet/main.nf +++ b/subworkflows/local/generate_downstream_samplesheet/main.nf @@ -31,7 +31,7 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { take: ch_taxpasta - mai: + main: def downstreampipeline_names = params.generate_pipeline_samplesheets.split(",") if ( downstreampipeline_names.contains('differentialabundance')) { diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index fd12bcb6..7f96f7d6 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -325,9 +325,9 @@ workflow TAXPROFILER { ch_versions = ch_versions.mix( STANDARDISATION_PROFILES.out.versions ) } - //if ( params.generate_downstream_samplesheets ) { - // GENERATE_DOWNSTREAM_SAMPLESHEETS ( STANDARDISATION_PROFILES.out.taxpasta) - // } + if ( 
params.generate_downstream_samplesheets ) { + GENERATE_DOWNSTREAM_SAMPLESHEETS ( STANDARDISATION_PROFILES.out.taxpasta) + } /* MODULE: MultiQC From 946ebb6d936e8ac21a00ae4618a4660a4b669c71 Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Tue, 15 Oct 2024 09:23:25 +0200 Subject: [PATCH 3/6] update output.md --- docs/output.md | 2 +- subworkflows/local/generate_downstream_samplesheet/main.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index 9b9e45bf..50d5a467 100644 --- a/docs/output.md +++ b/docs/output.md @@ -762,7 +762,7 @@ pipelines: -This pipeline only generates the `samplesheet` and `matrix` inputs. You will need to manually prepare the `contrast` table before running `nf-core/differentialabundance`. +To generate the downsteam samplesheets for `nf-core/differentialabundance`, you need to turn on `--run_profile_standardisation`. The pipeline only generates the `samplesheet` and `matrix` inputs. You will need to manually prepare the `contrast` table before running `nf-core/differentialabundance`. `samplesheet.csv` includes sample IDs from the `taxpasta` output for each classifier. You will need to specify the conditions (the groups you want to compare) as desired. 
diff --git a/subworkflows/local/generate_downstream_samplesheet/main.nf b/subworkflows/local/generate_downstream_samplesheet/main.nf index c4eabe4c..8f9d3ea8 100644 --- a/subworkflows/local/generate_downstream_samplesheet/main.nf +++ b/subworkflows/local/generate_downstream_samplesheet/main.nf @@ -35,7 +35,7 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { def downstreampipeline_names = params.generate_pipeline_samplesheets.split(",") if ( downstreampipeline_names.contains('differentialabundance')) { - SAMPLESHEET_TAXPROFILER(ch_databases) + SAMPLESHEET_DIFFERENTIALABUNDANCE(ch_taxpasta) } } From d05dd33aabf2e483f12553366777a395e257400c Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Tue, 15 Oct 2024 09:32:43 +0200 Subject: [PATCH 4/6] rename the samplesheet output in generate_downstream_samplesheet subworkflow --- subworkflows/local/generate_downstream_samplesheet/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/generate_downstream_samplesheet/main.nf b/subworkflows/local/generate_downstream_samplesheet/main.nf index 8f9d3ea8..975f8a7f 100644 --- a/subworkflows/local/generate_downstream_samplesheet/main.nf +++ b/subworkflows/local/generate_downstream_samplesheet/main.nf @@ -23,7 +23,7 @@ workflow SAMPLESHEET_DIFFERENTIALABUNDANCE { ch_colnames = Channel.of('sample') - channelToSamplesheet(ch_colnames, ch_list_for_samplesheet, 'downstream_samplesheets/differentialabundance', samplesheet_name ) + channelToSamplesheet(ch_colnames, ch_list_for_samplesheet, "downstream_samplesheets/differentialabundance", "samplesheet_${samplesheet_name}" ) } } From 86276b07078a982a9ef3245aecacc63b2f93f093 Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Tue, 15 Oct 2024 09:44:22 +0200 Subject: [PATCH 5/6] Update schema --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 3abbe2cc..afcd5bf6 100644 --- a/nextflow_schema.json +++ 
b/nextflow_schema.json @@ -725,9 +725,9 @@ }, "generate_pipeline_samplesheets": { "type": "string", - "enum": ["differentialabundance"], "description": "Specify which pipeline to generate a samplesheet for.", - "fa_icon": "fas fa-toolbox" + "fa_icon": "fas fa-toolbox", + "pattern": "^(differentialabundance)(?:,(differentialabundance)){0,1}" } } }, From 51270f4cbb6ab396c36521ffc4018bfab4390c23 Mon Sep 17 00:00:00 2001 From: LilyAnderssonLee Date: Wed, 16 Oct 2024 14:43:08 +0200 Subject: [PATCH 6/6] update docs and add condition to taxpasta oublishDir --- conf/modules.config | 20 ++++++-------------- docs/output.md | 5 ++++- nextflow_schema.json | 3 +++ 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 4ac91711..4939702f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -856,14 +856,10 @@ process { ].join(' ').trim() } publishDir = [ - [ path: { "${params.outdir}/taxpasta/" }, + path: { "${params.outdir}/taxpasta/" }, mode: params.publish_dir_mode, - pattern: '*.{tsv,csv,arrow,parquet,biom}' - ], - [ path: { "${params.outdir}/downstream_samplesheets/differentialabundance/"}, - mode: params.publish_dir_mode, - pattern: '*.{tsv,csv,arrow,parquet,biom}' - ] + pattern: '*.{tsv,csv,arrow,parquet,biom}', + saveAs: { params.generate_downstream_samplesheets && params.generate_pipeline_samplesheets == "differentialabundance" ? 
"${params.outdir}/downstream_samplesheets/differentialabundance/" : null} ] } @@ -880,14 +876,10 @@ process { ].join(' ').trim() } publishDir = [ - [ path: { "${params.outdir}/taxpasta/" }, + path: { "${params.outdir}/taxpasta/" }, mode: params.publish_dir_mode, - pattern: '*.{tsv,csv,arrow,parquet,biom}' - ], - [ path: { "${params.outdir}/downstream_samplesheets/differentialabundance/" }, - mode: params.publish_dir_mode, - pattern: '*.{tsv,csv,arrow,parquet,biom}' - ] + pattern: '*.{tsv,csv,arrow,parquet,biom}', + saveAs: { params.generate_downstream_samplesheets && params.generate_pipeline_samplesheets == "differentialabundance" ? "${params.outdir}/downstream_samplesheets/differentialabundance/" : null} ] } diff --git a/docs/output.md b/docs/output.md index 50d5a467..4bb1ef88 100644 --- a/docs/output.md +++ b/docs/output.md @@ -683,6 +683,9 @@ The following report files are used for the taxpasta step: Please aware the outputs of each tool's standardised profile _may not_ be directly comparable between each tool. Some may report raw read counts, whereas others may report abundance information. Please always refer to the list above, for which information is used for each tool. ::: +The pipeline can also generate downstream pipeline input samplesheets. +These are stored in `/downstream_samplesheets`. + ### MultiQC
@@ -762,7 +765,7 @@ pipelines:
-To generate the downsteam samplesheets for `nf-core/differentialabundance`, you need to turn on `--run_profile_standardisation`. The pipeline only generates the `samplesheet` and `matrix` inputs. You will need to manually prepare the `contrast` table before running `nf-core/differentialabundance`. +To generate the downstream samplesheets for `nf-core/differentialabundance`, you need to turn on `--run_profile_standardisation`, `--generate_downstream_samplesheets` and `--generate_pipeline_samplesheets differentialabundance`. The pipeline only generates the `samplesheet` and `matrix` inputs. You will need to manually prepare the `contrast` table before running `nf-core/differentialabundance`. `samplesheet.csv` includes sample IDs from the `taxpasta` output for each classifier. You will need to specify the conditions (the groups you want to compare) as desired. diff --git a/nextflow_schema.json b/nextflow_schema.json index afcd5bf6..77119e80 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -991,6 +991,9 @@ { "$ref": "#/definitions/postprocessing_and_visualisation_options" }, + { + "$ref": "#/definitions/generate_samplesheet_options" + }, { "$ref": "#/definitions/institutional_config_options" },