nf-core · LilyAnderssonLee · Oct 15, 2024 · Oct 15, 2024 · Oct 15, 2024 · Oct 15, 2024
diff --git a/conf/modules.config b/conf/modules.config
@@ -858,7 +858,17 @@ process {
         publishDir = [
-            path: { "${params.outdir}/taxpasta/" },
-            mode: params.publish_dir_mode,
-            pattern: '*.{tsv,csv,arrow,parquet,biom}'
+            [
+                path: { "${params.outdir}/taxpasta/" },
+                mode: params.publish_dir_mode,
+                pattern: '*.{tsv,csv,arrow,parquet,biom}'
+            ],
+            [
+                // Second copy for nf-core/differentialabundance. saveAs must return a file name
+                // (null skips publishing), so the switch lives here and never hides taxpasta/.
+                path: { "${params.outdir}/downstream_samplesheets/differentialabundance/" },
+                mode: params.publish_dir_mode,
+                pattern: '*.{tsv,csv,arrow,parquet,biom}',
+                saveAs: { filename -> params.generate_downstream_samplesheets && (params.generate_pipeline_samplesheets ?: '').toString().tokenize(',').contains('differentialabundance') ? filename : null }
+            ]
         ]
     }
 
@@ -877,7 +878,17 @@ process {
         publishDir = [
-            path: { "${params.outdir}/taxpasta/" },
-            mode: params.publish_dir_mode,
-            pattern: '*.{tsv,csv,arrow,parquet,biom}'
+            [
+                path: { "${params.outdir}/taxpasta/" },
+                mode: params.publish_dir_mode,
+                pattern: '*.{tsv,csv,arrow,parquet,biom}'
+            ],
+            [
+                // Second copy for nf-core/differentialabundance. saveAs must return a file name
+                // (null skips publishing), so the switch lives here and never hides taxpasta/.
+                path: { "${params.outdir}/downstream_samplesheets/differentialabundance/" },
+                mode: params.publish_dir_mode,
+                pattern: '*.{tsv,csv,arrow,parquet,biom}',
+                saveAs: { filename -> params.generate_downstream_samplesheets && (params.generate_pipeline_samplesheets ?: '').toString().tokenize(',').contains('differentialabundance') ? filename : null }
+            ]
         ]
     }
 

diff --git a/conf/test.config b/conf/test.config
@@ -48,6 +48,10 @@ params {
     kraken2_save_reads                     = true
     centrifuge_save_reads                  = true
     run_profile_standardisation            = true
+
+    // Generate downstream samplesheets
+    generate_downstream_samplesheets       = true
+    generate_pipeline_samplesheets         = 'differentialabundance'
 }
 
 process {

diff --git a/conf/test_nothing.config b/conf/test_nothing.config
@@ -41,6 +41,10 @@ params {
     run_motus                              = false
     run_kmcp                               = false
     run_ganon                              = false
+
+    // Generate downstream samplesheets
+    generate_downstream_samplesheets       = true
+    generate_pipeline_samplesheets         = 'differentialabundance'
 }
 
 process {

diff --git a/docs/output.md b/docs/output.md
@@ -683,6 +683,9 @@ The following report files are used for the taxpasta step:
 Please aware the outputs of each tool's standardised profile _may not_ be directly comparable between each tool. Some may report raw read counts, whereas others may report abundance information. Please always refer to the list above, for which information is used for each tool.
 :::
 
+The pipeline can also generate downstream pipeline input samplesheets.
+These are stored in `<outdir>/downstream_samplesheets`.
+
 ### MultiQC
 
 <details markdown="1">
@@ -744,3 +747,29 @@ For example, DIAMOND output does not have a dedicated section in the MultiQC HTM
 </details>
 
 [Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage.
+
+### Downstream samplesheets
+
+The pipeline can also generate input files for the following downstream
+pipelines:
+
+- [nf-core/differentialabundance](https://nf-co.re/differentialabundance)
+
+<details markdown="1">
+<summary>Output files</summary>
+
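+- `downstream_samplesheets/differentialabundance/`
+
+  - `samplesheet_<taxpasta_profile_filename>`: One samplesheet per taxpasta profile (i.e. per classifier), listing the sample IDs found in that profile.
+  - `<taxpasta_profile_filename>`: The output from taxpasta, to be used as the `matrix` input.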
+
+</details>
+
+To generate the downstream samplesheets for `nf-core/differentialabundance`, you need to turn on `--run_profile_standardisation`, `--generate_downstream_samplesheets` and `--generate_pipeline_samplesheets differentialabundance`. The pipeline only generates the `samplesheet` and `matrix` inputs. You will need to manually prepare the `contrast` table before running `nf-core/differentialabundance`.
+
+Each generated samplesheet includes the sample IDs from the `taxpasta` output of one classifier. You will need to add the conditions (the groups you want to compare) yourself.
+
+:::warning
+Any generated downstream samplesheet is provided as 'best effort' and is not guaranteed to work straight out of the box!
+They may not be complete (e.g. some columns may need to be manually filled in).
+:::
diff --git a/nextflow.config b/nextflow.config
@@ -196,6 +196,10 @@ params {
     taxpasta_add_ranklineage                = false
     taxpasta_ignore_errors                  = false
     standardisation_motus_generatebiom      = false
+
+    // Generate downstream samplesheet
+    generate_downstream_samplesheets        = false
+    generate_pipeline_samplesheets          = null
 }
 
 // Load base.config by default for all pipelines

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -712,6 +712,25 @@
             },
             "fa_icon": "fas fa-chart-line"
         },
+        "generate_samplesheet_options": {
+            "title": "Downstream pipeline samplesheet generation options",
+            "type": "object",
+            "fa_icon": "fas fa-align-justify",
+            "description": "Options for generating input samplesheets for complementary downstream pipelines.",
+            "properties": {
+                "generate_downstream_samplesheets": {
+                    "type": "boolean",
+                    "description": "Turn on generation of samplesheets for downstream pipelines.",
+                    "fa_icon": "fas fa-toggle-on"
+                },
+                "generate_pipeline_samplesheets": {
+                    "type": "string",
+                    "description": "Specify which pipeline to generate a samplesheet for.",
+                    "fa_icon": "fas fa-toolbox",
+                    "pattern": "^(differentialabundance)(?:,(differentialabundance)){0,1}$"
+                }
+            }
+        },
         "institutional_config_options": {
             "title": "Institutional config options",
             "type": "object",
@@ -972,6 +991,9 @@
         {
             "$ref": "#/definitions/postprocessing_and_visualisation_options"
         },
+        {
+            "$ref": "#/definitions/generate_samplesheet_options"
+        },
         {
             "$ref": "#/definitions/institutional_config_options"
         },

diff --git a/subworkflows/local/generate_downstream_samplesheet/main.nf b/subworkflows/local/generate_downstream_samplesheet/main.nf
@@ -0,0 +1,52 @@
+//
+// Subworkflow with functionality specific to the nf-core/taxprofiler pipeline
+//
+
+workflow SAMPLESHEET_DIFFERENTIALABUNDANCE {
+    take:
+    ch_taxpasta // items are read as it[0] = meta map, it[1] = taxpasta profile file
+
+    main:
+    format_sep = '\t'
+
+    // Writes one `samplesheet_<profile file name>` per profile: a 'sample' header
+    // line followed by one sample ID per line. The text is built inside the
+    // collectFile grouping closure because channel factories and operators must
+    // not be called from within an operator closure (as the previous `map` did).
+    ch_taxpasta.collectFile(
+        storeDir: "${params.outdir}/downstream_samplesheets/differentialabundance"
+    ) { it ->
+        def profile = file(it[1])
+        // Sample IDs are the header row minus its first column; reading every row
+        // would also copy the count values into the samplesheet.
+        // NOTE(review): assumes a tab-separated profile whose only non-sample
+        // column is the first one — confirm for non-TSV formats / extra taxpasta columns.
+        def header = profile.withReader { reader -> reader.readLine() }
+        def sample_ids = header ? header.split(format_sep).toList().drop(1) : []
+        [ "samplesheet_${profile.getName()}", (['sample'] + sample_ids).join('\n') + '\n' ]
+    }
+}
+
+workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
+    take:
+    ch_taxpasta
+
+    main:
+    // Comma-separated pipeline names; the parameter defaults to null, which must not crash here.
+    def downstreampipeline_names = (params.generate_pipeline_samplesheets ?: '').toString().tokenize(',')*.trim()
+    if ( downstreampipeline_names.contains('differentialabundance')) {
+        SAMPLESHEET_DIFFERENTIALABUNDANCE(ch_taxpasta)
+    }
+}
+
+def channelToSamplesheet(ch_header, ch_list_for_samplesheet, outdir_subdir, samplesheet_name) {
+    // Joins the header channel and the row channel (header first) and saves
+    // every emitted item on its own line of a single file under params.outdir.
+    // Returns the channel produced by collectFile.
+    // Originally designed by @mahesh-panchal
+    def target_file = "${params.outdir}/${outdir_subdir}/${samplesheet_name}"
+    def ch_all_lines = ch_header.concat(ch_list_for_samplesheet)
+
+    // sort: false turns off collectFile's sorting of the collected items
+    ch_all_lines.collectFile(name: target_file, newLine: true, sort: false)
+}
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
@@ -63,15 +63,16 @@ if ( [params.taxpasta_add_name, params.taxpasta_add_rank, params.taxpasta_add_li
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
 //
 
-include { SHORTREAD_PREPROCESSING       } from '../subworkflows/local/shortread_preprocessing'
-include { NONPAREIL                     } from '../subworkflows/local/nonpareil'
-include { LONGREAD_PREPROCESSING        } from '../subworkflows/local/longread_preprocessing'
-include { SHORTREAD_HOSTREMOVAL         } from '../subworkflows/local/shortread_hostremoval'
-include { LONGREAD_HOSTREMOVAL          } from '../subworkflows/local/longread_hostremoval'
-include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
-include { PROFILING                     } from '../subworkflows/local/profiling'
-include { VISUALIZATION_KRONA           } from '../subworkflows/local/visualization_krona'
-include { STANDARDISATION_PROFILES      } from '../subworkflows/local/standardisation_profiles'
+include { SHORTREAD_PREPROCESSING           } from '../subworkflows/local/shortread_preprocessing'
+include { NONPAREIL                         } from '../subworkflows/local/nonpareil'
+include { LONGREAD_PREPROCESSING            } from '../subworkflows/local/longread_preprocessing'
+include { SHORTREAD_HOSTREMOVAL             } from '../subworkflows/local/shortread_hostremoval'
+include { LONGREAD_HOSTREMOVAL              } from '../subworkflows/local/longread_hostremoval'
+include { SHORTREAD_COMPLEXITYFILTERING     } from '../subworkflows/local/shortread_complexityfiltering'
+include { PROFILING                         } from '../subworkflows/local/profiling'
+include { VISUALIZATION_KRONA               } from '../subworkflows/local/visualization_krona'
+include { STANDARDISATION_PROFILES          } from '../subworkflows/local/standardisation_profiles'
+include { GENERATE_DOWNSTREAM_SAMPLESHEETS  } from '../subworkflows/local/generate_downstream_samplesheet/main.nf'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -324,6 +325,11 @@ workflow TAXPROFILER {
         ch_versions = ch_versions.mix( STANDARDISATION_PROFILES.out.versions )
     }
 
+    // STANDARDISATION_PROFILES.out only exists when profile standardisation ran, so require both switches.
+    if ( params.generate_downstream_samplesheets && params.run_profile_standardisation ) {
+        GENERATE_DOWNSTREAM_SAMPLESHEETS ( STANDARDISATION_PROFILES.out.taxpasta )
+    }
+
     /*
         MODULE: MultiQC
     */