NF_MAAffymetrix: nasa#104 remove all DE outputs

cyouh95 · Jul 23, 2024 · 0769d69
1 parent 252b1b7
commit 0769d69
Show file tree

Hide file tree

Showing 4 changed files with 11 additions and 162 deletions.
diff --git a/...oarray/Affymetrix/Workflow_Documentation/NF_MAAffymetrix/workflow_code/bin/Affymetrix.qmd b/...oarray/Affymetrix/Workflow_Documentation/NF_MAAffymetrix/workflow_code/bin/Affymetrix.qmd
@@ -760,9 +760,11 @@ runsheetToDesignMatrix <- function(runsheet_path) {
 design_data <- runsheetToDesignMatrix(runsheet)
 design <- design_data$matrix
 
-# Write SampleTable.csv and contrasts.csv file
-write.csv(design_data$groups, file.path(DIR_DGE, "SampleTable_GLmicroarray.csv"), row.names = FALSE)
-write.csv(design_data$contrasts, file.path(DIR_DGE, "contrasts_GLmicroarray.csv"))
+if (params$run_DE) {
+    # Write SampleTable.csv and contrasts.csv file
+    write.csv(design_data$groups, file.path(DIR_DGE, "SampleTable_GLmicroarray.csv"), row.names = FALSE)
+    write.csv(design_data$contrasts, file.path(DIR_DGE, "contrasts_GLmicroarray.csv"))
+}
 ```
 
 ### Perform Individual Probeset Level DE
@@ -858,11 +860,11 @@ write.csv(
   SAMPLE_COLUMN_ORDER)
   ], file.path(DIR_NORMALIZED_EXPRESSION, "normalized_expression_probeset_GLmicroarray.csv"), row.names = FALSE)
 
-### Generate and export PCA table for GeneLab visualization plots
-PCA_raw <- prcomp(t(exprs(probeset_level_data)), scale = FALSE) # Note: expression at the Probeset level is already log2 transformed
-write.csv(PCA_raw$x,
-          file.path(DIR_DGE, "visualization_PCA_table_GLmicroarray.csv")
-          )
+if (params$run_DE) {
+    ### Generate and export PCA table for GeneLab visualization plots
+    PCA_raw <- prcomp(t(exprs(probeset_level_data)), scale = FALSE) # Note: expression at the Probeset level is already log2 transformed
+    write.csv(PCA_raw$x, file.path(DIR_DGE, "visualization_PCA_table_GLmicroarray.csv"))
+}
 
 ## Determine column order for probe level tables
 

diff --git a/...w_Documentation/NF_MAAffymetrix/workflow_code/bin/dp_tools__affymetrix_skipDE/config.yaml b/...w_Documentation/NF_MAAffymetrix/workflow_code/bin/dp_tools__affymetrix_skipDE/config.yaml
@@ -183,31 +183,6 @@ data assets:
 
     resource categories: *neverPublished
 
-  sample table:
-    processed location: 
-      - *DGEDataDir
-      - "SampleTable_GLmicroarray.csv"
-
-    tags:
-      - processed
-
-    resource categories: &DGEAnalysisData
-      subcategory: Differential Expression Analysis Data
-      subdirectory: ""
-      publish to repo: true
-      include subdirectory in table: false
-      table order: 14
-
-  DE contrasts table:
-    processed location: 
-      - *DGEDataDir
-      - "contrasts_GLmicroarray.csv"
-
-    tags:
-      - processed
-
-    resource categories: *DGEAnalysisData
-
   raw probe intensities table:
     processed location: 
       - *RawDir
@@ -253,16 +228,6 @@ data assets:
       include subdirectory in table: false
       table order: 2
 
-  viz PCA table:
-    processed location: 
-      - *DGEDataDir
-      - "visualization_PCA_table_GLmicroarray.csv"
-
-    tags:
-      - processed
-
-    resource categories: *neverPublished
-
 data asset sets:
   # These assets are not generated in the workflow, but are generated after the workflow
   PUTATIVE: []

diff --git a/...w_Documentation/NF_MAAffymetrix/workflow_code/bin/dp_tools__affymetrix_skipDE/protocol.py b/...w_Documentation/NF_MAAffymetrix/workflow_code/bin/dp_tools__affymetrix_skipDE/protocol.py
@@ -131,124 +131,6 @@ def validate(
                                 - Halt: At least one condition failed
                     """)
                     )
-        with vp.component_start(
-            name="DE Metadata",
-            description="",
-        ):
-
-            with vp.component_start(
-                name="Sample Table",
-                description="",
-            ):
-                with vp.payload(
-                    payloads=[
-                        {
-                            "runsheet": lambda: dataset.data_assets["runsheet"].path,
-                            "sampleTable": lambda: dataset.data_assets[
-                                "sample table"
-                            ].path,
-                        }
-                    ]
-                ):
-                    vp.add(
-                        checks.check_sample_table_against_runsheet,
-                        config={"all_samples_required": True},
-                        full_description=textwrap.dedent(f"""
-                            - Check: Ensure all samples denoted in the runsheet are present
-                                - Reason:
-                                    - Sample Table should be inclusive of all samples processed
-                                - Potential Source of Problems:
-                                    - Bug in processing script that results in missing sample table columns
-                                - Flag Condition:
-                                    - Green: All samples present
-                                    - Halt: At least one sample indicated in runsheet is missing
-                        """)
-                    )
-                    vp.add(
-                        checks.check_sample_table_for_correct_group_assignments,
-                        full_description=textwrap.dedent(f"""
-                            - Check: Ensure sample to group mapping consistent with runsheet
-                                - Reason:
-                                    - Group mapping indicated by sample table. Mis-mapping will result in incorrect DE output
-                                - Potential Source of Problems:
-                                    - Bug in processing script
-                                - Flag Condition:
-                                    - Green: Consistent sample to group mapping based on runsheet
-                                    - Halt: At least one inconsistency in mapping
-                        """)
-                        )
-
-            with vp.component_start(
-                name="Contrasts Table",
-                description="",
-            ):
-                with vp.payload(
-                    payloads=[
-                        {
-                            "runsheet": lambda: dataset.data_assets["runsheet"].path,
-                            "contrasts_table": lambda: dataset.data_assets[
-                                "DE contrasts table"
-                            ].path,
-                        }
-                    ]
-                ):
-                    vp.add(
-                        checks.check_contrasts_table_headers,
-                        full_description=textwrap.dedent(f"""
-                            - Check: Ensure contrast table header correctly formatted using runsheet as reference for groups
-                                - Reason:
-                                    - Incorrect contrasts will result in incorrect DE output
-                                - Potential Source of Problems:
-                                    - Bug in processing script
-                                - Flag Condition:
-                                    - Green: Consistent contrast header based on runsheet
-                                    - Halt: At least one inconsistency in header
-                        """)
-                        )
-                    vp.add(
-                        bulkRNASeq.checks.check_contrasts_table_rows,
-                        full_description=textwrap.dedent(f"""
-                            - Check: Ensure contrast table rows correctly formatted using runsheet as reference for groups
-                                - Reason:
-                                    - Incorrect rows will result in incorrect DE output as groups will become mis-mapped in contrasts
-                                - Potential Source of Problems:
-                                    - Bug in processing script
-                                - Flag Condition:
-                                    - Green: Consistent contrast rows (i.e. groups) based on runsheet 
-                                    - Halt: At least one inconsistency in rows
-                        """)
-                        )
-        with vp.component_start(
-            name="DE Output",
-            description="",
-        ):
-            with vp.component_start(
-                name="Viz Tables",
-                description="Extended from the dge tables",
-            ):
-                with vp.payload(
-                    payloads=[
-                        {
-                            "samples": lambda: set(dataset.samples),
-                            "pca_table": lambda: dataset.data_assets[
-                                "viz PCA table"
-                            ].path,
-                        }
-                    ]
-                ):
-                    vp.add(
-                        bulkRNASeq.checks.check_viz_pca_table_index_and_columns_exist,
-                        full_description=textwrap.dedent(f"""
-                                - Check: Ensure all samples (row-indices) present and columns PC1, PC2 and PC3 are present
-                                    - Reason:
-                                        - PCA table should include all samples and PC1, PC2, PC3 (for 3D PCA viz)
-                                    - Potential Source of Problems:
-                                        - Bug in processing script
-                                    - Flag Condition:
-                                        - Green: All samples and all columns present
-                                        - Halt: At least one sample or column is missing
-                            """)
-                        )
         with vp.component_start(
             name="Processing Report",
             description="",

diff --git a/.../Affymetrix/Workflow_Documentation/NF_MAAffymetrix/workflow_code/modules/VV_AFFYMETRIX.nf b/.../Affymetrix/Workflow_Documentation/NF_MAAffymetrix/workflow_code/modules/VV_AFFYMETRIX.nf
@@ -30,7 +30,7 @@ process VV_AFFYMETRIX {
     path("Metadata/*_runsheet.csv"), emit: VVed_runsheet
     path("00-RawData/*"), emit: VVed_rawData
     path("01-oligo_NormExp/*"), emit: VVed_NormExp
-    path("02-limma_DGE/*"), emit: VVed_DGE
+    path("02-limma_DGE/*"), emit: VVed_DGE, optional: true
     path("VV_report_GLmicroarray.tsv.MANUAL_CHECKS_PENDING"), optional: params.skipVV, emit: log
     path("versions.yml"), emit: versions