Skip to content

Commit

Permalink
NF_MAAffymetrix: nasa#104 remove all DE outputs
Browse files Browse the repository at this point in the history
  • Loading branch information
cyouh95 committed Jul 23, 2024
1 parent 252b1b7 commit 0769d69
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 162 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -760,9 +760,11 @@ runsheetToDesignMatrix <- function(runsheet_path) {
design_data <- runsheetToDesignMatrix(runsheet)
design <- design_data$matrix
# Write SampleTable.csv and contrasts.csv file
write.csv(design_data$groups, file.path(DIR_DGE, "SampleTable_GLmicroarray.csv"), row.names = FALSE)
write.csv(design_data$contrasts, file.path(DIR_DGE, "contrasts_GLmicroarray.csv"))
if (params$run_DE) {
# Write SampleTable.csv and contrasts.csv files
write.csv(design_data$groups, file.path(DIR_DGE, "SampleTable_GLmicroarray.csv"), row.names = FALSE)
write.csv(design_data$contrasts, file.path(DIR_DGE, "contrasts_GLmicroarray.csv"))
}
```

### Perform Individual Probeset Level DE
Expand Down Expand Up @@ -858,11 +860,11 @@ write.csv(
SAMPLE_COLUMN_ORDER)
], file.path(DIR_NORMALIZED_EXPRESSION, "normalized_expression_probeset_GLmicroarray.csv"), row.names = FALSE)
### Generate and export PCA table for GeneLab visualization plots
PCA_raw <- prcomp(t(exprs(probeset_level_data)), scale = FALSE) # Note: expression at the Probeset level is already log2 transformed
write.csv(PCA_raw$x,
file.path(DIR_DGE, "visualization_PCA_table_GLmicroarray.csv")
)
if (params$run_DE) {
### Generate and export PCA table for GeneLab visualization plots
PCA_raw <- prcomp(t(exprs(probeset_level_data)), scale = FALSE) # Note: expression at the Probeset level is already log2 transformed
write.csv(PCA_raw$x, file.path(DIR_DGE, "visualization_PCA_table_GLmicroarray.csv"))
}
## Determine column order for probe level tables
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,31 +183,6 @@ data assets:

resource categories: *neverPublished

sample table:
processed location:
- *DGEDataDir
- "SampleTable_GLmicroarray.csv"

tags:
- processed

resource categories: &DGEAnalysisData
subcategory: Differential Expression Analysis Data
subdirectory: ""
publish to repo: true
include subdirectory in table: false
table order: 14

DE contrasts table:
processed location:
- *DGEDataDir
- "contrasts_GLmicroarray.csv"

tags:
- processed

resource categories: *DGEAnalysisData

raw probe intensities table:
processed location:
- *RawDir
Expand Down Expand Up @@ -253,16 +228,6 @@ data assets:
include subdirectory in table: false
table order: 2

viz PCA table:
processed location:
- *DGEDataDir
- "visualization_PCA_table_GLmicroarray.csv"

tags:
- processed

resource categories: *neverPublished

data asset sets:
# These assets are not generated in the workflow, but are generated after the workflow
PUTATIVE: []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,124 +131,6 @@ def validate(
- Halt: At least one condition failed
""")
)
with vp.component_start(
name="DE Metadata",
description="",
):

with vp.component_start(
name="Sample Table",
description="",
):
with vp.payload(
payloads=[
{
"runsheet": lambda: dataset.data_assets["runsheet"].path,
"sampleTable": lambda: dataset.data_assets[
"sample table"
].path,
}
]
):
vp.add(
checks.check_sample_table_against_runsheet,
config={"all_samples_required": True},
full_description=textwrap.dedent(f"""
- Check: Ensure all samples denoted in the runsheet are present
- Reason:
- Sample Table should be inclusive of all samples processed
- Potential Source of Problems:
- Bug in processing script that results in missing sample table columns
- Flag Condition:
- Green: All samples present
- Halt: At least one sample indicated in runsheet is missing
""")
)
vp.add(
checks.check_sample_table_for_correct_group_assignments,
full_description=textwrap.dedent(f"""
- Check: Ensure sample to group mapping consistent with runsheet
- Reason:
- Group mapping indicated by sample table. Mis-mapping will result in incorrect DE output
- Potential Source of Problems:
- Bug in processing script
- Flag Condition:
- Green: Consistent sample to group mapping based on runsheet
- Halt: At least one inconsistency in mapping
""")
)

with vp.component_start(
name="Contrasts Table",
description="",
):
with vp.payload(
payloads=[
{
"runsheet": lambda: dataset.data_assets["runsheet"].path,
"contrasts_table": lambda: dataset.data_assets[
"DE contrasts table"
].path,
}
]
):
vp.add(
checks.check_contrasts_table_headers,
full_description=textwrap.dedent(f"""
- Check: Ensure contrast table header correctly formatted using runsheet as reference for groups
- Reason:
- Incorrect contrasts will result in incorrect DE output
- Potential Source of Problems:
- Bug in processing script
- Flag Condition:
- Green: Consistent contrast header based on runsheet
- Halt: At least one inconsistency in header
""")
)
vp.add(
bulkRNASeq.checks.check_contrasts_table_rows,
full_description=textwrap.dedent(f"""
- Check: Ensure contrast table rows correctly formatted using runsheet as reference for groups
- Reason:
- Incorrect rows will result in incorrect DE output as groups will become mis-mapped in contrasts
- Potential Source of Problems:
- Bug in processing script
- Flag Condition:
- Green: Consistent contrast rows (i.e. groups) based on runsheet
- Halt: At least one inconsistency in rows
""")
)
with vp.component_start(
name="DE Output",
description="",
):
with vp.component_start(
name="Viz Tables",
description="Extended from the dge tables",
):
with vp.payload(
payloads=[
{
"samples": lambda: set(dataset.samples),
"pca_table": lambda: dataset.data_assets[
"viz PCA table"
].path,
}
]
):
vp.add(
bulkRNASeq.checks.check_viz_pca_table_index_and_columns_exist,
full_description=textwrap.dedent(f"""
- Check: Ensure all samples (row-indices) present and columns PC1, PC2 and PC3 are present
- Reason:
- PCA table should include all samples and PC1, PC2, PC3 (for 3D PCA viz)
- Potential Source of Problems:
- Bug in processing script
- Flag Condition:
- Green: All samples and all columns present
- Halt: At least one sample or column is missing
""")
)
with vp.component_start(
name="Processing Report",
description="",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ process VV_AFFYMETRIX {
path("Metadata/*_runsheet.csv"), emit: VVed_runsheet
path("00-RawData/*"), emit: VVed_rawData
path("01-oligo_NormExp/*"), emit: VVed_NormExp
path("02-limma_DGE/*"), emit: VVed_DGE
path("02-limma_DGE/*"), emit: VVed_DGE, optional: true
path("VV_report_GLmicroarray.tsv.MANUAL_CHECKS_PENDING"), optional: params.skipVV, emit: log
path("versions.yml"), emit: versions

Expand Down

0 comments on commit 0769d69

Please sign in to comment.