forked from nasa/GeneLab_Data_Processing
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
nasa#29 - Based on: 1092e80 2dd8fce 11a22f0 7ba75d0 4e317f2 5d35b61 7d2cc24 53363e4 1b6e325 b5013e9 574eb79 fc89c5e 1500019 6719218 35c9823 af0b716 cbf6055 6a0d105 ee188eb a8c65d3 - Changed dge module to DGE_BY_DESEQ2, added test modules - Removed deprecated conda support files - Updated ensembl file used for test dataset VV
- Loading branch information
1 parent
61be4ad
commit d857577
Showing
9 changed files
with
185 additions
and
254 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,49 +7,70 @@ params: | |
input_gene_results_dir: "" | ||
# One and only one of the following must be specified | ||
runsheet_path: NULL | ||
isa_path: NULL | ||
|
||
primary_keytype: "" # Denotes the name of the identifier column (e.g. ENSEMBL, TAIR) | ||
normalization: "default" # ENUM like, supports "ERCC-groupB" and "default" | ||
normalized_counts_output_prefix: "" | ||
dge_output_prefix: "" | ||
DEBUG_MODE_LIMIT_GENES: FALSE | ||
DEBUG_MODE_ADD_DUMMY_COUNTS: FALSE | ||
work_dir: "." # should be set to launch directory | ||
verbose: FALSE | ||
work_dir: "." # NON_DPPD: should be set to launch directory | ||
SUMMARY_FILE_PATH: "summary.txt" | ||
--- | ||
|
||
## Substeps {.tabset} | ||
|
||
### 1. Setup | ||
|
||
<!--- START:NON_DPPD ---> | ||
```{r, setup, include=FALSE} | ||
knitr::opts_knit$set(root.dir = params$work_dir) | ||
library(knitr) | ||
``` | ||
|
||
```{r libary-loading, message = params$verbose, warning = params$verbose} | ||
```{r libary-loading} | ||
# allow more flexibility in download time | ||
# useful for slower connections where the default of 60 seconds might be exceeded | ||
options(timeout=600) | ||
# Import libraries (tximport, DESeq2, tidyverse, Risa) | ||
# Import libraries (tximport, DESeq2, tidyverse) | ||
library(tximport) | ||
library(DESeq2) | ||
library(stringr) | ||
params | ||
SUMMARY_FILE_PATH <- params$SUMMARY_FILE_PATH | ||
yaml::write_yaml(params, "last_params.yml") | ||
``` | ||
```{r validate_params} | ||
# assert either runsheet_path OR isa_path supplied in params | ||
if (!xor(!is.null(params$runsheet_path), !is.null(params$isa_path))) { | ||
stop("Must supply EITHER runsheet_path or isa_path in params") | ||
} | ||
# END:NON_DPPD | ||
# START:ONLY_DPPD | ||
# params <- c( | ||
# runsheet_path = "/path/to/runsheet", # Used for downloading | ||
# input_gene_results_dir = "/path/to/genes_results_files", # Location of the gene results files | ||
# primary_keytype = "", # Denotes the name of the identifier column (e.g. ENSEMBL, TAIR) | ||
# normalization = "", # ENUM like, supports "ERCC-groupB" and "default" | ||
# normalized_counts_output_prefix = "", # Output prefix for normalized counts files | ||
# dge_output_prefix = "" # Output prefix for DGE files | ||
# ) | ||
# END:ONLY_DPPD | ||
``` | ||
|
||
### 2. Load Study Metadata | ||
```{r runsheet-to-compare_df, include=(!is.null(params$runsheet_path)), eval=(!is.null(params$runsheet_path))} | ||
```{r runsheet-to-compare_df} | ||
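#' Build the sample comparison table from a runsheet | ||
#' | ||
#' Reads the runsheet CSV with read.csv() and keeps only the | ||
#' Factor Value columns. | ||
#' NOTE(review): the remainder of the function body is not shown in this | ||
#' diff hunk; confirm the details below against the full file. | ||
#' | ||
#' @param runsheet_path Path to the runsheet CSV file. | ||
#' | ||
#' @return The table the caller stores as `compare_csv`; presumably a | ||
#' data frame with the sample identifier in the first column and one | ||
#' column per factor after it (TODO confirm). | ||
#' | ||
#' @examples | ||
#' compare_csv <- compare_csv_from_runsheet(params$runsheet_path) | ||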
compare_csv_from_runsheet <- function(runsheet_path) { | ||
df = read.csv(runsheet_path) | ||
# get only Factor Value columns | ||
|
@@ -64,25 +85,6 @@ compare_csv <- compare_csv_from_runsheet(params$runsheet_path) | |
#DT::datatable(compare_csv, caption = "Data Frame of parsed runsheet filtered to required columns") | ||
``` | ||
|
||
```{r isa-to-compare_df, include=(!is.null(params$isa_path)), eval=(!is.null(params$isa_path))} | ||
# TODO: Remove this route, ISA zip support will be dropped as of DPPD-7101-F | ||
library(Risa) | ||
compare_csv_from_isa_archive <- function(isa_path) { | ||
td = tempdir() | ||
unzip(isa_path, exdir = td) | ||
isa <- Risa::readISAtab(path = td) | ||
n = as.numeric(which(isa@assay.technology.types == "RNA Sequencing (RNA-Seq)")) | ||
isa_tabs <- isa@assay.tabs[[n]]@assay.file | ||
factors <- as.data.frame(isa@factors[[1]], stringsAsFactors = FALSE) | ||
colnames(factors) <- paste("factor",1:dim(factors)[2], sep = "_") | ||
return(data.frame(sample_id = isa_tabs$`Sample Name`, factors)) | ||
} | ||
# Loading metadata from isa archive | ||
compare_csv <- compare_csv_from_isa_archive(params$isa_path) | ||
#DT::datatable(compare_csv, caption = "Data Frame of parsed isa archive filtered to required metadata") | ||
``` | ||
|
||
```{r compare_df-to-study_df} | ||
study <- as.data.frame(compare_csv[,2:dim(compare_csv)[2]]) | ||
colnames(study) <- colnames(compare_csv)[2:dim(compare_csv)[2]] | ||
|
@@ -130,8 +132,7 @@ files <- list.files( | |
## Reorder the *genes.results files to match the ordering of the ISA samples | ||
# Replace spaces in sample names from ISA with "_", consistent with runsheet generation | ||
samples = str_replace_all(rownames(study), " ", "_") | ||
samples = rownames(study) | ||
reordering <- sapply(samples, function(x)grep(paste0("Rsem_gene_counts/", x,".genes.results$"), files, value=FALSE)) | ||
files <- files[reordering] | ||
names(files) <- samples | ||
|
@@ -335,12 +336,12 @@ output_table_1$LRT.p.value <- res_1_lrt@listData$padj | |
```{r wald-test-iteration} | ||
## Iterate through Wald Tests to generate pairwise comparisons of all groups | ||
for (i in 1:dim(contrasts)[2]){ | ||
res_1 <- results(dds_1, contrast=c("condition",contrasts[1,i],contrasts[2,i])) | ||
res_1 <- as.data.frame(res_1@listData)[,c(2,4,5,6)] | ||
colnames(res_1)<-c(paste0("Log2fc_",colnames(contrasts)[i]),paste0("Stat_",colnames(contrasts)[i]),paste0("P.value_",colnames(contrasts)[i]),paste0("Adj.p.value_",colnames(contrasts)[i])) | ||
output_table_1<-cbind(output_table_1,res_1) | ||
rm(res_1) | ||
res_1 <- results(dds_1, contrast=c("condition",contrasts[1,i],contrasts[2,i])) | ||
res_1 <- as.data.frame(res_1@listData)[,c(2,4,5,6)] | ||
colnames(res_1)<-c(paste0("Log2fc_",colnames(contrasts)[i]),paste0("Stat_",colnames(contrasts)[i]),paste0("P.value_",colnames(contrasts)[i]),paste0("Adj.p.value_",colnames(contrasts)[i])) | ||
output_table_1<-cbind(output_table_1,res_1) | ||
} | ||
``` | ||
|
||
```{r} | ||
|
@@ -385,6 +386,16 @@ write.csv( | |
sampleTable, | ||
file = paste0(params$dge_output_prefix, "SampleTable.csv") | ||
) | ||
# Create summary file based on output_table_1 | ||
output <- capture.output(summary(output_table_1)) | ||
# Open file connection | ||
conn <- file(paste0(params$dge_output_prefix, "summary.txt"), "w") | ||
# Write the captured output to the file | ||
writeLines(output, conn) | ||
# DT::datatable(head(output_table_1, n = 30), | ||
# caption = "First 30 rows of differential gene expression table", | ||
# extensions = "FixedColumns", | ||
|
@@ -408,4 +419,4 @@ cat(capture.output(sessionInfo()), | |
file = version_output_fn, | ||
append = TRUE, | ||
sep = "\n") | ||
``` | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
71 changes: 71 additions & 0 deletions
71
...ow_Documentation/NF_RCP-F/workflow_code/tests/modules/DGE_BY_DESEQ2/DGE_BY_DESEQ2.nf.test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
nextflow_process { | ||
|
||
name "Test Process DGE_BY_DESEQ2" | ||
script "modules/DGE_BY_DESEQ2/main.nf" | ||
process "DGE_BY_DESEQ2" | ||
|
||
test("GLDS-194") { | ||
tag 'DGE_BY_DESEQ2' | ||
|
||
when { | ||
params { | ||
// define parameters here. Example: | ||
use_dummy_gene_counts = true | ||
} | ||
process { | ||
""" | ||
// define inputs of the process here. Example: | ||
input[0] = file("test-datasets/testdata/GLDS-194/Metadata/GLDS-194_bulkRNASeq_v1_runsheet.csv") | ||
input[1] = file("test-datasets/testdata/GLDS-194/03-RSEM_Counts/*.genes.results") | ||
input[2] = [ primary_keytype:'ENSEMBL', has_ercc:true ] | ||
input[3] = file("https://figshare.com/ndownloader/files/36597114") | ||
input[4] = file("${ baseDir }/bin/dge_annotation_R_scripts.zip") | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assert process.success | ||
assert snapshot( | ||
process.out.summary, | ||
process.out.norm_counts, | ||
process.out.summary_ercc, | ||
process.out.norm_counts_ercc, | ||
process.out.version | ||
).match() | ||
} | ||
|
||
} | ||
|
||
test("GLDS-321:55_.ISSUE") { | ||
tag 'DGE_BY_DESEQ2' | ||
|
||
when { | ||
params { | ||
// define parameters here. Example: | ||
use_dummy_gene_counts = true | ||
} | ||
process { | ||
""" | ||
// define inputs of the process here. Example: | ||
input[0] = file("test-datasets/testdata/GLDS-321/Metadata/GLDS-321_bulkRNASeq_v1_runsheet.csv") | ||
input[1] = file("test-datasets/testdata/GLDS-321/03-RSEM_Counts/*.genes.results") | ||
input[2] = [ primary_keytype:'TAIR', has_ercc:false ] | ||
input[3] = file("https://figshare.com/ndownloader/files/36597132") | ||
input[4] = file("${ baseDir }/bin/dge_annotation_R_scripts.zip") | ||
""" | ||
} | ||
} | ||
|
||
then { | ||
assert process.success | ||
assert snapshot( | ||
process.out.summary, | ||
process.out.norm_counts, | ||
process.out.version, | ||
).match() | ||
} | ||
|
||
} | ||
|
||
} |
Oops, something went wrong.