Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

- Start of ToxicRmod commit #211

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions ToxicR workinprogress.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
library("devtools")
#requires dependency Cmake
#devtools::install_github("NIEHS/ToxicR")
library("ToxicR")
library("dplyr")
library("PMCMRplus")
library(parallel)
library("pbmcapply")
library(foreach)
library(doParallel)

#bmdoutput <- read.table("~/storage/biomarker_input_.txt",
# header=FALSE, sep = "\t", row.names = 1)
bmdoutput <- read.table("~/storage/ToxicR mod/data for BMDExpress log2 CPM Males.txt",
header=FALSE, sep = "\t", row.names = 1)
#
#bmdoutput <- bmdoutput[-3,-c(1:3,5:9,11:15,17:18)]
#
bmdoutput <- as.data.frame(t(bmdoutput))
bmdoutput <-bmdoutput[,-1]
bmdoutput[-1] <- lapply(bmdoutput[-1], function(x) {
as.numeric(as.character(x))
})

dose_column <-as.numeric(bmdoutput[,1])
start.time <- Sys.time()

Func <- function(resp_bygene_column){
trendtestresgreater<- williamsTest(resp_bygene_column ~ dose_column, alternative = "greater")
trendtestresdfgreater <- data_frame("pass/fail value" = trendtestresgreater$crit.value[,1] - trendtestresgreater$statistic[,1])

trendtestresless<- williamsTest(resp_bygene_column ~ dose_column, alternative = "less")
trendtestresdfless <- data_frame("pass/fail value" = trendtestresgreater$crit.value[,1] - trendtestresgreater$statistic[,1])

trendtestresdf <- rbind(trendtestresdfgreater, trendtestresdfless)

#only generate model when significant based on williams test
if (any(trendtestresdf$`pass/fail value` <0)){

#cl <- makeCluster(detectCores())
#registerDoParallel(cl)
#clusterEvalQ(cl, {
# library("ToxicR")
#})
#clusterExport(cl, c("dose_column"))
modeltypes = c("hill","exp-3","exp-5","power","polynomial")
models <- foreach(model_type = modeltypes) %dopar% {
single_continuous_fit(dose_column, resp_bygene_column, model_type = model_type)
}
#stopCluster(cl)



#runmodels <- function(modeltype){
# single_continuous_fit(dose_column, resp_bygene_column, model_type = modeltype)
#}
#models <- pbmclapply(modeltypes,runmodels, mc.cores = detectCores())

modeloptions <- sapply(models, function(model) model$maximum)
index_of_best <- which.max(modeloptions)
model <- sapply(models[[index_of_best]], function(model) model)
#capture p-value output from ToxicR for goodness of fit
modeloutput<- capture.output(summary(model))
all_numbers_mean_adequate_pvalue <- lapply(modeloutput[9:10], function(line) {
as.numeric(tail(regmatches(line, gregexpr("\\b\\d+\\.\\d+\\b", line))[[1]],n=1))
})
#only return model if goodness of fit is significant
if(!any(as.data.frame(all_numbers_mean_adequate_pvalue) >0.05)){
return(list(as.numeric(model$bmd[1]),model$model))
}
}
}

result <-pbmclapply(bmdoutput[,2:ncol(bmdoutput)], Func, mc.cores = detectCores())

filtered_list <- Filter(Negate(is.null), result)
BMDs <- lapply(filtered_list, function(gene) as.numeric(gene[1]))
modeltypesresults <- lapply(filtered_list, function(gene) as.character(gene[2]))
#structuring of results to remove NULLs and have rows for output
significant_BMD_table <- data.frame("Gene" = names(filtered_list),
"BMD" = unlist(BMDs),
"Modeltype" = unlist(modeltypesresults),row.names = NULL)

end.time <- Sys.time()
time.taken <- round(end.time - start.time,2)
time.taken
write.table(significant_BMD_table, file ="~/storage/toxicR output.txt", sep=" ", col.names = TRUE)
129 changes: 129 additions & 0 deletions outputBMD.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
---
params:
# "191113_Human_S1500_Surrogate_2.0_Manifest.csv"
# "181019_Human_S1500_Surrogate_1.2_Manifest.txt"
# "191004_Human_Whole_Transcriptome_2.0_Manifest.txt"
# "wikipathways-20210810-gmt-Mus_musculus.gmt"
# "wikipathways-20210810-gmt-Rattus_norvegicus.gmt"
projectdir: null # Use when loading. If null, here::here() will be used. Can also hard code if for some reason you need that
project_title: "GeneTox21" # Change depending on your project name
project_name: "Your name here"
project_description: null # description of project for report, optional
analysis_name: "default" # short string identifying analysis settings. This will appear in the analysis directory and file names
batch_var: "batch" # For example, multiple plates
dose: "dose" # If there is a dose in the experiment; otherwise use NULL
platform: "TempO-Seq" # TempO-Seq Or RNA-Seq
nmr_threshold: 100000 # 10% of 1M reads for TempOSeq = 100,000; 10% of 10M reads for RNA-Seq = 1,000,000.
write_additional_output: FALSE # export BMD, biomarker, and biosets output? facet variable should contain chemical and timepoint information
celltype: "MCF7 cells" # only required for biosets output. Cell type or species used
units: "uM" # only required for biosets output. Units of dose
species: "human" # one of human, mouse, rat, hamster
design: "group" # single experimental group of interest; entries in this column must match the contrast names.
intgroup: ["group"] # experimental group of interest plus covariates; can be more than on
intgroup_to_plot: ["group"] # for PCA plots, add tabs for one or more groups to color
formula_override: null # e.g., "~batch + condition", to enable users to specify the DESeq2 formula to use
group_facet: "chemical" # If you have many different experimental groups, you may subset the report by specifying a column in the metadata to filter groups, and then setting the group of interest in group_filter
group_filter: null # Which group will this report be done on?
display_group_facet: null #
display_group_filter: null #
sortcol: "dose" # Optionally, a column by which to sort the contrasts
lenient_contrasts: FALSE # Use either column (exp, cont) of contrasts file to limit what is included in the report (instead of just exp column)
strict_contrasts: FALSE # Use BOTH columns (exp, cont) of contrasts file to limit what is included in the report
exclude_samples: null # Optionally, a vector of sample names to exclude from the analysis
exclude_groups: null # Optionally, a vector of groups to exclude from the analysis. By default this is assumed to be in the column specified by params$design.
include_only_column: null # Restrict analysis to group(s) in the column listed here based on params$include_only_group.
include_only_group: null # Restrict analysis to this/these group(s) within the column listed in params$include_only_column
cpus: 41 # Set to a lower number (e.g., 2 to 4) if you aren't working in a server environment
run_pathway_analysis: TRUE # Optionally disable pathway analysis if not available for your organism
wikipathways_directory: "~/shared/dbs/wikipathways"
linear_fc_filter_DEGs: 1.5 # Default 1.5
linear_fc_filter_biosets: 1.2 # Default 1.2. Used for biosets output when write_additional_output=True
biospyder_dbs: "~/shared/dbs/biospyder/"
biospyder_manifest_file: "181019_Human_S1500_Surrogate_1.2_Manifest.txt"
wikipathways_filename: "wikipathways-20210810-gmt-Homo_sapiens.gmt"
KEGGpathways_filename: "c2.cp.kegg_medicus.v2023.2.Hs.symbols.gmt"
nBestFeatures: 20 # The number of best features to make plots of their counts
nBest: 100 # Number of features to include in table and limiting PCA/clustering analysis
nHeatmap: 50 # Number of most variable genes for heatmap
nHeatmapDEGs: 50 # Number of DEGs for heatmap
cooks: FALSE # the DESeq Cook's distance cutoff, or FALSE to disable it
filter_gene_counts: FALSE # Filter genes to those with at least 1 count in 1 sample. Disabled by default for biomarker file output. Can enable to increase speed of analysis
generate_main_report: TRUE
generate_stats_report: TRUE
generate_data_explorer_report: TRUE
generate_go_pathway_report: TRUE
output_digits: 5
parallel: FALSE
species_data: null
count_data_file: null
sampledata_sep: null
MinCount: null
alpha: 0.05
feature_id: null
biospyder: null
output:
html_document:
toc: true
toc_float: true
number_sections: true
code_folding: hide
theme: spacelab # flatly spacelab sandstone cerulean
code_download: true
runtime: shiny
---


```{r run ToxicR, echo = FALSE,warning = FALSE, message = FALSE, progress = FALSE}
source("~/storage/R-ODAF_Health_Canada/ToxicR workinprogress.R", local = TRUE)
```

```{r generate_table}
DT::datatable(significant_BMD_table,
options = list(pagingType = 'full_numbers',
pageLength = 20,
scrollX = '100%',
dom = 'Bfrtip',
buttons = c('copy',
'csv',
'excel',
'pdf',
'print',
'colvis'),
digits=3),
escape = FALSE,
extensions = 'Buttons',
rownames = ,
filter = "top",
)
```

```{r render_plot}
library(shiny)

server <- function(input, output) {
# Render the plot based on the model
# Render the selected column of significant_BMD_table as text
model <- reactive({
selected_gene <- input$select
single_continuous_fit(dose_column,
bmdoutput[, selected_gene],
model_type=significant_BMD_table[significant_BMD_table$Gene==selected_gene,"Modeltype"])
})
output$plot <- renderPlot({
# Plot the model
plot(model(), main = input$my_select)
})
}

ui <- fluidPage(
selectInput(inputId = "select", label = "Gene", choices = significant_BMD_table[, 1]),
#plotOutput("my_plot"),
plotOutput("plot"), # Display the selected column as text
)

shinyApp(ui = ui, server = server)




```
Loading