more refs

R-ODAF · Sep 20, 2024 · 020bd6d
1 parent 43a40d8
commit 020bd6d
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 7 deletions.
diff --git a/Rmd/running_fisher.Rmd b/Rmd/running_fisher.Rmd
@@ -107,7 +107,7 @@ Report prepared by: `r params$bioinformatician_name`
 Report prepared for: `r params$researcher_name`
 
 ***
-Modified from code provided by Andrew Williams (Health Canada) and based on Running Fisher analysis as described in Kuperschmidt et al. (2010) [need to cite].
+Modified from code provided by Andrew Williams (Health Canada) and based on Running Fisher analysis as described in @kupershmidt_ontology_based_2010.
 
 Here is an explanation (that perhaps Andrew will provide?) of the Running Fisher method:
 
@@ -240,7 +240,7 @@ make_heatmap <- function (filtered_data, threshold_lo, threshold_hi, colours) {
 
   # Create the heatmap using the image function
   par(mar = c(2, 10, 14, 0), cex.main = 0.8)  # Margins: bottom, left, top, right
-  
+
   if (threshold_hi > 0) {
     image(1:ncol(data_matrix), 1:nrow(data_matrix), t(data_matrix),
           col = colours, breaks = c(-100, threshold_lo, threshold_hi, 100), axes = FALSE, xlab = "", ylab = "",
@@ -293,11 +293,6 @@ for(k in 1:length(bms)){
 ```
 
 ```{r load_biosets, echo=FALSE}
-# # uncomment code to run interactively
-# # Otherwise, the data will be loaded from bs by way of render_DESeq2_report.parallel.R
-#data_file <- file.path(paths$RData, paste0(params$project_title, "_DEG_data.RData"))
-#load(data_file)
-
 # Select EnsemblID as ID column
 # Note ensembl ID column needs to be renamed as Gene to match Andrew's code
 select_and_rename <- function(df) {

diff --git a/references/rf_references.bib b/references/rf_references.bib
@@ -174,6 +174,25 @@ @article{jonsson_hypoxia-independent_2016
 	pages = {929--939},
 }
 
+@article{kupershmidt_ontology_based_2010,
+	title = {Ontology-{Based} {Meta}-{Analysis} of {Global} {Collections} of {High}-{Throughput} {Public} {Data}},
+	volume = {5},
+	issn = {1932-6203},
+	url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0013066},
+	doi = {10.1371/journal.pone.0013066},
+	abstract = {Background The investigation of the interconnections between the molecular and genetic events that govern biological systems is essential if we are to understand the development of disease and design effective novel treatments. Microarray and next-generation sequencing technologies have the potential to provide this information. However, taking full advantage of these approaches requires that biological connections be made across large quantities of highly heterogeneous genomic datasets. Leveraging the increasingly huge quantities of genomic data in the public domain is fast becoming one of the key challenges in the research community today. Methodology/Results We have developed a novel data mining framework that enables researchers to use this growing collection of public high-throughput data to investigate any set of genes or proteins. The connectivity between molecular states across thousands of heterogeneous datasets from microarrays and other genomic platforms is determined through a combination of rank-based enrichment statistics, meta-analyses, and biomedical ontologies. We address data quality concerns through dataset replication and meta-analysis and ensure that the majority of the findings are derived using multiple lines of evidence. As an example of our strategy and the utility of this framework, we apply our data mining approach to explore the biology of brown fat within the context of the thousands of publicly available gene expression datasets. Conclusions Our work presents a practical strategy for organizing, mining, and correlating global collections of large-scale genomic data to explore normal and disease biology. Using a hypothesis-free approach, we demonstrate how a data-driven analysis across very large collections of genomic data can reveal novel discoveries and evidence to support existing hypothesis.},
+	language = {en},
+	number = {9},
+	urldate = {2024-06-17},
+	journal = {PLOS ONE},
+	author = {Kupershmidt, Ilya and Su, Qiaojuan Jane and Grewal, Anoop and Sundaresh, Suman and Halperin, Inbal and Flynn, James and Shekar, Mamatha and Wang, Helen and Park, Jenny and Cui, Wenwu and Wall, Gregory D. and Wisotzkey, Robert and Alag, Satnam and Akhtari, Saeid and Ronaghi, Mostafa},
+	month = sep,
+	year = {2010},
+	note = {Publisher: Public Library of Science},
+	keywords = {tgx, Adipocytes, Adipose tissue, Data mining, Gene expression, Genetics of disease, Metaanalysis, Microarrays, Ontologies, stats, Read},
+	pages = {e13066},
+}
+
 @article{li_development_2015,
 	title = {Development of a toxicogenomics signature for genotoxicity using a dose-optimization and informatics strategy in human cells},
 	volume = {56},