-
Notifications
You must be signed in to change notification settings - Fork 42
/
dataset_developing-dendritic-cells_schlitzer.R
47 lines (36 loc) · 1.51 KB
/
dataset_developing-dendritic-cells_schlitzer.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
library(tidyverse)
library(dynbenchmark)
library(GEOquery)
# Make GEOquery download through curl.
options(download.file.method.GEOquery = "curl")

# Register which dataset this script is preprocessing; the dataset_*_file()
# helpers below are called without an id, so they presumably resolve paths
# relative to this one (verify against dynbenchmark).
dataset_preprocessing("real/gold/developing-dendritic-cells_schlitzer")

# download and untar files
# The archive is fetched as "GSE60781_RAW.tar"; the helper's return value is
# handed straight to untar(), so it is presumably the local path of the
# downloaded file.
geo_archive_url <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE60781&format=file"
geo_archive_path <- download_dataset_source_file(
  "GSE60781_RAW.tar",
  geo_archive_url
)
untar(tarfile = geo_archive_path, exdir = dataset_source_file(""))
# read counts

# Read one per-sample count file into a long table with columns
# gene / count / sample.
# The file is parsed as a header-less, tab-separated table whose first column
# is a character identifier and whose second column is an integer count.
# The sample id is the file name with everything from the first underscore
# onwards removed.
read_sample_counts <- function(filename) {
  sample_counts <- read_tsv(
    dataset_source_file(filename),
    col_names = c("gene", "count"),
    col_types = cols(gene = "c", count = "i")
  )
  mutate(sample_counts, sample = gsub("_.*", "", filename))
}

# Every file in the source directory whose name matches the read-count pattern.
count_files <- list.files(dataset_source_file(""), ".*read[cC]ount")
counts_long <- map_df(count_files, read_sample_counts)

# Reshape to one row per sample and one column per gene, then turn the result
# into a matrix whose row names are the sample ids.
counts_wide <- as.data.frame(spread(counts_long, gene, count))
rownames(counts_wide) <- NULL
counts <- as.matrix(column_to_rownames(counts_wide, "sample"))

# Drop columns that are not genes; these names look like the summary entries
# written by the read-counting tool (TODO confirm against the raw files).
non_gene_columns <- c("alignment_not_unique", "no_feature", "ambiguous")
counts <- counts[, !(colnames(counts) %in% non_gene_columns)]
# download cell info
geo <- GEOquery::getGEO("GSE60781", destdir = dataset_source_file(""))

# Only the first element returned by getGEO() is used (presumably the single
# series matrix of this accession); its phenotype data becomes a data frame.
sample_annotation <- as(Biobase::phenoData(geo[[1]]), "data.frame")

# The milestone is the sample title with every "_<digits>" part removed.
sample_annotation <- mutate(
  sample_annotation,
  milestone_id = gsub("_[0-9]*", "", title)
)
sample_annotation <- select(
  sample_annotation,
  cell_id = geo_accession,
  milestone_id
)

# Put the annotation rows in the same order as the rows of the count matrix.
# NOTE(review): match() gives NA for a sample without an annotation entry, so
# this relies on every count file having a record in the GEO series; confirm.
row_order <- match(rownames(counts), sample_annotation$cell_id)
cell_info <- slice(sample_annotation, row_order)
# Milestone network: a linear chain MDP -> CDP -> PreDC in which every edge
# has length 1 and is directed.
milestone_network <- tribble(
  ~from, ~to,
  "MDP", "CDP",
  "CDP", "PreDC"
) %>%
  mutate(length = 1, directed = TRUE)

# NOTE(review): milestone_ids and cell_ids are not passed to
# save_raw_dataset() below; kept so the objects this script defines stay the
# same.
milestone_ids <- c("MDP", "CDP", "PreDC")
cell_ids <- cell_info$cell_id

# Named vector mapping each cell id (names) to its milestone id (values).
grouping <- cell_info %>%
  select(cell_id, milestone_id) %>%
  deframe()

# Store the raw dataset; lst() names each element after its variable.
save_raw_dataset(lst(milestone_network, cell_info, grouping, counts))