-
Notifications
You must be signed in to change notification settings - Fork 0
/
bangladesh_longitudinal_neighborhood.R
109 lines (86 loc) · 3.97 KB
/
bangladesh_longitudinal_neighborhood.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
## Longitudinal neighborhood analysis of a txon of interest
library(ggplot2)
# Get the bangladesh genus data
Bangladesh_genus <- getCollection(
microbiomeData::Bangladesh,
"16S (V4) Genus (Relative taxonomic abundance analysis)",
continuousMetadataOnly = FALSE
)
# Subset by age
min_samples_required <- 10
cool_taxon <- 'Bifidobacterium'
# We need to filter by ourselves, which means deconstructing the collection
sampleMetadata <- getSampleMetadata(Bangladesh_genus)
assayData <- microbiomeComputations::getAbundances(Bangladesh_genus)
ancestorIdColNames <- Bangladesh_genus@ancestorIdColumns # Remove me from calculations
recordIColName <- Bangladesh_genus@recordIdColumn # Use me to match data
# Create a list of correlation graphs, one for each age in ages. The original paper
# separated them by month, so let's do the same
ages <- floor(min(sampleMetadata$age_months)):ceiling(max(sampleMetadata$age_months))
graph_list <- lapply(1:(length(ages)-1), function(x) {
# Subset to the appropriate abundances for this age
age_samples <- sampleMetadata$Sample_Id[(sampleMetadata$age_months >= ages[x] & sampleMetadata$age_months < ages[x+1])]
# Bail if there aren't enough samples
if (length(age_samples) < min_samples_required) {
return(c())
} else {
age_abundances <- assayData[which(assayData$Sample_Id %in% age_samples), ]
# Make a new collection so we can send it through the correlation pipeline
age_collection <- microbiomeComputations::AbundanceData(
data=age_abundances,
name=paste("Age", ages[x], "to", ages[x+1], "months"),
recordIdColumn = recordIColName,
ancestorIdColumns = ancestorIdColNames)
age_correlation <- selfCorrelation(age_collection, method = 'pearson')
# Extract results. We want to do this ourselves to have more control over
# how we create the igraph object.
corr_stats <- data.table::setDT(age_correlation@statistics@statistics)
filtered_corr_stats <- corr_stats[abs(corr_stats$correlationCoef) >= 0.3 & corr_stats$pValue <= 0.05, ]
age_graph <- igraph::graph_from_data_frame(filtered_corr_stats, directed=FALSE)
# Now we want just the neighborhood of the node we care about
# Note an "ego graph" means "node neighborhood". Order=1 means only keep
# the direct neighbors of the node of interest. Returns a list of graphs
cool_taxon_neighborhood <- make_ego_graph(age_graph, order=1, nodes=which(V(age_graph)$name == cool_taxon))
V(cool_taxon_neighborhood[[1]])$color <- "white"
V(cool_taxon_neighborhood[[1]])$color[V(cool_taxon_neighborhood[[1]])$name == cool_taxon] <- "red"
# Plot! But only if there are any vertices
if (length(cool_taxon_neighborhood)>0 && length(V(cool_taxon_neighborhood[[1]])) > 0) {
igraph::plot.igraph(
cool_taxon_neighborhood[[1]],
arrow.mode=0,
# vertex.color="white",
vertex.label.dist=10,
vertex.label.color="black",
vertex.label.degree=0,
vertex.size=10,
main=paste("Age", ages[x], "to", ages[x+1], "months")
)
}
age_graph$age <- ages[x]
return(age_graph)
}
})
## Now can do network stats on the graph list
# Calculate cool_taxon's degree for each graph
# Note that the plot looks weird because cool_taxon gets degree 0 if there
# weren't enough samples, or if it didn't actually have any correlation
# friends that passed the thresholds
cool_degrees <- unlist(lapply(graph_list[1:60], function(g) {
if (length(g) > 0) {
node_degrees <- degree(g)
cool_degree <- node_degrees[which(names(node_degrees) == cool_taxon)]
if (length(cool_degree) ==0) {
return(0)
}
return(cool_degree)
} else {
return(0)
}
}))
gg_data <- data.frame(ages=unlist(lapply(graph_list[1:60], function(x) x$age)),cool_degrees)
# Plot
ggplot(gg_data, aes(x=ages, y=cool_degrees)) +
geom_line() +
geom_point() +
labs(x="Age (months)", y=paste("Degree of", cool_taxon), title=paste("Degree of", cool_taxon, "across early life")) +
theme_bw()