-
Notifications
You must be signed in to change notification settings - Fork 0
/
util.R
51 lines (43 loc) · 1.44 KB
/
util.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Attach dplyr without printing its startup messages.
suppressPackageStartupMessages(library(dplyr))
# Turn off the informational message that dplyr's summarise() emits.
options(dplyr.summarise.inform = FALSE)
# Probabilistically selects an index from a vector of (unnormalised) weights.
#
# vv: numeric vector of weights; it is normalised to sum to 1 before sampling.
#
# Returns:
#   - vv itself when vv is NULL or a single NA,
#   - 0 for an empty vector and 1 for a single (non-NA) weight,
#   - otherwise the 1-based index of the sampled element, or NA when the
#     weights cannot be normalised (they contain NA/NaN or sum to zero).
selectIndexFromProbabilities <- function(vv) {
  if (is.null(vv)) return(vv)
  n <- length(vv)
  if (n == 0L) return(0L)
  if (n == 1L) {
    # A lone NA is handed back unchanged; any other single weight is index 1.
    if (is.na(vv)) return(vv)
    return(1L)
  }
  probs <- vv / sum(vv) # normalise to 1
  # NA weights or a zero total leave NA/NaN here: there is nothing to sample.
  if (anyNA(probs)) return(NA_integer_)
  cumulative <- cumsum(probs) # cumulative sum to 1
  roll <- runif(1)
  # Pick the first entry whose cumulative probability exceeds the dice roll.
  select <- match(TRUE, cumulative > roll)
  if (is.na(select)) {
    # Rounding can leave the final cumulative value marginally below the roll;
    # fall back to the last entry that carries any probability mass.
    positive <- which(probs > 0)
    if (length(positive) > 0L) select <- positive[length(positive)]
  }
  select
}
# Writes msg to the console prefixed with the current time and ' | '.
# No trailing newline is added.
echo <- function(msg) {
  stamp <- as.character(Sys.time())
  line <- paste0(stamp, ' | ', msg)
  cat(line)
}
# Console progress bar, meant to be called once per processed row.
#
# row: 1-based counter of the current row; NULL, NA or non-numeric values
#   make the call a no-op.
# char: the mark printed for this row.
# majorInterval: rows per output line; each line opens with a timestamp and
#   closes with the row count.
# minorInterval: a '|' separator is printed after every this many rows.
printProgress <- function(row, char, majorInterval = 100, minorInterval = 10) {
  usable <- !is.null(row) && !is.na(row) && is.numeric(row)
  if (!usable) return()
  startsLine <- (row - 1) %% majorInterval == 0
  if (startsLine) {
    echo('')
  }
  cat(char)
  if (row %% minorInterval == 0) {
    cat('|')
  }
  if (row %% majorInterval == 0) {
    cat(paste0(' ', row, '\n'))
  }
}
# Returns the distinct cluster_id_5 values found in the groups that
# getGroups() builds from the given filter CSV.
getGroupIds <- function(filterCsv) {
  clusterColumn <- getGroups(filterCsv)$cluster_id_5
  unique(clusterColumn)
}
# Reads a gzip-compressed filter CSV and returns, for every
# (cluster_id_5, sex) pair, the lowest min_age (as age_start) and the
# highest max_age (as age_end).
#
# filterCsv: path to the CSV; it must provide the columns sex, min_age,
#   max_age and cluster_id_5.
# Returns the dplyr summary with columns cluster_id_5, sex, age_start and
# age_end. summarise() is called with its default grouping behaviour, so the
# result may still carry grouping metadata (presumably by cluster_id_5 under
# current dplyr defaults - verify if callers depend on it).
# Stops with an error when the file cannot be read or a column is missing.
getGroups <- function(filterCsv) {
  gz1 <- gzfile(filterCsv, 'rt')
  # Release the connection even when read.csv() fails part-way through.
  on.exit(close(gz1), add = TRUE)
  data <- read.csv(gz1, header = TRUE, sep = ',', stringsAsFactors = FALSE,
                   strip.white = TRUE)
  datacols <- c("sex",
                "min_age",
                "max_age",
                "cluster_id_5")
  missingCols <- setdiff(datacols, names(data))
  if (length(missingCols) > 0) {
    stop("filter CSV is missing required column(s): ",
         paste(missingCols, collapse = ", "), call. = FALSE)
  }
  filters <- data[, datacols] %>%
    group_by(cluster_id_5, sex) %>%
    summarise(age_start = min(min_age), age_end = max(max_age))
  return(filters)
}