-
Notifications
You must be signed in to change notification settings - Fork 0
/
db.R
34 lines (26 loc) · 1.31 KB
/
db.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
##
## Write the result of a Redshift sql query to a dataframe
##
library(RPostgreSQL)
library(uuid)
ReadSqlResults <- function(con, sql.query, aws.access.key, aws.secret.key, s3.bucket, redshift.iam.role) {
  # Run a Redshift query by UNLOADing its result to S3 as gzipped CSV
  # chunks, downloading them with the AWS CLI, and binding them into a
  # single data frame.
  #
  # Args:
  #   con:               Open DBI/RPostgreSQL connection to Redshift.
  #   sql.query:         SQL SELECT statement to execute.
  #   aws.access.key:    AWS access key id used by the CLI download.
  #   aws.secret.key:    AWS secret access key used by the CLI download.
  #   s3.bucket:         S3 bucket name used as the staging area.
  #   redshift.iam.role: IAM role ARN Redshift assumes for the UNLOAD.
  #
  # Returns:
  #   A data.frame with the query result, or NULL when the UNLOAD
  #   produced no dump files (empty result set).
  path <- paste0("data/tmp/", UUIDgenerate())
  message("Running SQL query")
  # Replace line breaks with a space (not ""), so SQL tokens separated
  # only by a newline are not glued together; then escape single quotes
  # for embedding inside the UNLOAD string literal.
  escaped.sql.query <- gsub("[\r\n]+", " ", sql.query)
  escaped.sql.query <- gsub("'", "\\\\'", escaped.sql.query)
  # Unload the query result to S3 as gzipped, quoted CSV with a header row.
  unload.query <- paste0(
    "UNLOAD ('", escaped.sql.query, "') TO 's3://", s3.bucket, "/", path,
    "' iam_role '", redshift.iam.role,
    "' HEADER DELIMITER AS ',' GZIP ALLOWOVERWRITE ADDQUOTES;"
  )
  # UNLOAD returns no rows; the previous unused `data <-` assignment is gone.
  dbGetQuery(con, unload.query)
  # Download the dumps with the AWS CLI.  shQuote() guards against shell
  # injection through the credential/bucket arguments.
  # NOTE(review): passing secrets on the command line exposes them in the
  # process list; prefer an AWS credentials file or instance profile.
  cp.cmd <- paste0(
    "AWS_ACCESS_KEY_ID=", shQuote(aws.access.key),
    " AWS_SECRET_ACCESS_KEY=", shQuote(aws.secret.key),
    " aws s3 cp s3://", s3.bucket, "/ . --recursive --exclude '*'",
    " --include ", shQuote(paste0(path, "*"))
  )
  status <- system(cp.cmd)
  # Fail fast instead of silently merging stale or missing files.
  if (status != 0) {
    stop("aws s3 cp failed with exit status ", status, call. = FALSE)
  }
  # Collect the downloaded chunks; anchor the pattern so only .gz files
  # belonging to this UUID-prefixed dump are picked up.
  message("Reading csv dumps")
  files <- list.files(
    path = dirname(path), full.names = TRUE,
    pattern = paste0(basename(path), ".*\\.gz$")
  )
  if (length(files) == 0) {
    # do.call(rbind, list()) would return NULL anyway; warn so an empty
    # result is distinguishable from a download problem.
    warning("No dump files found for ", path, call. = FALSE)
    return(NULL)
  }
  datalist <- lapply(files, function(f) {
    read.csv(file = f, sep = ",", quote = '"', header = TRUE)
  })
  message("Merging csv dumps in a single dataframe")
  dataset <- do.call(rbind, datalist)
  # Remove the local dump files once merged.
  unlink(files)
  dataset
}