-
Notifications
You must be signed in to change notification settings - Fork 0
/
elastic_ex.R
125 lines (108 loc) · 2.4 KB
/
elastic_ex.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
source("load_packages.R")
# Open the Elasticsearch connection. connect() is not defined in this file;
# presumably it is provided by the packages loaded in load_packages.R.
connect()

# Match query: fetch the hits whose HTTP_reply equals a user-supplied status
# code. The code is spliced into the JSON template with C-style sprintf()
# formatting (the %i placeholder).
user_in <- 404
mmatch <- sprintf('
{
"query": {
"bool": {
"must": [
{ "match": {"HTTP_reply": "%i"}}
]
}
}
}
', user_in)

# Keep only the list of matching documents from the response.
s <- Search(index = "weblog", body = mmatch)$hits$hits
# do the following
# range query between two given dates
# histograms and other aggregations
# top visitors
# count for HTTP_reply's
# min and max reply_size
# avg reply sizes for top 10 visitors
# Date range query.
#
# Searches the "weblog" index for documents whose `timestamp` field lies
# between two dates (both bounds inclusive: gte / lte).
#
# Args:
#   date1: lower bound, a single string in "yyyy-MM-dd HH:mm:ss" format.
#   date2: upper bound, a single string in the same format.
#
# The defaults reproduce the window that used to be hard-coded in the body,
# so calling elastic_range() with no arguments behaves as before.
#
# Returns the `hits$total` element of the Search() response.
elastic_range <- function(date1 = "1995-06-01 06:00:59",
                          date2 = "1995-11-15 11:59:59") {
  # Both bounds are spliced verbatim into the JSON body. sprintf() is
  # vectorised, so anything other than one string per bound would silently
  # produce several (or malformed) request bodies; reject that up front.
  stopifnot(
    is.character(date1), length(date1) == 1L, !is.na(date1),
    is.character(date2), length(date2) == 1L, !is.na(date2)
  )

  query_template <- '
{
"query" : {
"bool" : {
"must" : {
"range" : {
"timestamp" : {
"gte" : "%s",
"lte" : "%s",
"format" : "yyyy-MM-dd HH:mm:ss"
}
}
}
}
}
}
'
  query_body <- sprintf(query_template, date1, date2)
  Search(index = "weblog", body = query_body)$hits$total
}
# Top visitors.
#
# Runs a `terms` aggregation named "top_visitors" on the `host` field of the
# "weblog" index. "size" : 0 asks for no document hits, only the aggregation.
# No bucket count is given, so the server-side default applies
# (NOTE(review): Elasticsearch documents that default as 10 - confirm).
#
# Returns whatever Search() returns with raw = TRUE (presumably the unparsed
# JSON response - verify against the elastic package docs).
elastic_count_groupby_host <- function() {
  # The request body is assembled line by line; the leading and trailing ""
  # entries give the string its opening and closing newline.
  body_lines <- c(
    "",
    "{",
    '"size" : 0,',
    '"aggs" : {',
    '"top_visitors" : {',
    '"terms" : { "field" : "host"}',
    "}",
    "}",
    "}",
    ""
  )
  request_body <- paste(body_lines, collapse = "\n")

  Search(index = "weblog", body = request_body, raw = TRUE)
}
# Reply type counts.
#
# Runs a `terms` aggregation named "top_reply_types" on the `HTTP_reply`
# field of the "weblog" index. "size" : 0 asks for no document hits, only the
# aggregation.
#
# Returns whatever Search() returns with raw = TRUE (presumably the unparsed
# JSON response - verify against the elastic package docs).
elastic_count_groupby_http <- function() {
  # The request body is assembled line by line; the leading and trailing ""
  # entries give the string its opening and closing newline.
  body_lines <- c(
    "",
    "{",
    '"size" : 0,',
    '"aggs" : {',
    '"top_reply_types" : {',
    '"terms" : { "field" : "HTTP_reply"}',
    "}",
    "}",
    "}",
    ""
  )
  request_body <- paste(body_lines, collapse = "\n")

  Search(index = "weblog", body = request_body, raw = TRUE)
}
# Max and min reply size.
#
# Requests two metric aggregations over the `reply_size` field of the
# "weblog" index: "max_reply_size" (max) and "min_reply_size" (min).
# "size" : 0 asks for no document hits, only the aggregations.
#
# Returns whatever Search() returns with raw = TRUE (presumably the unparsed
# JSON response - verify against the elastic package docs).
elastic_min_max <- function() {
  # The request body is assembled line by line; the leading and trailing ""
  # entries give the string its opening and closing newline.
  body_lines <- c(
    "",
    "{",
    '"size" : 0,',
    '"aggs" : {',
    '"max_reply_size" : { "max" : {"field" : "reply_size"} },',
    '"min_reply_size" : { "min" : {"field" : "reply_size"} }',
    "}",
    "}",
    ""
  )
  request_body <- paste(body_lines, collapse = "\n")

  Search(index = "weblog", body = request_body, raw = TRUE)
}
# Average reply size per top visitor.
#
# Buckets the "weblog" index by `host` with a `terms` aggregation
# ("group_by_host") and, inside each bucket, computes the average of
# `reply_size` ("avg_reply_size"). "size" : 0 asks for no document hits, only
# the aggregation. No bucket count is given, so the server-side default
# applies (NOTE(review): Elasticsearch documents that default as 10, which
# matches the "top 10" wording of the original comment - confirm).
#
# Returns whatever Search() returns with raw = TRUE (presumably the unparsed
# JSON response - verify against the elastic package docs).
elastic_group_by_avg <- function() {
  # The request body is assembled line by line; the leading and trailing ""
  # entries give the string its opening and closing newline.
  body_lines <- c(
    "",
    "{",
    '"size" : 0,',
    '"aggs" : {',
    '"group_by_host" : {',
    '"terms" : { "field" : "host"},',
    '"aggs" : {',
    '"avg_reply_size" : {',
    '"avg" : {"field" : "reply_size"}',
    "}",
    "}",
    "}",
    "}",
    "}",
    ""
  )
  request_body <- paste(body_lines, collapse = "\n")

  Search(index = "weblog", body = request_body, raw = TRUE)
}
# Benchmark: time each query function over 10 runs, then save the timings
# to elastic_results.csv.
elastic_results <- microbenchmark(
  elastic_count_groupby_host(),
  elastic_count_groupby_http(),
  elastic_group_by_avg(),
  elastic_min_max(),
  elastic_range(),
  times = 10
)
write_csv(elastic_results, "elastic_results.csv")