-
Notifications
You must be signed in to change notification settings - Fork 0
/
recipe1.rb
73 lines (64 loc) · 1.82 KB
/
recipe1.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
####################################################
# Recipe1: Download a file and save it as a local file
#
# https://tumugi.github.io/recipe1/
#
# 1. Download archived daily access logs from
#    remote servers using the wget command
# 2. Count the number of rows grouped by URI and
#    save the result into a CSV file
####################################################
require 'ltsv'
require 'shellwords'
require 'zip'
####################################################
# 1. Archived log download
####################################################
# Command task that builds the wget command line used to fetch the zipped
# access log for the requested day.
#
# Params:
#   host         - base URL the archive is fetched from
#                  (default: 'https://tumugi.github.io')
#   log_filename - archive file name; computed by the `log_filename` block
#                  as "access_YYYYMMDD.log.zip" from `day`
#   day          - required time value, auto-bound from the CLI parameter
#
# Output: local file target at "tmp/<log_filename>".
task :download_log, type: :command do
  param :host, default: 'https://tumugi.github.io'
  param :log_filename, type: :string
  param :day, auto_bind: true, type: :time, required: true # auto-bound from the CLI parameter

  log_filename { "access_#{day.strftime('%Y%m%d')}.log.zip" }

  command do
    url = "#{host}/data/#{log_filename}"
    # Escape both arguments: `host` and `log_filename` are caller-supplied
    # params, so whitespace or shell metacharacters in them must not be
    # interpreted by the shell. For the default values the resulting
    # command string is unchanged.
    "wget #{Shellwords.escape(url)} -O #{Shellwords.escape(output.path)}"
  end

  output { target(:local_file, "tmp/#{log_filename}") }
end
####################################################
# 2. Count rows grouped by URI
####################################################
# Counts access-log rows per URI and writes one "uri,count" line per URI.
#
# Requires: :download_log (its archive is read via `input.path`).
# Output:   local file target at '/tmp/result.csv'.
task :count_rows_group_by_uri do
  requires :download_log
  output target(:local_file, '/tmp/result.csv')

  run do
    # Default of 0 lets every URI be incremented without a prior existence check.
    counts = Hash.new(0)
    log input.path

    Zip::File.open(input.path) do |zip_file|
      zip_file.each do |entry|
        # Directory entries hold no log lines to read.
        next if entry.directory?

        # Block form of get_input_stream so the entry stream is closed
        # as soon as its lines have been consumed.
        entry.get_input_stream do |stream|
          stream.each_line do |line|
            record = LTSV.parse(line).first
            # Skip lines that yield no record (presumably blank lines)
            # instead of failing on nil.
            next unless record

            counts[record[:uri]] += 1
          end
        end
      end
    end

    output.open('w') do |csv|
      counts.each { |uri, count| csv.puts "#{uri},#{count}" }
    end
  end
end
####################################################
# Main Task
####################################################
# Top-level task: requires :count_rows_group_by_uri, then reads the file at
# `input.path` and passes its contents to `log`.
task :main do
  requires :count_rows_group_by_uri

  run do
    report = File.read(input.path)
    log report
  end
end