-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_csv.rb
74 lines (71 loc) · 2.44 KB
/
create_csv.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Converts RSS/XML search-result files found under ./import/<ARG>/ into one
# tab-separated import file per month (import_<ARG><month>.txt).
#
# Usage: ruby create_csv.rb <ARG>
#   <ARG> names the sub-directory of ./import to read and is also embedded in
#   the output file names.
require "rexml/document"
require "time"
require "csv"

# Returns the text of the first node under +item+ matching +xpath+, or nil when
# no such node exists.
def element_text(item, xpath)
  node = item.elements[xpath]
  node ? node.text : nil
end

# Converts an RFC 822 date string into "YYYY/MM/DD".
# Returns nil when +text+ is nil or cannot be parsed as an RFC 822 date.
def format_pub_date(text)
  return nil if text.nil?
  Time.rfc822(text).strftime("%Y/%m/%d")
rescue ArgumentError
  nil
end

# Builds the record hash for a single <item> element.
# Fields whose source element is missing are stored as nil; :creator is only
# set when an <author> element is present.
def parse_item(item)
  data = {}
  # Elements that share a tag name are told apart by their xsi:type attribute,
  # so they are selected with an XPath attribute predicate.
  data[:isbn] = element_text(item, "dc:identifier[@xsi:type='dcndl:ISBN']")
  data[:original_title] = element_text(item, "title")
  data[:title_transcription] = element_text(item, "dcndl:titleTranscription")
  data[:volume_number_string] = element_text(item, "dcndl:volume")
  data[:issue_number_string] = element_text(item, "dcterms:issued[@xsi:type='dcterms:W3CDTF']")
  data[:ndl_bib_id] = element_text(item, "dc:identifier[@xsi:type='dcndl:JPNO']")
  authors = element_text(item, "author")
  # The author list is re-delimited from commas to semicolons.
  data[:creator] = authors.tr(",", ";") if authors
  data[:pub_date] = format_pub_date(element_text(item, "pubDate"))
  data
end

# Builds one output row, in +columns+ order.
#   columns         - column keys; "" and "\n" are literal filler columns
#   data            - record hash as produced by parse_item
#   item_identifier - identifier string to place in the :item_identifier column
#   shelf           - shelf name to place in the :shelf column
# Any other column is looked up in +data+ (nil when absent).
def build_row(columns, data, item_identifier, shelf)
  columns.map do |column|
    case column
    when "", "\n"
      # Filler columns are copied through verbatim, exactly as in the header row.
      column
    when :kbn
      "1"
    when :item_identifier
      item_identifier
    when :shelf
      shelf
    else
      data[column]
    end
  end
end

p "start at #{Time.now}"
# set shelf name
# NOTE(review): "bunka02_in_process" does not follow the "bunkanNN" pattern of
# the other entries and may be a typo -- confirm before changing.
shelves = ["honkan01", "honkan02", "honkan02_in_process", "bunkan01", "bunkan01_in_process",
           "bunkan02", "bunka02_in_process", "bunkan03", "bunkan03_in_process"]
# set item_identifier prefix
pre = "L8"
arg = ARGV[0]
columns = ["", :kbn, :item_identifier, :isbn, :original_title, :title_transcription,
           :pub_date, :volume_number_string, :issue_number_string, :creator, :shelf,
           :item_price, :call_number, "\n"]

(1..12).each do |month|
  # create data
  datas = []
  Dir["./import/#{arg}/datas#{month}*.xml"].each do |file|
    begin
      # Block form closes the file handle as soon as the document is parsed.
      document = File.open(file) { |io| REXML::Document.new(io) }
      document.elements.each("rss/channel/item") { |item| datas << parse_item(item) }
    rescue => e
      # Best effort: report the failure and carry on with the next file.
      p e
    end
  end
  p "#{arg}-#{month}: #{datas.size}"
  next if datas.empty?

  # export TSV
  # Options are passed as keywords; a positional options hash is rejected by
  # CSV.open on Ruby 3.x.
  CSV.open("import_#{arg}#{month}.txt", "w", col_sep: "\t") do |csv|
    # header
    csv << columns
    # manifestation datas
    # The counter is bumped before each use, so the first identifier written
    # for a month ends in 00002.
    # NOTE(review): confirm whether numbering was meant to start at 00001.
    sequence = 1
    datas.each do |data|
      sequence += 1
      item_identifier = "#{pre}#{month}#{format('%05d', sequence)}"
      csv << build_row(columns, data, item_identifier, shelves.sample)
    end
  end
end
p "end at #{Time.now}"