-
Notifications
You must be signed in to change notification settings - Fork 0
/
ia-json.py
29 lines (22 loc) · 841 Bytes
/
ia-json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Dump the metadata of every item in an Internet Archive (IA) collection
# to a JSON text file.
# Usage: ia-json.py <collection> <output_file>
import internetarchive
import json
import sys
import os
def dump_collection_json(outfile, records):
    """Write *records* to *outfile* as one JSON document.

    The document has the form ``{"collection_items" : [r1,r2,...]}``, with
    each record serialized by ``json.dumps``.

    Records are consumed and written one at a time, so *records* may be a
    lazy iterable. The comma is written *before* every record except the
    first; this avoids having to seek back and chop off a trailing comma,
    which corrupts the output when there are no records and is not
    supported on text-mode files in Python 3.

    outfile -- object with a ``write`` method accepting text.
    records -- iterable of JSON-serializable objects (may be empty).
    """
    outfile.write('{"collection_items" : [')
    is_first = True
    for record in records:
        if not is_first:
            outfile.write(',')
        outfile.write(json.dumps(record))
        is_first = False
    outfile.write(']}')


def _iter_item_metadata(search_results):
    """Yield the ``'metadata'`` entry of each item in *search_results*.

    Each search result is looked up by its ``'identifier'`` through
    ``internetarchive.get_item`` and a progress line is printed per item.
    """
    for result in search_results:
        item_identifier = result['identifier']
        item = internetarchive.get_item(item_identifier)
        print("Downloading " + item_identifier + " ...")
        yield item.item_metadata['metadata']


def main(argv):
    """Dump the metadata of all items in a collection to a JSON file.

    argv -- command-line argument list: ``argv[1]`` is the collection name
            to scrape, ``argv[2]`` is the path of the output file.

    Returns 0 on success, or 2 (after printing a usage message to stderr)
    when either argument is missing.
    """
    if len(argv) < 3:
        prog = argv[0] if argv else 'ia-json.py'
        sys.stderr.write(
            "usage: " + prog + " <collection> <output_file>\n")
        return 2

    collection = argv[1]   # Collection name to scrape
    output_file = argv[2]  # Name of outfile for JSON dump

    search_collection = internetarchive.search_items(
        'collection:' + collection)
    print(str(search_collection.num_found) + " items in collection")

    with open(output_file, 'w') as outfile:
        dump_collection_json(outfile, _iter_item_metadata(search_collection))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))