-
Notifications
You must be signed in to change notification settings - Fork 45
/
scrape.py
55 lines (47 loc) · 1.89 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import csv
import json
import os
import re
import sys
# Base folder: the directory that contains this script. Both paths below
# are built relative to it.
location = os.path.dirname(__file__)

# Destination JSON file that collects the converted entities.
output_append = os.path.join(location, 'entities.json')

# Source folder holding the NEAR ecosystem entity files.
directory = os.path.join(location, 'entities/')
# Matches the text between the first pair of `---` fences in a markdown file.
# The quantifier is lazy so that a later `---` in the document body (a
# horizontal rule or a table separator row) cannot stretch the capture over
# the rest of the file.
_FRONT_MATTER_RE = re.compile(r'(?<=---)(.*?)(?=---)', re.DOTALL)


def _parse_front_matter(data):
    """Return the ``key: value`` pairs of the first ``---`` block in *data*.

    Keys and values have surrounding whitespace and double quotes removed.
    An empty dict is returned when there is no ``---`` delimited block or the
    block holds no ``key: value`` line.
    """
    match = _FRONT_MATTER_RE.search(data)
    if match is None:
        return {}
    attributes = {}
    for line in match.group().splitlines():
        # Split on the first ': ' only, so a value that itself contains ': '
        # is kept whole instead of being truncated.
        attr_name, separator, attr_value = line.partition(': ')
        if not separator:
            # Blank line or something that is not a `key: value` pair.
            continue
        attributes[attr_name.strip().strip('"')] = attr_value.strip().strip('"')
    return attributes


def near_entitity_md_to_json(directory, filename, path):
    """Append the attribute table of one markdown file to the output JSON.

    The file at *path* is read, the ``key: value`` lines between its first
    pair of ``---`` fences are collected into a dict, and that dict is
    appended to the JSON list stored in the module-level ``output_append``
    file (which must already exist and contain a JSON list).

    Args:
        directory: Unused; kept so existing callers keep working.
        filename: Unused; kept so existing callers keep working.
        path: Path of the markdown file to convert.

    Raises:
        ValueError: If the file contains no attribute table. Nothing is
            written to the output file in that case.
        OSError: If the markdown file or the output file cannot be opened.
    """
    with open(path, 'r', encoding='utf-8') as md_file:
        attributes = _parse_front_matter(md_file.read())
    if not attributes:
        # Fail with a clear message instead of appending an empty record.
        raise ValueError('No table extracted from {}'.format(path))

    # Read-modify-write: load the current list, add this entity, save it back.
    with open(output_append, encoding='utf-8') as fp:
        entities = json.load(fp)
    entities.append(attributes)
    with open(output_append, 'w', encoding='utf-8') as json_file:
        json.dump(entities, json_file,
                  separators=(',', ': '))
# Reset the output file to an empty JSON list before converting anything.
with open(output_append, mode='w', encoding='utf-8') as f:
    json.dump([], f)

# Convert every entry of the entities folder. The listing is sorted because
# os.listdir() returns names in arbitrary order, which would make the order
# of the generated JSON differ between runs.
for filename in sorted(os.listdir(directory)):
    path = os.path.join(directory, filename)
    try:
        near_entitity_md_to_json(directory, filename, path)
    except Exception as exc:
        # Best effort: one bad entry must not stop the run, but report it
        # instead of discarding the error. KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        print('Skipping {}: {!r}'.format(path, exc), file=sys.stderr)