-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.py
83 lines (63 loc) · 2.1 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/python
""" main
Crawler for Devpost project sites from the EUvsVirus Hackathon, written for
the D4G EUvsVirusCluster project.
The script crawls endpoints provided by the devpost-api project by ViRb3
(https://github.com/ViRb3/devpost-api):
(1) docker pull virb3/devpost-api:latest
(2) docker run -p 5000:5000 virb3/devpost-api:latest
(3) Access API at http://127.0.0.1:5000
- endpoints:
/user/:username [NA for this project]
/project/:project_name
Data are persisted as a JSON dump in the data directory.
Author: datadonk23
Date: 07.05.20
"""
import json
import logging
# Set the root logger's level to INFO so that INFO-level messages logged
# through it are not filtered out by level.
logging.getLogger().setLevel(logging.INFO)
from request_util import make_request
def filter_response(resp):
    """ Filters a response down to the desired schema.

    Only the ``title`` and ``text`` fields of the JSON payload are kept.
    A field that is missing from the payload or holds a falsy value
    (empty string, ``None``, ...) is reported as ``None``.

    :param resp: Response object exposing a ``json()`` method
    :type resp: requests.Response
    :return: Filtered data with exactly the keys ``title`` and ``text``
    :return type: dict
    """
    json_resp = resp.json()
    # dict.get() instead of indexing: a payload that lacks one of the
    # fields must yield None for it rather than raise KeyError.
    return {
        field: json_resp.get(field) or None
        for field in ("title", "text")
    }
def perist_fetched_data(fetched_data, f_path):
    """ Persist fetched data as JSON file.

    The parent directory of the output file is created if it does not
    exist yet. An existing file at ``f_path`` is overwritten.

    :param fetched_data: Fetched data
    :type fetched_data: [Dict]
    :param f_path: Filepath of output file
    :type f_path: str
    :return: -
    """
    import os  # function-scope import; the module itself does not import os

    out_dir = os.path.dirname(f_path)
    # A bare filename has an empty dirname, which os.makedirs would reject.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(f_path, "w", encoding="utf-8") as f:
        json.dump(fetched_data, f)
def main():
    """ Crawl the listed Devpost projects and persist the filtered results.

    For every project name the URL ``host + project_endpoint + name`` is
    requested via ``make_request``, the response is reduced by
    ``filter_response`` and the collected list is written once, after all
    projects were fetched, with ``perist_fetched_data``.

    :return: -
    """
    host = "http://127.0.0.1:5000"
    project_endpoint = "/project/"
    output_fpath = "data/EUvsVirus_projects.json"

    #FIXME fetched proj names list goes here
    test_proj_list = [
        "zero_project",
        "eunia-european-union-national-informal-assistance-oz4kcp",
        "crowd-free-x08uy5",
        "covid-gur92q",
        "jobliebe",
        "myminoritymatters",
    ]

    fetched_data = []
    for proj in test_proj_list:
        # Lazy %-style argument: the message is only formatted when the
        # record is actually emitted.
        logging.info("Fetch data from %s", proj)
        resp = make_request(host + project_endpoint + proj)
        fetched_data.append(filter_response(resp))

    perist_fetched_data(fetched_data, output_fpath)


if __name__ == "__main__":
    main()