-
Notifications
You must be signed in to change notification settings - Fork 1
/
countries.py
64 lines (49 loc) · 1.84 KB
/
countries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from urllib import request
from os import getcwd, path
import datetime
import pandas as pd
from helpers import ensure_dirs
# Earlier data source (ECDC case-distribution CSV), kept for reference only:
# COUNTRIES_DATA = 'https://opendata.ecdc.europa.eu/covid19/casedistribution/csv'
# URL of the CSV that scrape_countries() downloads and splits per country.
COUNTRIES_DATA = 'https://covid.ourworldindata.org/data/owid-covid-data.csv'
def scrape_countries():
    """Download the countries dataset and split it into one CSV per country.

    Reads the CSV at ``COUNTRIES_DATA``, then for every distinct value of the
    ``location`` column writes ``<cwd>/data/countries/<name>.csv`` (the name is
    the location lower-cased with spaces replaced by underscores). Finally a
    ``README.md`` index of the generated files is written to the same
    directory.

    Several columns are renamed on output (``date`` -> ``dateRep``,
    ``new_cases`` -> ``cases``, ...). NOTE(review): these look like the column
    names of the earlier ECDC feed, presumably kept so existing consumers of
    the CSVs keep working -- confirm before changing them.

    Rows whose ``location`` is missing are skipped.
    """
    output_columns = {
        "date": "dateRep",
        "new_cases": "cases",
        "new_deaths": "deaths",
        "location": "countriesAndTerritories",
        "iso_code": "countryterritoryCode",
        "population": "popData2019",
        "continent": "continentExp",
    }

    countries_dir = path.join(getcwd(), 'data', 'countries')
    ensure_dirs(countries_dir)

    # Select the date column by name instead of by position so parsing does
    # not depend on the column order of the feed. Day-first parsing is not
    # forced; NOTE(review): assumes the feed uses ISO (YYYY-MM-DD) dates.
    df = pd.read_csv(COUNTRIES_DATA, parse_dates=['date'])

    countries = {}
    # A single groupby pass replaces one full-frame boolean mask per country.
    # sort=False keeps countries in order of first appearance, which is the
    # order the README table is written in. Missing locations are dropped by
    # groupby instead of crashing on ``.lower()``.
    for country, country_rows in df.groupby('location', sort=False):
        country_filename = country.lower().replace(' ', '_') + '.csv'
        countries[country] = country_filename

        # rename() without inplace returns a new frame, so the shared source
        # frame is never mutated through a slice.
        country_df = country_rows.rename(columns=output_columns)

        # '%.f' writes every float column with zero decimal places.
        country_df.to_csv(
            path.join(countries_dir, country_filename),
            index=False,
            float_format='%.f',
        )

    # Explicit UTF-8 so country names with non-ASCII characters are written
    # the same way regardless of the platform's default encoding.
    readme_path = path.join(countries_dir, 'README.md')
    with open(readme_path, 'w', encoding='utf-8') as readme_f:
        readme_f.write(get_readme_contents(countries))
def get_readme_contents(countries):
    """Build the Markdown README that indexes the per-country datasets.

    Args:
        countries: Mapping of country name to the filename of that country's
            dataset. Underscores in a name are shown as spaces.

    Returns:
        Markdown text made of a ``## Countries`` heading, a blockquote with
        the current UTC time, and a two-column table (country, link to the
        dataset) with one row per mapping entry, in the mapping's iteration
        order. The text ends with a newline.
    """
    rows = []
    for country, filename in countries.items():
        display_name = country.replace('_', ' ')
        rows.append(f'| {display_name} | [`{filename}`]({filename}) |')
    toc_contents = '\n'.join(rows)

    updated_at = datetime.datetime.now(datetime.timezone.utc).strftime(
        '%b %d %Y %H:%M:%S UTC'
    )

    # Blank lines separate the heading, the blockquote and the table. Without
    # one after the blockquote, Markdown's lazy-continuation rule folds the
    # table rows into the quoted paragraph and the table may not render.
    return (
        '## Countries\n'
        '\n'
        f'> Last updated at {updated_at}.\n'
        '\n'
        '| Country | Dataset |\n'
        '| ------ | ------- |\n'
        f'{toc_contents}\n'
    )