forked from Waultics/coinmarketcap-history
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcoinmarketcap_usd_history.py
executable file
·180 lines (135 loc) · 5.86 KB
/
coinmarketcap_usd_history.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#!/usr/bin/python
"""
CoinMarketCap USD Price History
Print the CoinMarketCap USD price history for a particular cryptocurrency in CSV format.
"""
import sys
import re
import urllib2
import argparse
import datetime
# Command-line interface: three required positionals plus an optional flag
# selecting a pandas DataFrame result instead of CSV text.
parser = argparse.ArgumentParser()
parser.add_argument(
    "currency",
    type=str,
    help=(
        "This is the name of the crypto, as is shown on coinmarketcap. "
        "For BTC, for example, type: bitcoin."
    ),
)
parser.add_argument(
    "start_date",
    type=str,
    help=(
        "Start date from which you wish to retrieve the historical data. "
        "For example, '2017-10-01'."
    ),
)
parser.add_argument(
    "end_date",
    type=str,
    help=(
        "End date for the historical data retrieval. "
        "If you wish to retrieve all the data then you can give a date in the future. "
        "Same format as in start_date 'yyyy-mm-dd'."
    ),
)
parser.add_argument(
    "--dataframe",
    action='store_true',
    help="If present, returns a pandas DataFrame.",
)
def parse_options(args):
    """
    Extract and validate parameters from the parsed command line.

    `args` is the namespace produced by the argument parser; its `currency`,
    `start_date` and `end_date` attributes are read.

    Returns a tuple `(currency, start_date, end_date)` where `currency` is
    lower-cased and both dates are compacted from 'yyyy-mm-dd' to 'yyyymmdd'.

    Raises ValueError when a date is not of the form yyyy-mm-dd or is not a
    real calendar date. Prints the usage line and exits with status 1 when
    either date is before 2013 or the end date precedes the start date.
    """
    currency = args.currency.lower()
    start_date = args.start_date
    end_date = args.end_date

    # String validation. This runs before any int() conversion so malformed
    # input gets the descriptive message below instead of a bare
    # "invalid literal" error. Any four-digit year is accepted (the range is
    # checked further down) and \Z rejects trailing characters.
    pattern = re.compile(r'\d{4}-\d{2}-\d{2}\Z')
    if not pattern.match(start_date):
        raise ValueError('Invalid format for the start_date: ' + start_date + ". Should be of the form: yyyy-mm-dd.")
    if not pattern.match(end_date):
        raise ValueError('Invalid format for the end_date: ' + end_date + ". Should be of the form: yyyy-mm-dd.")

    # Datetime validation for the correctness of the date.
    # Will throw a ValueError if not valid (e.g. 2017-02-30).
    start = datetime.datetime(*[int(part) for part in start_date.split('-')])
    end = datetime.datetime(*[int(part) for part in end_date.split('-')])

    # CoinMarketCap's price data (at least for Bitcoin, presumably for all others) only goes back to 2013.
    # The ordering check compares the full dates, not just the years, so a
    # reversed range within a single year is rejected as well.
    problem = None
    if start.year < 2013 or end.year < 2013:
        problem = 'dates before 2013 are not supported'
    elif end < start:
        problem = 'end_date is earlier than start_date'

    if problem is not None:
        sys.stderr.write('Invalid date range: ' + problem + '.\n')
        print('Usage: ' + __file__ + ' <currency> <start_date> <end_date> --dataframe')
        sys.exit(1)

    # The historical-data URL expects the dates without separators.
    return currency, start_date.replace('-', ''), end_date.replace('-', '')
def download_data(currency, start_date, end_date):
    """
    Download HTML price history for the specified cryptocurrency and time range from CoinMarketCap.

    `currency` is inserted into the URL path as given; `start_date` and
    `end_date` are passed as the `start` and `end` query parameters.

    Returns the raw body of the response. On any failure (network error,
    timeout after 10 seconds, non-200 status) a diagnostic is printed and the
    process exits with status 1.
    """
    url = ('https://coinmarketcap.com/currencies/' + currency + '/historical-data/'
           + '?start=' + start_date + '&end=' + end_date)

    try:
        page = urllib2.urlopen(url, timeout=10)
        try:
            if page.getcode() != 200:
                raise Exception('Failed to load page')
            html = page.read()
        finally:
            # Release the connection even when the status check or the read fails.
            page.close()
    except Exception as e:
        print('Error fetching price data from ' + url)
        print('Did you use a valid CoinMarketCap currency?\nIt should be entered exactly as displayed on CoinMarketCap.com (case-insensitive), with dashes in place of spaces.')
        # str(e) instead of the deprecated e.message attribute: the latter may
        # be empty or a non-string, which would hide the cause or break the
        # concatenation.
        print("Error message: " + str(e))
        sys.exit(1)

    return html
def extract_data(html):
    """
    Extract the price history from the HTML.

    The CoinMarketCap historical data page has just one HTML table. This table contains the data we want.
    It's got one header row with the column names.
    We need to derive the "average" price for the provided data.

    Returns `(header, rows)`: `header` is the list of column names from the
    table head plus a trailing 'Average (High + Low / 2)' entry; `rows` is a
    list of lists of strings, one per table row with exactly seven cells, with
    commas stripped from every field and the average (two decimals) appended.

    Raises ValueError when the page contains no <thead> or <tbody> section, or
    (when there are rows) when the header has no 'High' or 'Low' column.
    """
    # Accept the undecoded response body on Python 3; on Python 2 `bytes` is
    # `str`, so this branch is skipped.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    head_match = re.search(r'<thead>(.*)</thead>', html, re.DOTALL)
    body_match = re.search(r'<tbody>(.*)</tbody>', html, re.DOTALL)
    if head_match is None or body_match is None:
        raise ValueError('Could not find the historical data table in the HTML.')

    # [^>]* keeps each match inside a single tag. A greedy ".*" would swallow
    # every <th> on a line except the last one.
    header = re.findall(r'<th\b[^>]*>([\w ]+)</th>', head_match.group(1))
    header.append('Average (High + Low / 2)')

    raw_rows = re.findall(
        r'<tr[^>]*>' + r'\s*<td[^>]*>([^<]+)</td>' * 7 + r'\s*</tr>',
        body_match.group(1),
    )

    # strip commas (thousands separators, and the comma inside the date)
    rows = [[field.replace(',', '') for field in raw_row] for raw_row in raw_rows]

    # calculate averages; the column positions are looked up once, and only
    # when there is at least one row to process
    if rows:
        high_index = header.index('High')
        low_index = header.index('Low')
        for row in rows:
            average = (float(row[high_index]) + float(row[low_index])) / 2
            row.append('{:.2f}'.format(average))

    return header, rows
def render_csv_data(header, rows):
    """
    Write the table to standard output as comma-separated lines.

    The header line is printed first, followed by one line per row; fields
    are joined with a bare comma and are not quoted or escaped.
    """
    to_line = ','.join
    print(to_line(header))
    for record in rows:
        print(to_line(record))
# --------------------------------------------- Util Methods -----------------------------------------------------------
def processDataFrame(df):
    """
    Convert a DataFrame of scraped string values into typed columns.

    The 'Date' column is parsed into datetimes and every other column is
    converted to float. The conversions are applied to `df` itself; the
    returned value is a new DataFrame sorted by ascending 'Date' with a fresh
    0..n-1 index.

    Raises AssertionError if `df` is not a pandas DataFrame and ValueError if
    it has no 'Date' column.
    """
    import pandas as pd
    assert isinstance(df, pd.DataFrame), "df is not a pandas DataFrame."

    cols = list(df.columns.values)
    cols.remove('Date')

    # Assign whole columns rather than through df.loc[:, col]: the .loc form
    # writes into the existing object-dtype column on recent pandas versions,
    # leaving the dtype as object instead of datetime64/float64.
    df['Date'] = pd.to_datetime(df['Date'])
    for col in cols:
        df[col] = df[col].astype(float)

    return df.sort_values(by='Date').reset_index(drop=True)
def rowsFromFile(filename):
    """
    Print every record of the comma-delimited file at `filename`.

    The file is opened in binary mode and each parsed record is printed as a
    list, one per line.
    """
    import csv
    with open(filename, 'rb') as handle:
        for record in csv.reader(handle, delimiter=','):
            print(record)
# ----------------------------------------------------------------------------------------------------------------------
def main(args=None):
    """
    Run the whole pipeline: parse arguments, download the page, extract the
    table and output it.

    `args` is an optional list of command-line tokens; when it is None the
    parser reads the process's own command line instead.

    Returns a processed pandas DataFrame when --dataframe was given.
    Otherwise the data is printed as CSV and None is returned.
    """
    # parse_args treats None as "use the process arguments", so one call
    # covers both the explicit-list and the default case.
    args = parser.parse_args(args)

    currency, start_date, end_date = parse_options(args)
    header, rows = extract_data(download_data(currency, start_date, end_date))

    if not args.dataframe:
        render_csv_data(header, rows)
        return None

    # pandas is imported only when a DataFrame was actually requested.
    import pandas as pd
    return processDataFrame(pd.DataFrame(data=rows, columns=header))
if __name__ == "__main__":
    # Script entry point. The result stays bound to a module-level name so it
    # can be inspected afterwards (e.g. when run with `python -i`).
    df = main()