-
Notifications
You must be signed in to change notification settings - Fork 1
/
GetGames.py
61 lines (54 loc) · 2.17 KB
/
GetGames.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import copper
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime, date
# Scrape one season of NBA results from ESPN for every team listed in
# teams.csv and write the de-duplicated games to games.csv.

# Value substituted into the ESPN schedule URL and used as the calendar year
# of every game date.
# NOTE(review): an NBA season spans two calendar years, so stamping every
# game with `year` looks wrong for part of the schedule -- confirm which
# season ESPN serves for this value before relying on the dates.
year = 2013

# Columns read from teams.csv below: team, prefix_1, prefix_2.
teams = pd.read_csv('teams.csv')
BASE_URL = 'http://espn.go.com/nba/team/schedule/_/name/{0}/year/{1}/{2}'

# Parallel lists, one entry per parsed game; they must stay the same length
# because they become the columns of the DataFrame built at the end.
match_id = []
dates = []
home_team = []
home_team_score = []
visit_team = []
visit_team_score = []


def _parse_game_date(text, season_year):
    """Return the ``date`` for a schedule cell such as ``'Wed, Feb 27'``.

    The year is parsed together with the day and month.  Without a year
    ``datetime.strptime`` assumes 1900, which is not a leap year, so a
    February 29 game would raise ``ValueError`` and be dropped.
    """
    stamped = '{0} {1}'.format(text.strip(), season_year)
    return datetime.strptime(stamped, '%a, %b %d %Y').date()


def _split_scores(score, is_home, won):
    """Return ``(home_score, visit_score)`` from a two-item score pair.

    The first item is treated as the winner's score and the second as the
    loser's.  Raises ``IndexError`` when ``score`` has fewer than two items.
    """
    winner, loser = score[0], score[1]
    # The home side holds the winning score exactly when the scraped team
    # was at home and won, or was away and lost.
    if is_home == won:
        return winner, loser
    return loser, winner


for _, team_row in teams.iterrows():
    _team = team_row['team']
    r = requests.get(
        BASE_URL.format(team_row['prefix_1'], year, team_row['prefix_2']))
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    table = BeautifulSoup(r.text, 'html.parser').table
    if table is None:
        # No schedule table in the response; nothing to collect for this team.
        continue
    for tr in table.find_all('tr')[1:]:  # Remove header
        columns = tr.find_all('td')
        # Parse every field first and append only afterwards, so a row that
        # fails half-way cannot leave the parallel lists with unequal lengths.
        try:
            _home = columns[1].li.text == 'vs'
            _other_team = columns[1].find_all('a')[1].text
            _score = columns[2].a.text.split(' ')[0].split('-')
            _won = columns[2].span.text == 'W'
            _id = columns[2].a['href'].split('?id=')[1]
            _date = _parse_game_date(columns[0].text, year)
            _home_score, _visit_score = _split_scores(_score, _home, _won)
        except (AttributeError, IndexError, KeyError, ValueError):
            # Not all rows are a match (sub-headers, games without a
            # result), is OK.
            continue
        match_id.append(_id)
        dates.append(_date)
        home_team.append(_team if _home else _other_team)
        visit_team.append(_other_team if _home else _team)
        home_team_score.append(_home_score)
        visit_team_score.append(_visit_score)

dic = {'id': match_id, 'date': dates, 'home_team': home_team, 'visit_team': visit_team,
       'home_team_score': home_team_score, 'visit_team_score': visit_team_score}
# A game can be collected from both teams' schedules; keep one row per id.
# (`subset` replaces the `cols` keyword that pandas removed.)
games = pd.DataFrame(dic).drop_duplicates(subset='id').set_index('id')
print(games)
# Let pandas open the file itself: a handle opened in binary mode rejects
# the text that to_csv writes on Python 3.
games.to_csv('games.csv')