ballsaal.py
import concurrent.futures
import re
from datetime import datetime

import dateparser
import requests
from bs4 import BeautifulSoup
from dateutil.relativedelta import relativedelta

from event import DanceEvent


def clean_name(name: str) -> str:
    # Some names start and end with a double quote for no reason.
    if name.startswith('"') and name.endswith('"'):
        name = name[1:-1]

    # Sometimes words inside names are in ALL-CAPS, which is just ugly.
    def deupperice(text: str) -> str:
        return text.capitalize() if text.isupper() else text

    name = " ".join(map(deupperice, name.split(" ")))

    # Some events end in "..."
    if name.endswith("..."):
        name = name[:-3]

    # And some events should just always be renamed.
    rename_table = {
        "Vienna Salsa Splash": "Salsa Splash",
    }
    if name in rename_table:
        name = rename_table[name]

    return name
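
# A quick illustration of clean_name on a made-up input (not taken from the
# ballsaal.at feed): '"VIENNA SALSA SPLASH..."' first loses the surrounding
# quotes, then each ALL-CAPS word is capitalized ("Vienna Salsa Splash..."),
# the trailing "..." is stripped, and finally the rename table maps
# "Vienna Salsa Splash" to "Salsa Splash".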


# For ends_at and the price we need to do a second request to the ticketing
# website, because only that page says when the event ends. That is a bit of
# work, so we do it here in a separate function.
def add_fine_detail(event: DanceEvent) -> DanceEvent:
    response = requests.get(event.website, timeout=10)
    response.raise_for_status()
    html = response.text

    soup = BeautifulSoup(html, "html.parser")
    date_text = soup.find("span", class_="end-date").text
    event.ends_at = dateparser.parse(date_text, languages=["de", "en"])

    # The end date comes without a year, so the year dateparser assumes can be
    # off and the parsed end may land before the start. Bump it forward until
    # it is after the start.
    while event.starts_at > event.ends_at:
        event.ends_at += relativedelta(days=1)

    is_available = None
    price_items = soup.find_all(class_="ticket-price-cell")
    for price_item in price_items:
        m = re.search(r"€ (\d+),(\d{2})", price_item.text)
        if m is None:
            continue

        price = int(m.group(1)) * 100 + int(m.group(2))
        if event.price_euro_cent is None or event.price_euro_cent > price:
            event.price_euro_cent = price

        if not is_available:
            is_available = "Ausgebucht" not in price_item.text

    if is_available is not None and not is_available:
        event.name += " [ausgebucht]"

    return event
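
# For reference, the price regex above turns a cell like "Vorverkauf € 59,00"
# (hypothetical text, the real cells may differ) into 59 * 100 + 00 = 5900
# euro cents; cells without a "€ xx,yy" pattern are simply skipped.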


# For ballsaal.at we need to download and parse HTML. This is more tedious than
# a JSON API, but at least the format is very consistent.
def download_ballsaal() -> list[DanceEvent]:
    response = requests.get(
        "https://www.ballsaal.at/termine_tickets/?no_cache=1", timeout=10
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, features="html.parser")
    event_items = soup.find_all(class_="event")

    events = []
    for event in event_items:
        name = event.find(class_="name").text
        name = clean_name(name)
        description = event.find(class_="short-description").text
        date_string = event.find(class_="date").text
        url = event.find(class_="button")["href"]

        # Skip the first four characters, presumably a weekday prefix
        # such as "Sa, ", before parsing the date.
        date = datetime.strptime(date_string[4:], "%d.%m.%Y, %H:%M Uhr")

        events.append(
            DanceEvent(
                starts_at=date,
                name=name,
                price_euro_cent=None,
                description=description,
                dancing_school="Ballsaal",
                website=url,
            )
        )

    # Add ends_at, the price, and the sold-out flag to each event by fetching
    # its ticket page; do the requests in parallel to keep this fast.
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=max(1, len(events))
    ) as executor:
        events = list(executor.map(add_fine_detail, events))

    return events
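

# Minimal manual test harness; this is a sketch, not part of the scraper's
# public interface. It only relies on the DanceEvent fields already used
# above (name, starts_at, price_euro_cent).
if __name__ == "__main__":
    for dance_event in download_ballsaal():
        price = (
            f"{dance_event.price_euro_cent / 100:.2f}€"
            if dance_event.price_euro_cent is not None
            else "?"
        )
        print(f"{dance_event.starts_at:%d.%m.%Y %H:%M} {dance_event.name} ({price})")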