-
Notifications
You must be signed in to change notification settings - Fork 0
/
calculate_scores.py
92 lines (87 loc) · 4.02 KB
/
calculate_scores.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""Required functions to find the stays longer than the threshold"""
from datetime import datetime
from sqlalchemy.orm.session import Session
from model import ScoreLog
# The number of hours that count as a stay, stays less than this will be ignored
THRESHOLD_MINUTES = 60
# The report collection rate (how often are reports generated)
REPORT_MINUTES = 15
# The number of consecutive reports needed to be considered a stay
CONSECUTIVE_COUNT = (THRESHOLD_MINUTES / REPORT_MINUTES) - 1
# TODO: Add the functionailty to change consecutive count in the query based on this constant
def calculate(session: Session) -> None:
"""Find the stays"""
query = (
"SELECT results.NeighbourhoodId, "
# Round to 2 decimal points
"ROUND ( "
# The number of stays multipled by the neighbourhood ratio
"SUM(results.ReportCount) * neighbourhood.Ratio"
", 2) "
"AS Score "
"FROM ( "
# Select the neighbourhood,
"SELECT DISTINCT windowed_reports.NeighbourhoodId, "
# And the number of stay reports
"ROUND("
"POWER("
"COUNT(*) OVER (PARTITION BY UserId, NeighbourhoodId"
") - 3, 0.25"
") + 1, 2) AS ReportCount "
"FROM ("
# Run a window function to get the required columns,
# next timestamp, and previous timestamp of a report neighbouhoods
"SELECT "
"Id, UserId, NeighbourhoodId, Timestamp, "
"LEAD(NeighbourhoodId) OVER (PARTITION BY UserId ORDER BY Timestamp) "
"AS next_neighbourhood,"
"LEAD(NeighbourhoodId, 2) OVER (PARTITION BY UserId ORDER BY Timestamp) "
"AS second_next_neighbourhood, "
"LAG(NeighbourhoodId) OVER (PARTITION BY UserId ORDER BY Timestamp) "
"AS prev_neighbourhood, "
"LAG(NeighbourhoodId, 2) OVER (PARTITION BY UserId ORDER BY Timestamp) "
"AS second_prev_neighbourhood, "
"LEAD(Timestamp) OVER (PARTITION BY UserId ORDER BY Timestamp) "
"AS next_timestamp, "
"LAG(Timestamp) OVER (PARTITION BY UserId ORDER BY Timestamp) "
"AS prev_timestamp "
"FROM CovidAlerter.Reports"
") AS windowed_reports "
"WHERE "
# The report is the start of a stay
"("
"windowed_reports.NeighbourhoodId = windowed_reports.next_neighbourhood "
"AND "
"windowed_reports.NeighbourhoodId = windowed_reports.second_next_neighbourhood"
") "
"OR "
# The report is in the middle of a stay
"("
"windowed_reports.NeighbourhoodId = windowed_reports.next_neighbourhood "
"AND "
"windowed_reports.NeighbourhoodId = windowed_reports.prev_neighbourhood"
") "
"OR "
# The report is the end of a stay
"("
"windowed_reports.NeighbourhoodId = windowed_reports.prev_neighbourhood "
"AND "
"windowed_reports.NeighbourhoodId = windowed_reports.second_prev_neighbourhood"
") "
") AS results "
# Join the neighbourhood to get the ratio
"INNER JOIN CovidAlerter.Neighbourhoods AS neighbourhood "
"ON results.NeighbourhoodId = neighbourhood.Id "
"GROUP BY results.NeighbourhoodId"
";"
)
results = session.execute(query).fetchall()
# Result row format:
# (NeighbourhoodId, Score)
# Create a table row for each neighbourhood
# TODO: Remove this in favor of a solution inside the original SQL query
logs = [ScoreLog(NeighbourhoodId=result[0], Score=result[1],
Date=datetime.utcnow().date()) for result in results]
# Add the scores and commit to table
session.add_all(logs)
session.commit()