generated from mysociety/python-data-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update group query to be consistent between runs
- avoid repo clogging up with variations on same parquet
- Loading branch information
Showing
7 changed files
with
42 additions
and
15 deletions.
There are no files selected for viewing
Binary file modified
BIN
-30.5 KB
(100%)
data/data_packages/all_time_register/register_of_interests.parquet
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file modified
BIN
+9.53 KB
(100%)
data/data_packages/parliament_2019/register_of_interests.parquet
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,37 @@ | ||
-- Collapses the source rows to one row per (public_whip_id, category_name, free_text), | ||
-- reporting the earliest and latest registry_date seen for that entry. | ||
-- NOTE(review): the hunk header above (-1,10 +1,37) suggests these ten lines are the | ||
-- removed side of the diff - confirm before treating them as live code. | ||
-- NOTE(review): last(member_name) is given no ordering here, so the name it returns | ||
-- presumably depends on the order rows are read in - verify against the engine's docs. | ||
select | ||
public_whip_id, | ||
category_name, | ||
free_text, | ||
last(member_name) as member_name, | ||
min(registry_date) as earliest_declaration, | ||
max(registry_date) as latest_declaration | ||
from {{ parquet_path }} | ||
group by public_whip_id, category_name, free_text | ||
-- Sorted by person, then by the most recent date the entry was declared. | ||
order by public_whip_id, latest_declaration | ||
-- Collapses the source rows to one row per
-- (public_whip_id, category_name, free_text), reporting the earliest and
-- latest registry_date on which that entry appears.
--
-- We want the same member_name on every entry of the same person, but the
-- source does not guarantee that. So we take the name attached to each
-- person's most recent registry_date and join it back onto the original data.
--
-- {{ parquet_path }} is a template placeholder supplied by the caller
-- (presumably the path of the source parquet file - confirm against the
-- code that renders this query).
WITH last_member_names AS (
    -- One row per public_whip_id: the member_name on the latest registry_date.
    -- member_name is included as a tie-breaker so that two different names
    -- declared on the same latest date always resolve to the same one;
    -- ordering by registry_date alone leaves that choice up to the engine and
    -- makes the output vary between runs.
    SELECT
        public_whip_id,
        member_name
    FROM (
        SELECT
            public_whip_id,
            member_name,
            ROW_NUMBER() OVER (
                PARTITION BY public_whip_id
                ORDER BY registry_date DESC, member_name
            ) AS rn
        FROM {{ parquet_path }}
    ) AS ranked_names
    WHERE rn = 1
)

SELECT
    t.public_whip_id AS public_whip_id,
    t.category_name AS category_name,
    t.free_text AS free_text,
    -- Already a grouping key (one name per person), so no aggregate is needed.
    lm.member_name AS member_name,
    MIN(t.registry_date) AS earliest_declaration,
    MAX(t.registry_date) AS latest_declaration
FROM {{ parquet_path }} AS t
-- NOTE(review): an equality join never matches NULL keys, so rows whose
-- public_whip_id is NULL are dropped here - confirm that is intended.
INNER JOIN last_member_names AS lm
    ON t.public_whip_id = lm.public_whip_id
GROUP BY
    t.public_whip_id,
    lm.member_name,
    t.category_name,
    t.free_text
-- Every grouping key appears in the sort, so the row order is fully
-- determined and the exported file does not change between identical runs.
ORDER BY
    t.public_whip_id,
    latest_declaration,
    t.category_name,
    t.free_text,
    lm.member_name,
    earliest_declaration;