From aba8d0a94574985c88cd1662cf39a9c8dc629e13 Mon Sep 17 00:00:00 2001 From: Sym Roe Date: Sat, 2 Sep 2023 21:09:53 +0100 Subject: [PATCH] Don't write data for ended councils --- .github/workflows/build.yml | 2 +- write-data.py | 46 +++++++++++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d837d770a1..803667db3d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -35,7 +35,7 @@ jobs: - name: install run: | - pip install boto3 + pip install boto3 requests - name: "Sync logbooks" run: | diff --git a/write-data.py b/write-data.py index 7cd4feebe7..370b35b5b0 100644 --- a/write-data.py +++ b/write-data.py @@ -1,14 +1,20 @@ import json from dataclasses import dataclass, field, asdict -from datetime import datetime, timedelta +from datetime import datetime, timedelta, date from pathlib import Path from typing import List +from urllib.parse import urljoin import boto3 +import requests + +EE_BASE_URL = "https://elections.democracyclub.org.uk/" client = boto3.client("codecommit", region_name="eu-west-2") -repos = [result["repositoryName"] for result in client.list_repositories()["repositories"]] +repos = [ + result["repositoryName"] for result in client.list_repositories()["repositories"] +] # LAST_N_RUNS = [] # for i in range(60, 0, -7): @@ -61,7 +67,7 @@ def run_date(self): def from_code_commit(cls: "LogRun", json_data): data = json.loads(json_data) status_code = None - if "status_code" in data: + if "status_code" in data: status_code = data["status_code"] return cls( status_code=status_code, @@ -76,14 +82,44 @@ def from_code_commit(cls: "LogRun", json_data): logs: List[LogBook] = [] +def make_ee_request(council_id: str) -> dict: + url = urljoin(EE_BASE_URL, f"/api/organisations/local-authority/{council_id}/") + results = requests.get(url).json() + return results["results"][0] + + +def council_past_end_date(council_id: str) -> bool: + """ + Check if the end date for the council is in the past, and if so, don't + add it to the report. We don't run scrapers for ended councils, so these + councils would just fill up the report with, probably, failing scrapers. + """ + + metadata = make_ee_request(council_id) + end_date = metadata.get("end_date") + if not end_date: + return False + end_date_obj = datetime.strptime(end_date, '%Y-%m-%d').date() + if end_date_obj < date.today(): + return True + return False + for repo in repos: + if repo in ["CouncillorsRepo", "test"]: + continue + print(repo) + if council_past_end_date(repo): + print(f"Skipping {repo} as it's end_date is in the past") + continue try: - print(repo) log_file = client.get_file( filePath="Councillors/logbook.json", repositoryName=repo, ) - except (client.exceptions.FileDoesNotExistException, client.exceptions.CommitDoesNotExistException): + except ( + client.exceptions.FileDoesNotExistException, + client.exceptions.CommitDoesNotExistException, + ): log_file = {} log_data = LogBook.from_codecommit(repo, log_file)