diff --git a/dispatcher/backend/maint-scripts/report_youtube_api_keys.conf.json b/dispatcher/backend/maint-scripts/report_youtube_api_keys.conf.json new file mode 100644 index 00000000..89ee3594 --- /dev/null +++ b/dispatcher/backend/maint-scripts/report_youtube_api_keys.conf.json @@ -0,0 +1,15 @@ +{ + "cc319f3e8ee3f586646b98c55bc0a7b3d9f27ac3393d1bc982aaef5930cc313a": "large-crashcourse-1", + "9dc879afd8f9a95e7eb30167311dd7346bcc6bdfafc6bd1e217460830323c7ef": "large-mathtiques-2", + "d48ebff8fa1891f9531ab21c3bbfd0d6f1c91b301addb2bda1cea48d52531ef6": "large-sorcier-1", + "79e8cfc372f6fb20769a8339eb606bb17ed6a480279ff02a50f41f6cf944efd4": "large-teded-1", + "480e51b5ee93ee2b209906c8bf8362ddac8bd6d543f87674144ce5e66d167ebb": "large-univers-1", + "659fec208e08d2c8edd96a4ae7a16e71bd824c2ce569a80918fa8f4ab8e06ad7": "medium-youtubes-2", + "db29b7d06057fb992db99430ff19522e4781d88e2a2a2fbb2d3565296d45f722": "madrasa-playlists-2", + "3e2413945d668d47ab151ee1df9cb51e65360ed57c1b89eb8ee435cd47f37baf": "medium-youtubes-1", + "1060528e283299cc54de2f67ea9ab918b1e1ddb461b12b25eff1aba135ea458e": "small-youtubes-1", + "99fe8cfb95cfcf9e8851f01e567d9dd2b246a708aa7fc6b1752feb7320725c0f": "small-youtubes-2", + "6a0cfba941cfc1a4e85952bda5aff424cf95217d3b772777c60ec2d184112025": "madrasa-playlists-1", + "1c94fe405309067bd53b125f5f0c55e1640414a89e5f0075028bb313dde374eb": "large-mathtiques-1", + "0da9183bb0e127bb746ce77b27e1901b76c88370deeee52ec0898cb14be77c06": "unknown-1" +} \ No newline at end of file diff --git a/dispatcher/backend/maint-scripts/report_youtube_api_keys.py b/dispatcher/backend/maint-scripts/report_youtube_api_keys.py new file mode 100755 index 00000000..c61cc6dc --- /dev/null +++ b/dispatcher/backend/maint-scripts/report_youtube_api_keys.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 + +"""Create a report of API keys usage in youtube recipes + + ./report_youtube_api_keys.py + + Configuration file named "report_youtube_api_keys.conf.json" must be placed in + same folder as 
def _split_env_list(name):
    """Return the comma-separated environment variable *name* as a clean list.

    An unset or empty variable yields ``[]`` rather than ``[""]`` (which is what
    ``"".split(",")`` produces), so no blank entries are sent to the GitHub API.
    """
    raw_value = os.environ.get(name, "")
    return [item.strip() for item in raw_value.split(",") if item.strip()]


@dbsession
def report_youtube_api_keys(session: so.Session, *, display_unknown_secrets=False):
    """Build a report of API key usage across "youtube" schedules.

    Schedules whose config has ``task_name == "youtube"`` and a non-null
    ``flags["api-key"]`` are grouped by the sha256 hex digest of their API key.
    Digests are mapped to display names through
    ``report_youtube_api_keys.conf.json``; digests missing from that file are
    reported under the name "unknown". Known keys used by no schedule are
    listed with an empty schedule list.

    The report is rendered with the ``report_youtube_api_keys.txt`` Jinja
    template. Both the template and the configuration file are looked up
    relative to the current working directory.

    Environment variables:
        CREATE_ISSUE: when "true" (case-insensitive), the report is posted as a
            GitHub issue; otherwise it is printed to stdout.
        GITHUB_REPO, GITHUB_TOKEN: required when CREATE_ISSUE is true.
        GITHUB_ISSUE_ASSIGNEES, GITHUB_ISSUE_LABELS: optional comma-separated
            lists attached to the created issue.

    Args:
        session: SQLAlchemy session used to query schedules (presumably
            injected by the ``dbsession`` decorator -- the script entry point
            calls this function without passing it).
        display_unknown_secrets: when true, print the clear-text value of every
            API key whose digest is not in the configuration file.

    Raises:
        KeyError: CREATE_ISSUE is true but GITHUB_REPO or GITHUB_TOKEN is unset.
        requests.HTTPError: the GitHub API answered with an error status.
    """
    jinja_env = Environment(loader=FileSystemLoader("./"), autoescape=True)
    jinja_template = jinja_env.get_template("report_youtube_api_keys.txt")
    create_issue = os.environ.get("CREATE_ISSUE", "false").lower() == "true"
    if create_issue:
        # Read the mandatory settings up-front so a misconfiguration fails
        # before any database work is done.
        github_repo = os.environ["GITHUB_REPO"]
        github_token = os.environ["GITHUB_TOKEN"]
        github_issue_assignees = _split_env_list("GITHUB_ISSUE_ASSIGNEES")
        github_issue_labels = _split_env_list("GITHUB_ISSUE_LABELS")

    known_api_keys = json.loads(
        pathlib.Path("report_youtube_api_keys.conf.json").read_text()
    )
    print("Listing schedules")
    stmt = (
        sa.select(dbm.Schedule)
        .where(dbm.Schedule.config["task_name"].astext == "youtube")
        .where(dbm.Schedule.config["flags"]["api-key"].astext.is_not(None))
        .order_by(dbm.Schedule.config["flags"]["api-key"].astext)
    )

    schedules = list(session.execute(stmt).scalars())

    # Group schedule names by hashed key; the clear-text key is kept only so it
    # can be displayed on request for digests missing from the configuration.
    schedules_by_api_key = {}
    for schedule in schedules:
        api_key = schedule.config["flags"]["api-key"]
        hashed_api_key = hashlib.sha256(api_key.encode("utf-8")).hexdigest()
        entry = schedules_by_api_key.setdefault(
            hashed_api_key, {"api_key": api_key, "schedules": []}
        )
        entry["schedules"].append(schedule.name)

    report_keys = []
    for hashed_api_key, data in schedules_by_api_key.items():
        report_keys.append(
            {
                "name": known_api_keys.get(hashed_api_key, "unknown"),
                "schedules": sorted(data["schedules"]),
            }
        )
        if display_unknown_secrets and hashed_api_key not in known_api_keys:
            print("Unknown key:")
            print(f"API key: {data['api_key']}")

    # Known keys that no schedule uses are still listed, with no schedules.
    report_keys.extend(
        {"name": key_name, "schedules": []}
        for hashed_key, key_name in known_api_keys.items()
        if hashed_key not in schedules_by_api_key
    )

    # The template labels the timestamp as UTC, so it must really be UTC and
    # not the local time of whichever host runs the script.
    now = datetime.datetime.now(datetime.timezone.utc)

    report_data = {
        "nb_schedules": len(schedules),
        "keys": sorted(report_keys, key=lambda apikey: apikey["name"]),
        "datetime": now.strftime("%Y-%m-%d %H:%M:%S"),
    }

    report = jinja_template.render(report_data=report_data)

    if create_issue:
        print("Creating Github Issue")
        resp = requests.post(
            url=f"https://api.github.com/repos/{github_repo}/issues",
            headers={
                "Authorization": f"Bearer {github_token}",
                "Accept": "application/vnd.github+json",
                "X-GitHub-Api-Version": "2022-11-28",
            },
            json={
                "title": f"Youtube API Key usage report {now.strftime('%b %Y')}",
                "body": report,
                "assignees": github_issue_assignees,
                "labels": github_issue_labels,
            },
        )
        print(resp.json())
        resp.raise_for_status()
        print(f"Github issue created successfully in {github_repo}")
    else:
        print(report)


if __name__ == "__main__":
    report_youtube_api_keys(display_unknown_secrets=False)
    print("DONE.")
+ +**{{ report_data.nb_schedules }}** recipes using an API key have been found. + +{% for key in report_data["keys"] %} +{% if key.name != "unknown" %}## {{ key.name }}{% else %}## Unknown Key{% endif %} +{% for schedule in key["schedules"] %} +- {{ schedule }}{% endfor %} +{% if key["schedules"] | length == 0 %}*Key is not used*{% endif %} +{% endfor %} +*This report has been automatically generated at {{report_data.datetime}} UTC* \ No newline at end of file