From 15dc0bca09f623cb275a80e74f1df7837d2b98fb Mon Sep 17 00:00:00 2001
From: James Champ
Date: Wed, 10 Jan 2024 10:17:08 -0800
Subject: [PATCH 01/23] Add argument parser and default options

---
 scripts/gh_scripts/config/bot.ini       |  7 +++
 scripts/gh_scripts/issue_comment_bot.py | 82 +++++++++++++++++++++++++
 2 files changed, 89 insertions(+)
 create mode 100644 scripts/gh_scripts/config/bot.ini
 create mode 100755 scripts/gh_scripts/issue_comment_bot.py

diff --git a/scripts/gh_scripts/config/bot.ini b/scripts/gh_scripts/config/bot.ini
new file mode 100644
index 00000000000..d8a6609d1f2
--- /dev/null
+++ b/scripts/gh_scripts/config/bot.ini
@@ -0,0 +1,7 @@
+[tokens]
+slack_token=
+github_token=
+
+[issue_digest]
+default_hours=48
+slack_channel=#team-abc-plus
diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
new file mode 100755
index 00000000000..f708abcb6ee
--- /dev/null
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+"""
+Fetches Open Library GitHub issues that have been commented on
+within some amount of time, in hours.
+
+Writes links to each issue to Slack channel #team-abc-plus
+"""
+import argparse
+import errno
+import sys
+
+from configparser import ConfigParser
+from pathlib import Path
+
+
+config_file = 'config/bot.ini'
+
+def start_job(args: dict):
+    """
+    Starts the new comment digest job.
+    """
+    pass
+
+
+def _get_parser() -> argparse.ArgumentParser:
+    """
+    Creates and returns an ArgumentParser containing default values which were
+    read from the config file.
+    """
+    def get_defaults():
+        """
+        Reads the config file and returns a dict containing the default
+        values for this script's arguments.
+
+        Location of config file is expected to be nested within this file's parent
+        directory. The relative file location is stored as `config_file`.
+        """
+        this_dir = Path(__file__).parent.resolve()
+        config_path = this_dir / Path(config_file)
+        if not config_path.exists():
+            # XXX : Log to file:
+            print(f'{config_file} does not exist.')
+            sys.exit(errno.ENOENT)
+
+        config = ConfigParser()
+        config.read(config_path)
+        return {
+            'slack_token': config['tokens'].get('slack_token', ''),
+            'hours': config['issue_digest'].getint('default_hours', 1),
+            'slack_channel': config['issue_digest'].get('slack_channel', '#team-abc-plus'),
+        }
+
+    defaults = get_defaults()
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        '--hours',
+        metavar='N',
+        help='Fetch issues that have been updated since N hours ago',
+        type=int,
+        default=defaults['hours'],
+    )
+    parser.add_argument(
+        '-c',
+        '--channel',
+        help="Issues will be published to this Slack channel",
+        type=str,
+        default=defaults['slack_channel'],
+    )
+    parser.add_argument(
+        '-t',
+        '--token',
+        help='Slack auth token',
+        type=str,
+        default=defaults['slack_token'],
+    )
+
+    return parser
+
+if __name__ == '__main__':
+    parser = _get_parser()
+    args = parser.parse_args()
+    start_job(args)
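The central pattern in this first patch, INI values becoming argparse defaults so every flag stays optional but overridable, can be hard to see inside the diff. A minimal standalone sketch (file name and section names are illustrative, not part of the patch):

```python
import argparse
from configparser import ConfigParser

config = ConfigParser()
config.read('config/bot.ini')  # a missing file is tolerated; read() just skips it

parser = argparse.ArgumentParser()
parser.add_argument(
    '--hours',
    type=int,
    # fallback covers a missing file, section, or option in one place
    default=config.getint('issue_digest', 'default_hours', fallback=48),
)

args = parser.parse_args([])
print(args.hours)  # 48 unless config/bot.ini overrides it
```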
From 89bc2f1fdbda1433670cce9208d35d3b33369351 Mon Sep 17 00:00:00 2001
From: James Champ
Date: Wed, 10 Jan 2024 10:23:31 -0800
Subject: [PATCH 02/23] Fetch and filter GitHub issues

---
 scripts/gh_scripts/issue_comment_bot.py | 74 ++++++++++++++++++++++++-
 scripts/gh_scripts/requirements.txt     |  1 +
 2 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 scripts/gh_scripts/requirements.txt

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index f708abcb6ee..a5075c98e4a 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -10,16 +10,88 @@ import sys
 
 from configparser import ConfigParser
+from datetime import datetime, timedelta
 from pathlib import Path
 
+import requests
 
 config_file = 'config/bot.ini'
 
+# XXX : Configure?
+staff_usernames = ['scottbarnes', 'mekarpeles', 'jimchamp', 'cdrini']
+
+def fetch_issues(updated_since: str):
+    query = f'repo:internetarchive/openlibrary is:open is:issue comments:>1 updated:>{updated_since}'
+    response = requests.get(
+        'https://api.github.com/search/issues?per_page=100',
+        params={
+            'q': query,
+        },
+    )
+    d = response.json()
+    results = d['items']
+
+    # Pagination
+    def get_next_page(url: str):
+        """Returns list of issues and optional url for next page"""
+        resp = requests.get(url)
+        # Get issues
+        d = resp.json()
+        issues = d['items']
+        # Prepare url for next page
+        next = resp.links.get('next', {})
+        next_url = next.get('url', '')
+
+        return issues, next_url
+
+    links = response.links
+    next = links.get('next', {})
+    next_url = next.get('url', '')
+    while next_url:
+        # Make call with next link
+        issues, next_url = get_next_page(next_url)
+        results = results + issues
+
+    return results
+
+def filter_issues(issues: list, since_date: str):
+    """
+    Returns list of issues that were not last responded to by staff.
+    Requires fetching the most recent comments for the given issues.
+    """
+    filtered_issues = []
+
+    for i in issues:
+        comments_url = i.get('comments_url')
+        resp = requests.get(f'{comments_url}?since{since_date}Z')
+        comments = resp.json()
+        last_comment = comments[-1]
+        last_commenter = last_comment['user']['login']
+
+        if last_commenter not in staff_usernames:
+            filtered_issues.append(i)
+
+    return filtered_issues
+
+def publish_digest(issues: list[str], slack_channel: str, slack_token: str):
+    pass
+
+def create_date_string(hours):
+    now = datetime.now()
+    # XXX : Add a minute or two to the delta?
+    since = now - timedelta(hours=hours)
+    return since.strftime(f'%Y-%m-%dT%H:%M:%S')
+
 def start_job(args: dict):
     """
     Starts the new comment digest job.
     """
-    pass
+    print(args)
+    date_string = create_date_string(args.hours)
+    issues = fetch_issues(date_string)
+    filtered_issues = filter_issues(issues, date_string)
+    if filtered_issues:
+        publish_digest(filtered_issues, args.channel, args.token)
 
 
 def _get_parser() -> argparse.ArgumentParser:
diff --git a/scripts/gh_scripts/requirements.txt b/scripts/gh_scripts/requirements.txt
new file mode 100644
index 00000000000..2c24336eb31
--- /dev/null
+++ b/scripts/gh_scripts/requirements.txt
@@ -0,0 +1 @@
+requests==2.31.0
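The pagination in `fetch_issues` leans on `requests` exposing the parsed `Link` response header as `response.links`, so the next page's URL, when one exists, sits at `links['next']['url']`. The same loop in isolation, assuming the GitHub Search API's paging behavior:

```python
import requests

def search_all_issues(query: str) -> list[dict]:
    """Collect every page of a GitHub issue search into one list."""
    results: list[dict] = []
    url = 'https://api.github.com/search/issues'
    params = {'q': query, 'per_page': 100}
    while url:
        resp = requests.get(url, params=params)
        resp.raise_for_status()
        results.extend(resp.json()['items'])
        # requests parses the RFC 5988 Link header into resp.links
        url = resp.links.get('next', {}).get('url', '')
        params = None  # the `next` URL already carries the query string
    return results
```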
From dc81b181d5717d5122feda5e30704074b65fdc9d Mon Sep 17 00:00:00 2001
From: James Champ
Date: Wed, 10 Jan 2024 11:37:42 -0800
Subject: [PATCH 03/23] Publish results to Slack

---
 scripts/gh_scripts/issue_comment_bot.py | 55 +++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 3 deletions(-)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index a5075c98e4a..7c40ae3bb3e 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -8,6 +8,7 @@ import argparse
 import errno
 import sys
+import time
 
 from configparser import ConfigParser
 from datetime import datetime, timedelta
 from pathlib import Path
@@ -74,7 +75,55 @@ def filter_issues(issues: list, since_date: str):
 
     return filtered_issues
 
 def publish_digest(issues: list[str], slack_channel: str, slack_token: str):
-    pass
+    parent_thread_msg = f'There are {len(issues)} issue(s) awaiting response. More details in this thread.'
+    response = requests.post(
+        'https://slack.com/api/chat.postMessage',
+        headers={
+            'Authorization': f"Bearer {slack_token}",
+            'Content-Type': 'application/json; charset=utf-8',
+        },
+        json={
+            'channel': slack_channel,
+            'text': parent_thread_msg,
+        },
+    )
+
+    if response.status_code != 200:
+        # XXX : Log this
+        print(f'Failed to send message to Slack. Status code: {response.status_code}')
+        # XXX : Add retry logic
+        sys.exit(errno.ECOMM)
+
+    d = response.json()
+    # Store timestamp, which, along with the channel, uniquely identifies the parent thread
+    ts = d.get('ts')
+
+    def comment_on_thread(message: str):
+        response = requests.post(
+            'https://slack.com/api/chat.postMessage',
+            headers={
+                'Authorization': f"Bearer {slack_token}",
+                'Content-Type': 'application/json; charset=utf-8',
+            },
+            json={
+                'channel': slack_channel,
+                'text': message,
+                'thread_ts': ts
+            },
+        )
+        if response.status_code != 200:
+            # XXX : Log this
+            print(f'Failed to POST slack message\n Status code: {response.status_code}\n Message: {message}')
+            # XXX : Retry logic?
+
+    for issue in issues:
+        # Slack rate limit is roughly 1 request per second
+        time.sleep(1)
+
+        issue_url = issue.get('html_url')
+        issue_title = issue.get('title')
+        message = f'<{issue_url}|*{issue_title}*>'
+        comment_on_thread(message)
 
 def create_date_string(hours):
     now = datetime.now()
@@ -86,13 +135,13 @@ def start_job(args: dict):
     """
     Starts the new comment digest job.
     """
-    print(args)
     date_string = create_date_string(args.hours)
     issues = fetch_issues(date_string)
     filtered_issues = filter_issues(issues, date_string)
     if filtered_issues:
         publish_digest(filtered_issues, args.channel, args.token)
-
+    # XXX : Log this
+    print('Digest POSTed to Slack')
 
 def _get_parser() -> argparse.ArgumentParser:
     """
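The Slack threading used above reduces to one rule: the first `chat.postMessage` response carries a `ts` timestamp, and each reply passes it back as `thread_ts`. A hedged sketch with placeholder token and channel; note that Slack answers HTTP 200 even on failures, so the JSON `ok` field is the real success flag (a later patch in this series leaves a reminder about exactly that):

```python
import requests

def slack_post(token: str, payload: dict) -> dict:
    resp = requests.post(
        'https://slack.com/api/chat.postMessage',
        headers={'Authorization': f'Bearer {token}'},
        json=payload,
    )
    return resp.json()  # contains 'ok', and 'ts' on success

token = 'xoxb-placeholder'
parent = slack_post(token, {'channel': '#team-abc-plus', 'text': 'Digest'})
if parent.get('ok'):
    slack_post(token, {
        'channel': '#team-abc-plus',
        'text': 'First issue link',
        'thread_ts': parent['ts'],  # threads this message under the parent
    })
```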
From 6d5d9b4b0b29cba1571b038939f1d8d81e833925 Mon Sep 17 00:00:00 2001
From: James Champ
Date: Wed, 10 Jan 2024 17:58:35 -0800
Subject: [PATCH 04/23] Add more information to digest

---
 scripts/gh_scripts/issue_comment_bot.py | 110 ++++++++++++++++++------
 1 file changed, 84 insertions(+), 26 deletions(-)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index 7c40ae3bb3e..b21e7e65f26 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -22,17 +22,29 @@ staff_usernames = ['scottbarnes', 'mekarpeles', 'jimchamp', 'cdrini']
 
 def fetch_issues(updated_since: str):
-    query = f'repo:internetarchive/openlibrary is:open is:issue comments:>1 updated:>{updated_since}'
+    """
+    Fetches all GitHub issues that have been updated since the given date string and have at least one comment.
+
+    GitHub results are paginated. This function appends each result to a list, and does so for all pages.
+    To keep API calls to a minimum, we request the maximum number of results per request (100 per page, as of writing).
+
+    Important: Updated issues need not have a recent comment. Update events include many other things, such as adding a
+    label to an issue, or moving an issue to a milestone. Issues returned by this function will require additional
+    processing in order to determine if they have recent comments.
+    """
+    # Make initial query for updated issues:
+    query = f'repo:internetarchive/openlibrary is:open is:issue comments:>0 updated:>{updated_since}'
     response = requests.get(
-        'https://api.github.com/search/issues?per_page=100',
+        'https://api.github.com/search/issues',
         params={
             'q': query,
+            'per_page': 100,
         },
     )
     d = response.json()
     results = d['items']
 
-    # Pagination
+    # Fetch additional updated issues, if any exist
    def get_next_page(url: str):
         """Returns list of issues and optional url for next page"""
         resp = requests.get(url)
@@ -55,27 +67,63 @@ def get_next_page(url: str):
 
     return results
 
-def filter_issues(issues: list, since_date: str):
+def filter_issues(issues: list, since: datetime):
     """
     Returns list of issues that were not last responded to by staff.
     Requires fetching the most recent comments for the given issues.
     """
-    filtered_issues = []
+    results = []
 
     for i in issues:
+        # Fetch comments using URL from previous GitHub search results
         comments_url = i.get('comments_url')
-        resp = requests.get(f'{comments_url}?since{since_date}Z')
+        resp = requests.get(
+            comments_url,
+            params={
+                'per_page': 100
+            }
+        )
+
+        # Ensure that we have the last page of comments
+        links = resp.links
+        last = links.get('last', {})
+        last_url = last.get('url', '')
+
+        if last_url:
+            resp = requests.get(last_url)
+
+        # Get last comment
         comments = resp.json()
         last_comment = comments[-1]
-        last_commenter = last_comment['user']['login']
 
-        if last_commenter not in staff_usernames:
-            filtered_issues.append(i)
+        # Determine if last comment meets our criteria for Slack notifications
+        # First step: Ensure that the last comment was left after the given `since` datetime
+        created = datetime.fromisoformat(last_comment['created_at'])
+        # Removing timezone info to avoid TypeErrors, which occur when
+        # comparing a timezone-aware datetime with a timezone-naive datetime
+        created = created.replace(tzinfo=None)
+        if created > since:
+            # Next step: Determine if the last commenter is a staff member
+            last_commenter = last_comment['user']['login']
+            if last_commenter not in staff_usernames:
+                results.append({
+                    'comment_url': last_comment['html_url'],
+                    'commenter': last_commenter,
+                    'issue_title': i['title'],
+                })
 
-    return filtered_issues
+    return results
+
+def publish_digest(issues: list[str], slack_channel: str, slack_token: str, hours_passed: int):
+    """
+    Creates a threaded Slack message containing a digest of recently commented GitHub issues.
+
+    Parent Slack message will say how many comments were left, and the timeframe. Each reply
+    will include a link to the comment, as well as addiitonal information.
+    """
+    # Create the parent message
+    parent_thread_msg = f'At least {len(issues)} new comment(s) have been left by contributors in the past {hours_passed} hour(s)'
 
-def publish_digest(issues: list[str], slack_channel: str, slack_token: str):
-    parent_thread_msg = f'There are {len(issues)} issue(s) awaiting response. More details in this thread.'
     response = requests.post(
         'https://slack.com/api/chat.postMessage',
         headers={
@@ -91,7 +139,7 @@ def publish_digest(issues: list[str], slack_channel: str, slack_token: str):
     if response.status_code != 200:
         # XXX : Log this
         print(f'Failed to send message to Slack. Status code: {response.status_code}')
-        # XXX : Add retry logic
+        # XXX : Add retry logic?
         sys.exit(errno.ECOMM)
 
     d = response.json()
@@ -99,6 +147,9 @@ def publish_digest(issues: list[str], slack_channel: str, slack_token: str):
     ts = d.get('ts')
 
     def comment_on_thread(message: str):
+        """
+        Posts the given message as a reply to the parent message.
+        """
         response = requests.post(
             'https://slack.com/api/chat.postMessage',
             headers={
@@ -108,7 +159,7 @@ def comment_on_thread(message: str):
             json={
                 'channel': slack_channel,
                 'text': message,
-                'thread_ts': ts
+                'thread_ts': ts,
             },
         )
         if response.status_code != 200:
@@ -116,30 +167,34 @@ def comment_on_thread(message: str):
             print(f'Failed to POST slack message\n Status code: {response.status_code}\n Message: {message}')
             # XXX : Retry logic?
 
-    for issue in issues:
+    for i in issues:
         # Slack rate limit is roughly 1 request per second
         time.sleep(1)
 
-        issue_url = issue.get('html_url')
-        issue_title = issue.get('title')
-        message = f'<{issue_url}|*{issue_title}*>'
+        comment_url = i['comment_url']
+        issue_title = i['issue_title']
+        commenter = i['commenter']
+        message = f'<{comment_url}|Latest comment for: *{issue_title}*>\n'
+        message += f'Commenter: *{commenter}*'
         comment_on_thread(message)
 
-def create_date_string(hours):
+def time_since(hours):
+    """Returns datetime and string representations of the current time, minus the given hours"""
     now = datetime.now()
-    # XXX : Add a minute or two to the delta?
+    # XXX : Add a minute or two to the delta (to avoid dropping issues)?
     since = now - timedelta(hours=hours)
-    return since.strftime(f'%Y-%m-%dT%H:%M:%S')
+    return since, since.strftime(f'%Y-%m-%dT%H:%M:%S')
 
 def start_job(args: dict):
     """
     Starts the new comment digest job.
     """
-    date_string = create_date_string(args.hours)
+    since, date_string = time_since(args.hours)
     issues = fetch_issues(date_string)
-    filtered_issues = filter_issues(issues, date_string)
+    filtered_issues = filter_issues(issues, since)
 
     if filtered_issues:
-        publish_digest(filtered_issues, args.channel, args.token)
+        publish_digest(filtered_issues, args.channel, args.slack_token, args.hours)
     # XXX : Log this
     print('Digest POSTed to Slack')
@@ -165,6 +220,8 @@ def get_defaults():
 
         config = ConfigParser()
         config.read(config_path)
+        # XXX : Setting the defaults both here in the `get` calls and the config file
+        # is like wearing a belt with suspenders...
         return {
             'slack_token': config['tokens'].get('slack_token', ''),
             'hours': config['issue_digest'].getint('default_hours', 1),
@@ -188,8 +245,8 @@ def get_defaults():
         default=defaults['slack_channel'],
     )
     parser.add_argument(
-        '-t',
-        '--token',
+        '-s',
+        '--slack-token',
         help='Slack auth token',
         type=str,
         default=defaults['slack_token'],
@@ -198,6 +255,7 @@ def get_defaults():
     return parser
 
 if __name__ == '__main__':
+    # Process command-line arguments and starts the notification job
     parser = _get_parser()
     args = parser.parse_args()
     start_job(args)
From 4fae3a36084c2ff3c32a3246ec24e089924e2d56 Mon Sep 17 00:00:00 2001
From: James Champ
Date: Thu, 11 Jan 2024 11:39:30 -0800
Subject: [PATCH 05/23] Remove default configuration

---
 scripts/gh_scripts/config/bot.ini       |  7 ----
 scripts/gh_scripts/issue_comment_bot.py | 50 ++++---------------------
 2 files changed, 7 insertions(+), 50 deletions(-)
 delete mode 100644 scripts/gh_scripts/config/bot.ini

diff --git a/scripts/gh_scripts/config/bot.ini b/scripts/gh_scripts/config/bot.ini
deleted file mode 100644
index d8a6609d1f2..00000000000
--- a/scripts/gh_scripts/config/bot.ini
+++ /dev/null
@@ -1,7 +0,0 @@
-[tokens]
-slack_token=
-github_token=
-
-[issue_digest]
-default_hours=48
-slack_channel=#team-abc-plus
diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index b21e7e65f26..003d5c022b7 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -3,22 +3,17 @@
 Fetches Open Library GitHub issues that have been commented on
 within some amount of time, in hours.
 
-Writes links to each issue to Slack channel #team-abc-plus
+Writes links to each issue to given Slack channel.
 """
 import argparse
 import errno
 import sys
 import time
 
-from configparser import ConfigParser
 from datetime import datetime, timedelta
-from pathlib import Path
 
 import requests
 
-config_file = 'config/bot.ini'
-
-# XXX : Configure?
 staff_usernames = ['scottbarnes', 'mekarpeles', 'jimchamp', 'cdrini']
 
 def fetch_issues(updated_since: str):
@@ -196,60 +191,29 @@ def start_job(args: dict):
     if filtered_issues:
         publish_digest(filtered_issues, args.channel, args.slack_token, args.hours)
     # XXX : Log this
-    print('Digest POSTed to Slack')
+    print('Digest posted to Slack')
 
 def _get_parser() -> argparse.ArgumentParser:
     """
     Creates and returns an ArgumentParser containing default values which were
     read from the config file.
     """
-    def get_defaults():
-        """
-        Reads the config file and returns a dict containing the default
-        values for this script's arguments.
-
-        Location of config file is expected to be nested within this file's parent
-        directory. The relative file location is stored as `config_file`.
-        """
-        this_dir = Path(__file__).parent.resolve()
-        config_path = this_dir / Path(config_file)
-        if not config_path.exists():
-            # XXX : Log to file:
-            print(f'{config_file} does not exist.')
-            sys.exit(errno.ENOENT)
-
-        config = ConfigParser()
-        config.read(config_path)
-        # XXX : Setting the defaults both here in the `get` calls and the config file
-        # is like wearing a belt with suspenders...
-        return {
-            'slack_token': config['tokens'].get('slack_token', ''),
-            'hours': config['issue_digest'].getint('default_hours', 1),
-            'slack_channel': config['issue_digest'].get('slack_channel', '#team-abc-plus'),
-        }
-
-    defaults = get_defaults()
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument(
-        '--hours',
-        metavar='N',
-        help='Fetch issues that have been updated since N hours ago',
+        'hours',
+        help='Fetch issues that have been updated since this many hours ago',
         type=int,
-        default=defaults['hours'],
     )
     parser.add_argument(
-        '-c',
-        '--channel',
+        'channel',
         help="Issues will be published to this Slack channel",
         type=str,
-        default=defaults['slack_channel'],
     )
     parser.add_argument(
-        '-s',
-        '--slack-token',
+        'slack_token',
+        metavar='slack-token',
         help='Slack auth token',
         type=str,
-        default=defaults['slack_token'],
     )
 
     return parser
From 7687c5a14039592d8930d6e410cb05fea3624615 Mon Sep 17 00:00:00 2001
From: James Champ
Date: Thu, 11 Jan 2024 13:40:32 -0800
Subject: [PATCH 06/23] Mention leads in Slack message

---
 scripts/gh_scripts/issue_comment_bot.py | 44 +++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index 003d5c022b7..f7ac502e6e9 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -14,7 +14,21 @@
 
 import requests
 
-staff_usernames = ['scottbarnes', 'mekarpeles', 'jimchamp', 'cdrini']
+lead_label_to_username = {
+    'Lead: @mekarpeles': 'mekarpeles',
+    'Lead: @cdrini': 'cdrini',
+    'Lead: @scottbarnes': 'scottbarnes',
+    'Lead: @seabelis': 'seabelis',
+    'Lead: @jimchamp': 'jimchamp',
+}
+
+username_to_slack_id = {
+    'mekarpeles': '<@mek>',
+    'cdrini': '<@cdrini>',
+    'scottbarnes': '<@U03MNR6T7FH>',
+    'seabelis': '<@UAHQ39ACT>',
+    'jimchamp': '<@U01ARTHG9EV>',
+}
 
 def fetch_issues(updated_since: str):
@@ -100,15 +114,31 @@ def filter_issues(issues: list, since: datetime):
         if created > since:
             # Next step: Determine if the last commenter is a staff member
             last_commenter = last_comment['user']['login']
-            if last_commenter not in staff_usernames:
+            if last_commenter not in username_to_slack_id:
+                lead_label= find_lead_label(i.get('labels', []))
                 results.append({
                     'comment_url': last_comment['html_url'],
                     'commenter': last_commenter,
                     'issue_title': i['title'],
+                    'lead_label': lead_label,
                 })
 
     return results
 
+def find_lead_label(labels: list[str]) -> str:
+    """
+    Finds and returns the name of the first lead label found in the given list of GitHub labels.
+
+    Returns an empty string if no lead label is found
+    """
+    result = ''
+    for label in labels:
+        if label['name'].startswith('Lead:'):
+            result = label['name']
+            break
+
+    return result
+
 def publish_digest(issues: list[str], slack_channel: str, slack_token: str, hours_passed: int):
@@ -170,6 +200,16 @@ def comment_on_thread(message: str):
         comment_url = i['comment_url']
         issue_title = i['issue_title']
         commenter = i['commenter']
         message = f'<{comment_url}|Latest comment for: *{issue_title}*>\n'
+
+        username = lead_label_to_username.get(i['lead_label'], '')
+        slack_id = username_to_slack_id.get(username, '')
+        if slack_id:
+            message += f'Lead: {slack_id}\n'
+        elif i['lead_label']:
+            message += f'i["lead_label"]\n'
+        else:
+            message += 'Lead: N/A\n'
+
         message += f'Commenter: *{commenter}*'
         comment_on_thread(message)
From fac8e060108c61d5b54a53fa0ba6f51e0139f6bb Mon Sep 17 00:00:00 2001
From: James Champ
Date: Thu, 11 Jan 2024 14:06:39 -0800
Subject: [PATCH 07/23] Add more documentation

---
 scripts/gh_scripts/README.md            | 27 +++++++++++++++++++++
 scripts/gh_scripts/issue_comment_bot.py | 11 ++++++++--
 2 files changed, 36 insertions(+), 2 deletions(-)
 create mode 100644 scripts/gh_scripts/README.md

diff --git a/scripts/gh_scripts/README.md b/scripts/gh_scripts/README.md
new file mode 100644
index 00000000000..1549c5fcf4f
--- /dev/null
+++ b/scripts/gh_scripts/README.md
@@ -0,0 +1,27 @@
+# GitHub Project Management Scripts
+
+This directory contains scripts that the Open Library team uses to interact with this GitHub repository.
+
+To quickly see a script's purpose and arguments, run the script with the `-h` or `--help` flag.
+
+## `issue_comment_bot.py`
+
+This script fetches issues that have new comments from contributors within the given number of hours, then posts a message to the team in our Slack channel.
+
+### Usage:
+This script has three positional arguments:
+```
+  hours        Fetch issues that have been updated since this many hours ago
+  channel      Issues will be published to this Slack channel
+  slack-token  Slack authentication token
+```
+
+__Running the script locally:__
+```
+docker compose exec -e PYTHONPATH=. web bash
+
+# Publish digest of new comments from the past day to #openlibrary-g:
+./scripts/gh_scripts/issue_comment_bot.py 24 "#openlibrary-g" "replace-with-slack-token"
+```
+
+__Note:__ When adding arguments, be sure to place any hyphenated values within double quotes.
diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index f7ac502e6e9..f77eeaee901 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -188,6 +188,7 @@ def comment_on_thread(message: str):
         )
         if response.status_code != 200:
+            # XXX : Check "ok" field for errors
             # XXX : Log this
             print(f'Failed to POST slack message\n Status code: {response.status_code}\n Message: {message}')
             # XXX : Retry logic?
@@ -228,10 +229,16 @@ def start_job(args: dict):
     issues = fetch_issues(date_string)
     filtered_issues = filter_issues(issues, since)
 
+    # XXX : If we are only running this script daily, we can remove this condition to
+    # always post a message to Slack. If the digest is ever not published, we'll know
+    # that something is wrong with our script runner.
     if filtered_issues:
         publish_digest(filtered_issues, args.channel, args.slack_token, args.hours)
-    # XXX : Log this
-    print('Digest posted to Slack')
+        # XXX : Log this
+        print('Digest posted to Slack.')
+    else:
+        # XXX : Log this
+        print('No issues needing attention found.')
From 9a535ac8ba229560ba0235ed761aabff23cacb7f Mon Sep 17 00:00:00 2001
From: James Champ
Date: Thu, 11 Jan 2024 14:10:51 -0800
Subject: [PATCH 08/23] Update parent thread message

---
 scripts/gh_scripts/issue_comment_bot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index f77eeaee901..4c773d6b6e8 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -147,7 +147,7 @@ def find_lead_label(labels: list[str]) -> str:
     will include a link to the comment, as well as addiitonal information.
     """
     # Create the parent message
-    parent_thread_msg = f'At least {len(issues)} new comment(s) have been left by contributors in the past {hours_passed} hour(s)'
+    parent_thread_msg = f'{len(issues)} new GitHub comment(s) since {hours_passed} hour(s) ago'
 
     response = requests.post(
         'https://slack.com/api/chat.postMessage',
From 4748e6aa2c32950e3ffe4abe92aef3777a623ff2 Mon Sep 17 00:00:00 2001
From: James Champ
Date: Thu, 11 Jan 2024 14:26:44 -0800
Subject: [PATCH 09/23] Fix string interpolation bug

---
 scripts/gh_scripts/issue_comment_bot.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index 4c773d6b6e8..30af9b21b30 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 """
-Fetches Open Library GitHub issues that have been commented on 
+Fetches Open Library GitHub issues that have been commented on
 within some amount of time, in hours.
 
 Writes links to each issue to given Slack channel.
@@ -207,7 +207,7 @@ def comment_on_thread(message: str):
         if slack_id:
             message += f'Lead: {slack_id}\n'
         elif i['lead_label']:
-            message += f'i["lead_label"]\n'
+            message += f'{i["lead_label"]}\n'
         else:
             message += 'Lead: N/A\n'
@@ -219,7 +219,7 @@ def time_since(hours):
     now = datetime.now()
     # XXX : Add a minute or two to the delta (to avoid dropping issues)?
     since = now - timedelta(hours=hours)
-    return since, since.strftime(f'%Y-%m-%dT%H:%M:%S')
+    return since, since.strftime('%Y-%m-%dT%H:%M:%S')
@@ -230,7 +230,7 @@ def start_job(args: dict):
     filtered_issues = filter_issues(issues, since)
 
     # XXX : If we are only running this script daily, we can remove this condition to
-    # always post a message to Slack. If the digest is ever not published, we'll know 
+    # always post a message to Slack. If the digest is ever not published, we'll know
     # that something is wrong with our script runner.
     if filtered_issues:
         publish_digest(filtered_issues, args.channel, args.slack_token, args.hours)
         # XXX : Log this
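The interpolation bug fixed in this patch generalizes: without braces an f-string emits its text literally, and quoting a dict key inside the braces requires alternating quote styles. Distilled:

```python
i = {'lead_label': 'Lead: @jimchamp'}

broken = f'i["lead_label"]\n'   # no braces: emits the literal text i["lead_label"]
fixed = f'{i["lead_label"]}\n'  # braces plus inner double quotes: emits Lead: @jimchamp
assert broken != fixed
```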
From 449fc9a770c4b308f44856c6c387f13e36624de2 Mon Sep 17 00:00:00 2001
From: James Champ
Date: Thu, 11 Jan 2024 14:46:43 -0800
Subject: [PATCH 10/23] Fix linting issues

---
 scripts/gh_scripts/issue_comment_bot.py | 28 +++++++++++++++++--------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index 30af9b21b30..8cf1492ce96 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -11,6 +11,7 @@ import time
 
 from datetime import datetime, timedelta
+from typing import Any
 
 import requests
 
@@ -30,6 +31,7 @@
     'jimchamp': '<@U01ARTHG9EV>',
 }
 
+
 def fetch_issues(updated_since: str):
@@ -43,12 +45,13 @@ def fetch_issues(updated_since: str):
     """
     # Make initial query for updated issues:
     query = f'repo:internetarchive/openlibrary is:open is:issue comments:>0 updated:>{updated_since}'
+    p: dict[str, str|int] = {
+        'q': query,
+        'per_page': 100,
+    }
     response = requests.get(
         'https://api.github.com/search/issues',
-        params={
-            'q': query,
-            'per_page': 100,
-        },
+        params=p,
     )
     d = response.json()
     results = d['items']
@@ -76,6 +79,7 @@ def get_next_page(url: str):
 
     return results
 
+
 def filter_issues(issues: list, since: datetime):
     """
     Returns list of issues that were not last responded to by staff.
     Requires fetching the most recent comments for the given issues.
@@ -119,7 +123,7 @@ def filter_issues(issues: list, since: datetime):
             # Next step: Determine if the last commenter is a staff member
             last_commenter = last_comment['user']['login']
             if last_commenter not in username_to_slack_id:
-                lead_label= find_lead_label(i.get('labels', []))
+                lead_label = find_lead_label(i.get('labels', []))
                 results.append({
                     'comment_url': last_comment['html_url'],
                     'commenter': last_commenter,
@@ -131,7 +135,8 @@ def filter_issues(issues: list, since: datetime):
 
     return results
 
-def find_lead_label(labels: list[str]) -> str:
+
+def find_lead_label(labels: list[dict[str, Any]]) -> str:
     """
     Finds and returns the name of the first lead label found in the given list of GitHub labels.
@@ -145,7 +150,8 @@ def find_lead_label(labels: list[str]) -> str:
 
     return result
 
-def publish_digest(issues: list[str], slack_channel: str, slack_token: str, hours_passed: int):
+
+def publish_digest(issues: list[dict[str, str]], slack_channel: str, slack_token: str, hours_passed: int):
     """
     Creates a threaded Slack message containing a digest of recently commented GitHub issues.
@@ -215,6 +221,7 @@ def comment_on_thread(message: str):
         message += f'Commenter: *{commenter}*'
         comment_on_thread(message)
 
+
 def time_since(hours):
     """Returns datetime and string representations of the current time, minus the given hours"""
     now = datetime.now()
@@ -224,7 +231,8 @@ def time_since(hours):
     return since, since.strftime('%Y-%m-%dT%H:%M:%S')
 
-def start_job(args: dict):
+
+def start_job(args: argparse.Namespace):
     """
     Starts the new comment digest job.
     """
     since, date_string = time_since(args.hours)
     issues = fetch_issues(date_string)
     filtered_issues = filter_issues(issues, since)
@@ -240,6 +248,7 @@ def start_job(args: argparse.Namespace):
         # XXX : Log this
         print('No issues needing attention found.')
 
+
 def _get_parser() -> argparse.ArgumentParser:
     """
     Creates and returns an ArgumentParser containing default values which were
     read from the config file.
@@ -265,6 +274,7 @@ def _get_parser() -> argparse.ArgumentParser:
 
     return parser
 
+
 if __name__ == '__main__':
     # Process command-line arguments and starts the notification job
     parser = _get_parser()
     args = parser.parse_args()
     start_job(args)
From ed049524d2985fc44f2fd081956c1506ea2ae537 Mon Sep 17 00:00:00 2001
From: Charles Horn
Date: Wed, 17 Jan 2024 14:55:33 +1300
Subject: [PATCH 11/23] ensure bwbsku local_id is uppercased.

Shouldn't be so much of an issue now, but some old records came in
lowercased. Protect against it ever happening again.
---
 scripts/promise_batch_imports.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/promise_batch_imports.py b/scripts/promise_batch_imports.py
index 8a373eea502..3edbc987c08 100644
--- a/scripts/promise_batch_imports.py
+++ b/scripts/promise_batch_imports.py
@@ -44,7 +44,7 @@ def map_book_to_olbook(book, promise_id):
     isbn = book.get('ISBN') or ' '
     sku = book['BookSKUB'] or book['BookSKU'] or book['BookBarcode']
     olbook = {
-        'local_id': [f"urn:bwbsku:{sku}"],
+        'local_id': [f"urn:bwbsku:{sku.upper()}"],
         'identifiers': {
             **({'amazon': [book.get('ASIN')]} if not asin_is_isbn_10 else {}),
             **({'better_world_books': [isbn]} if not is_isbn_13(isbn) else {}),
From 5a2ce149685ec2132ea6053e4d428532e957b580 Mon Sep 17 00:00:00 2001
From: James Champ
Date: Wed, 17 Jan 2024 09:58:12 -0800
Subject: [PATCH 12/23] Exclude Charles' comments from the digest

---
 scripts/gh_scripts/issue_comment_bot.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index 8cf1492ce96..f5618f29996 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -15,6 +15,7 @@
 
 import requests
 
+# Maps lead label to GitHub username
 lead_label_to_username = {
     'Lead: @mekarpeles': 'mekarpeles',
     'Lead: @cdrini': 'cdrini',
@@ -23,12 +24,14 @@
     'Lead: @jimchamp': 'jimchamp',
 }
 
+# Maps GitHub username to Slack ID
 username_to_slack_id = {
     'mekarpeles': '<@mek>',
     'cdrini': '<@cdrini>',
     'scottbarnes': '<@U03MNR6T7FH>',
     'seabelis': '<@UAHQ39ACT>',
     'jimchamp': '<@U01ARTHG9EV>',
+    'hornc': '<@U0EUS8DV0>',
 }
From d64a630c966921d99795682957675bd4b5b815aa Mon Sep 17 00:00:00 2001
From: James Champ
Date: Wed, 17 Jan 2024 13:30:18 -0800
Subject: [PATCH 13/23] Create workflow for comment digest job

---
 .github/workflows/new_comment_digest.yml | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 .github/workflows/new_comment_digest.yml

diff --git a/.github/workflows/new_comment_digest.yml b/.github/workflows/new_comment_digest.yml
new file mode 100644
index 00000000000..ce72caf6430
--- /dev/null
+++ b/.github/workflows/new_comment_digest.yml
@@ -0,0 +1,21 @@
+name: new_comment_digest
+on:
+  schedule: # 08:30 daily
+    - cron: '30 8 * * *'
+  workflow_dispatch:
+permissions:
+  contents: read
+
+jobs:
+  new_comment_digest:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v4
+        with:
+          python-version: 3.x
+      - run: pip install requests
+      - run: scripts/gh_scripts/issue_comment_bot.py 24 "$SLACK_CHANNEL" "$SLACK_TOKEN"
+        env:
+          SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL_ABC_TEAM_PLUS }}
From ec89090c79f2d0c838449081a6599741e1bc1162 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 17 Jan 2024 21:31:16 +0000
Subject: [PATCH 14/23] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 scripts/gh_scripts/issue_comment_bot.py | 41 ++++++++++++++-----------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index f5618f29996..9f12ec0f9c6 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -48,7 +48,7 @@ def fetch_issues(updated_since: str):
     """
     # Make initial query for updated issues:
     query = f'repo:internetarchive/openlibrary is:open is:issue comments:>0 updated:>{updated_since}'
-    p: dict[str, str|int] = {
+    p: dict[str, str | int] = {
         'q': query,
         'per_page': 100,
     }
@@ -93,12 +93,7 @@ def filter_issues(issues: list, since: datetime):
     for i in issues:
         # Fetch comments using URL from previous GitHub search results
         comments_url = i.get('comments_url')
-        resp = requests.get(
-            comments_url,
-            params={
-                'per_page': 100
-            }
-        )
+        resp = requests.get(comments_url, params={'per_page': 100})
 
         # Ensure that we have the last page of comments
         links = resp.links
@@ -123,12 +118,14 @@ def filter_issues(issues: list, since: datetime):
             last_commenter = last_comment['user']['login']
             if last_commenter not in username_to_slack_id:
                 lead_label = find_lead_label(i.get('labels', []))
-                results.append({
-                    'comment_url': last_comment['html_url'],
-                    'commenter': last_commenter,
-                    'issue_title': i['title'],
-                    'lead_label': lead_label,
-                })
+                results.append(
+                    {
+                        'comment_url': last_comment['html_url'],
+                        'commenter': last_commenter,
+                        'issue_title': i['title'],
+                        'lead_label': lead_label,
+                    }
+                )
 
     return results
@@ -148,7 +145,12 @@ def find_lead_label(labels: list[dict[str, Any]]) -> str:
     return result
 
 
-def publish_digest(issues: list[dict[str, str]], slack_channel: str, slack_token: str, hours_passed: int):
+def publish_digest(
+    issues: list[dict[str, str]],
+    slack_channel: str,
+    slack_token: str,
+    hours_passed: int,
+):
     """
     Creates a threaded Slack message containing a digest of recently commented GitHub issues.
 
     Parent Slack message will say how many comments were left, and the timeframe. Each reply
     will include a link to the comment, as well as additional information.
     """
     # Create the parent message
-    parent_thread_msg = f'{len(issues)} new GitHub comment(s) since {hours_passed} hour(s) ago'
+    parent_thread_msg = (
+        f'{len(issues)} new GitHub comment(s) since {hours_passed} hour(s) ago'
+    )
 
     response = requests.post(
         'https://slack.com/api/chat.postMessage',
@@ -199,7 +203,9 @@ def comment_on_thread(message: str):
         if response.status_code != 200:
             # XXX : Check "ok" field for errors
             # XXX : Log this
-            print(f'Failed to POST slack message\n Status code: {response.status_code}\n Message: {message}')
+            print(
+                f'Failed to POST slack message\n Status code: {response.status_code}\n Message: {message}'
+            )
             # XXX : Retry logic?
@@ -238,12 +244,11 @@ def start_job(args: argparse.Namespace):
     """
     since, date_string = time_since(args.hours)
     issues = fetch_issues(date_string)
-    filtered_issues = filter_issues(issues, since)
 
     # XXX : If we are only running this script daily, we can remove this condition to
     # always post a message to Slack. If the digest is ever not published, we'll know
     # that something is wrong with our script runner.
-    if filtered_issues:
+    if filtered_issues := filter_issues(issues, since):
         publish_digest(filtered_issues, args.channel, args.slack_token, args.hours)
         # XXX : Log this
         print('Digest posted to Slack.')
From b78ab04867c5cf63db59deaa56009a1f71ec18ac Mon Sep 17 00:00:00 2001
From: jimchamp
Date: Thu, 18 Jan 2024 09:30:25 -0800
Subject: [PATCH 15/23] Update Node version in `key`

---
 .github/workflows/javascript_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/javascript_tests.yml b/.github/workflows/javascript_tests.yml
index 14f22bda8d5..7891b7efc38 100644
--- a/.github/workflows/javascript_tests.yml
+++ b/.github/workflows/javascript_tests.yml
@@ -35,7 +35,7 @@ jobs:
           # `node_modules` directly.
           path: 'node_modules'
           # Note the version number in this string -- update it when updating Node!
-          key: ${{ runner.os }}-node16-${{ hashFiles('**/package-lock.json') }}
+          key: ${{ runner.os }}-node20-${{ hashFiles('**/package-lock.json') }}
       - if: steps.npm-cache.outputs.cache-hit != 'true'
         run: npm install --no-audit
       - run: npm run lint
From b18239e358614f292a70ac482384861d8a9a3f8f Mon Sep 17 00:00:00 2001
From: jimchamp
Date: Thu, 18 Jan 2024 09:36:53 -0800
Subject: [PATCH 16/23] Add comment as reminder

---
 .github/workflows/javascript_tests.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/javascript_tests.yml b/.github/workflows/javascript_tests.yml
index 7891b7efc38..12440d142d4 100644
--- a/.github/workflows/javascript_tests.yml
+++ b/.github/workflows/javascript_tests.yml
@@ -24,7 +24,8 @@ jobs:
       - uses: actions/checkout@v4
       - uses: actions/setup-node@v3
         with:
-          node-version: '20' # Should match what's in our Dockerfile
+          # Should match what's in our Dockerfile
+          node-version: '20' # Also update the `key` in the `with` map, below
       - uses: actions/cache@v3
         id: npm-cache
         with:
From 49ee282f2d1ba91638cdc9b213e644da42f82421 Mon Sep 17 00:00:00 2001
From: Mek
Date: Fri, 19 Jan 2024 11:25:07 -0800
Subject: [PATCH 17/23] Add client_id, s3, scope to bulk availability API
 (#7522)

Co-authored-by: Christian Clauss
---
 openlibrary/core/lending.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/openlibrary/core/lending.py b/openlibrary/core/lending.py
index d7e2a1949cb..f59df48a475 100644
--- a/openlibrary/core/lending.py
+++ b/openlibrary/core/lending.py
@@ -308,7 +308,22 @@ def update_availability_schema_to_v2(v1_resp, ocaid):
     url = '{}?{}={}'.format(config_ia_availability_api_v2_url, key, ','.join(ids))
 
     try:
-        response = requests.get(url, timeout=config_http_request_timeout)
+        client_ip = web.ctx.env.get('HTTP_X_FORWARDED_FOR', 'ol-internal')
+        params = {"scope": "printdisabled"}
+        headers = {
+            "x-preferred-client-id": client_ip,
+            "x-application-id": "openlibrary",
+        }
+        if config_ia_ol_metadata_write_s3:
+            headers["authorization"] = "LOW {s3_key}:{s3_secret}".format(
+                **config_ia_ol_metadata_write_s3
+            )
+
+        # Make authenticated request to Bulk Availability API
+        response = requests.get(
+            url, params=params, headers=headers, timeout=config_http_request_timeout
+        )
+
         items = response.json().get('responses', {})
         for pkey in items:
             ocaid = pkey if key == 'identifier' else items[pkey].get('identifier')
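The shape of the authenticated call PATCH 17 builds, sketched standalone. Endpoint and credentials are placeholders (the real URL comes from `config_ia_availability_api_v2_url`, which this diff doesn't show); the `LOW key:secret` authorization scheme and the two `x-` headers are taken from the patch itself:

```python
import requests

s3_config = {'s3_key': 'EXAMPLE_KEY', 's3_secret': 'EXAMPLE_SECRET'}

headers = {
    'x-preferred-client-id': 'ol-internal',
    'x-application-id': 'openlibrary',
    # archive.org's S3-style "LOW <key>:<secret>" authorization header
    'authorization': 'LOW {s3_key}:{s3_secret}'.format(**s3_config),
}

response = requests.get(
    'https://archive.org/example-availability-endpoint',  # placeholder URL
    params={'scope': 'printdisabled', 'identifier': 'some-ocaid'},
    headers=headers,
    timeout=30,
)
print(response.status_code)
```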
Heiman" Date: Fri, 19 Jan 2024 21:19:44 -0600 Subject: [PATCH 18/23] Add Unicode folding to text_title_sort SOLR type --- conf/solr/conf/managed-schema.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/solr/conf/managed-schema.xml b/conf/solr/conf/managed-schema.xml index c74ebff8f0e..b513a8f9c51 100644 --- a/conf/solr/conf/managed-schema.xml +++ b/conf/solr/conf/managed-schema.xml @@ -514,8 +514,8 @@ - + Date: Mon, 22 Jan 2024 13:04:06 -0800 Subject: [PATCH 19/23] [pre-commit.ci] pre-commit autoupdate (#8745) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.1.13 → v0.1.14](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.13...v0.1.14) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ab3ba6b3b73..ddfcd8a69dc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,7 +32,7 @@ repos: - id: auto-walrus - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.13 + rev: v0.1.14 hooks: - id: ruff From bfc63f1aa819f3cd13426c9257d1a59526e8da8a Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Mon, 22 Jan 2024 16:06:38 -0500 Subject: [PATCH 20/23] Fix share modal iframe url (#8738) --- openlibrary/macros/ShareModal.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/macros/ShareModal.html b/openlibrary/macros/ShareModal.html index ede2b16d511..8ec6e488ec6 100644 --- a/openlibrary/macros/ShareModal.html +++ b/openlibrary/macros/ShareModal.html @@ -6,7 +6,7 @@ $def embed_iframe(page_url): - +
$:link_markup From cb380d7fa2fd1de80083b4f9451f4d0f2456ac79 Mon Sep 17 00:00:00 2001 From: Rebecca Shoptaw Date: Tue, 23 Jan 2024 09:00:37 -0500 Subject: [PATCH 21/23] Switch Portuguese translations for 3 phrases. --- openlibrary/i18n/pt/messages.po | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openlibrary/i18n/pt/messages.po b/openlibrary/i18n/pt/messages.po index fafca1c8542..bfd77928985 100644 --- a/openlibrary/i18n/pt/messages.po +++ b/openlibrary/i18n/pt/messages.po @@ -638,7 +638,7 @@ msgstr "Pedir emprestado \"%(title)s\"" #: ReadButton.html:15 type/list/embed.html:79 widget.html:39 msgid "Borrow" -msgstr "Emprestar" +msgstr "Empréstimo" #: widget.html:45 #, python-format @@ -3172,7 +3172,7 @@ msgstr "Explorar o Centro de Desenvolvimento da Open Library" #: lib/nav_foot.html:37 lib/nav_head.html:25 msgid "Developer Center" -msgstr "Centro de Desenvolvedor" +msgstr "Central de desenvolvedor" #: lib/nav_foot.html:38 msgid "Explore Open Library APIs" @@ -3204,7 +3204,7 @@ msgstr "Adicionar um novo livro à Open Library" #: lib/nav_foot.html:47 msgid "Help Center" -msgstr "Centro de ajuda" +msgstr "Central de ajuda" #: lib/nav_foot.html:48 msgid "Problems" From e9df97983f8a4e0453a5667e4bf2e46e337b2705 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Tue, 9 Jan 2024 14:15:22 -0500 Subject: [PATCH 22/23] Add sentry transactions to covers server --- conf/coverstore.yml | 1 + openlibrary/plugins/openlibrary/sentry.py | 4 +- openlibrary/utils/sentry.py | 66 ++++++++++++++--------- 3 files changed, 43 insertions(+), 28 deletions(-) diff --git a/conf/coverstore.yml b/conf/coverstore.yml index 2baaebdec59..44c538f481c 100644 --- a/conf/coverstore.yml +++ b/conf/coverstore.yml @@ -12,4 +12,5 @@ sentry: enabled: false # Dummy endpoint; where sentry logs are sent to dsn: 'https://examplePublicKey@o0.ingest.sentry.io/0' + traces_sample_rate: 1.0 environment: 'local' diff --git a/openlibrary/plugins/openlibrary/sentry.py b/openlibrary/plugins/openlibrary/sentry.py index e21ef55c4bd..ef729885c6e 100644 --- a/openlibrary/plugins/openlibrary/sentry.py +++ b/openlibrary/plugins/openlibrary/sentry.py @@ -1,6 +1,6 @@ import infogami from infogami.utils import delegate -from openlibrary.utils.sentry import Sentry, SentryProcessor +from openlibrary.utils.sentry import Sentry, InfogamiSentryProcessor sentry: Sentry | None = None @@ -12,4 +12,4 @@ def setup(): if sentry.enabled: sentry.init() delegate.add_exception_hook(lambda: sentry.capture_exception_webpy()) - delegate.app.add_processor(SentryProcessor()) + delegate.app.add_processor(InfogamiSentryProcessor(delegate.app)) diff --git a/openlibrary/utils/sentry.py b/openlibrary/utils/sentry.py index 8de2c91a923..cd17f266abb 100644 --- a/openlibrary/utils/sentry.py +++ b/openlibrary/utils/sentry.py @@ -76,6 +76,7 @@ def capture_exception(): return _internalerror() app.internalerror = capture_exception + app.add_processor(WebPySentryProcessor(app)) def capture_exception_webpy(self): with sentry_sdk.push_scope() as scope: @@ -97,12 +98,48 @@ def to_sentry_name(self) -> str: ) -class SentryProcessor: +class WebPySentryProcessor: + def __init__(self, app: web.application): + self.app = app + + def find_route_name(self) -> str: + handler, groups = self.app._match(self.app.mapping, web.ctx.path) + web.debug('ROUTE HANDLER', handler, groups) + return handler or '' + + def __call__(self, handler): + route_name = self.find_route_name() + hub = sentry_sdk.Hub.current + with sentry_sdk.Hub(hub) as hub: + with hub.configure_scope() as scope: + 
scope.clear_breadcrumbs() + scope.add_event_processor(add_web_ctx_to_event) + + environ = dict(web.ctx.env) + # Don't forward cookies to Sentry + if 'HTTP_COOKIE' in environ: + del environ['HTTP_COOKIE'] + + transaction = Transaction.continue_from_environ( + environ, + op="http.server", + name=route_name, + source=TRANSACTION_SOURCE_ROUTE, + ) + + with hub.start_transaction(transaction): + try: + return handler() + finally: + transaction.set_http_status(int(web.ctx.status.split()[0])) + + +class InfogamiSentryProcessor(WebPySentryProcessor): """ Processor to profile the webpage and send a transaction to Sentry. """ - def __call__(self, handler): + def find_route_name(self) -> str: def find_type() -> tuple[str, str] | None: return next( ( @@ -134,27 +171,4 @@ def find_route() -> InfogamiRoute: return result - route = find_route() - hub = sentry_sdk.Hub.current - with sentry_sdk.Hub(hub) as hub: - with hub.configure_scope() as scope: - scope.clear_breadcrumbs() - scope.add_event_processor(add_web_ctx_to_event) - - environ = dict(web.ctx.env) - # Don't forward cookies to Sentry - if 'HTTP_COOKIE' in environ: - del environ['HTTP_COOKIE'] - - transaction = Transaction.continue_from_environ( - environ, - op="http.server", - name=route.to_sentry_name(), - source=TRANSACTION_SOURCE_ROUTE, - ) - - with hub.start_transaction(transaction): - try: - return handler() - finally: - transaction.set_http_status(int(web.ctx.status.split()[0])) + return find_route().to_sentry_name() From a014cedcbfe8b06e5be076983ae5de9ff1ce9279 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Tue, 9 Jan 2024 14:30:28 -0500 Subject: [PATCH 23/23] Switch covers.GET to use return instead of raise This lets the requests correctly be reflected in Sentry as non-erroring --- openlibrary/coverstore/code.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/openlibrary/coverstore/code.py b/openlibrary/coverstore/code.py index 0d34b7fb976..9900f2bc8eb 100644 --- a/openlibrary/coverstore/code.py +++ b/openlibrary/coverstore/code.py @@ -239,17 +239,9 @@ def notfound(): ): return read_file(config.default_image) elif is_valid_url(i.default): - raise web.seeother(i.default) + return web.seeother(i.default) else: - raise web.notfound("") - - def redirect(id): - size_part = size and ("-" + size) or "" - url = f"/{category}/id/{id}{size_part}.jpg" - - if query := web.ctx.env.get('QUERY_STRING'): - url += '?' + query - raise web.found(url) + return web.notfound("") if key == 'isbn': value = value.replace("-", "").strip() # strip hyphens from ISBN @@ -257,7 +249,7 @@ def redirect(id): elif key == 'ia': url = self.get_ia_cover_url(value, size) if url: - raise web.found(url) + return web.found(url) else: value = None # notfound or redirect to default. handled later. 
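PATCH 22 hinges on web.py's processor hook: `app.add_processor` wraps every request, which is what lets the new class open a Sentry transaction around each handler. The hook itself, stripped down to a minimal timing example with no Sentry involved (a standalone sketch):

```python
import time
import web

urls = ('/hello', 'Hello')

class Hello:
    def GET(self):
        return 'hi'

app = web.application(urls, globals())

def timing_processor(handler):
    start = time.perf_counter()
    try:
        return handler()  # run the matched route handler
    finally:
        elapsed = time.perf_counter() - start
        print(f'{web.ctx.path} took {elapsed:.4f}s')

app.add_processor(timing_processor)

if __name__ == '__main__':
    app.run()
```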
From a014cedcbfe8b06e5be076983ae5de9ff1ce9279 Mon Sep 17 00:00:00 2001
From: Drini Cami
Date: Tue, 9 Jan 2024 14:30:28 -0500
Subject: [PATCH 23/23] Switch covers.GET to use return instead of raise

This lets the requests correctly be reflected in Sentry as non-erroring
---
 openlibrary/coverstore/code.py | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/openlibrary/coverstore/code.py b/openlibrary/coverstore/code.py
index 0d34b7fb976..9900f2bc8eb 100644
--- a/openlibrary/coverstore/code.py
+++ b/openlibrary/coverstore/code.py
@@ -239,17 +239,9 @@ def notfound():
             ):
                 return read_file(config.default_image)
             elif is_valid_url(i.default):
-                raise web.seeother(i.default)
+                return web.seeother(i.default)
             else:
-                raise web.notfound("")
-
-        def redirect(id):
-            size_part = size and ("-" + size) or ""
-            url = f"/{category}/id/{id}{size_part}.jpg"
-
-            if query := web.ctx.env.get('QUERY_STRING'):
-                url += '?' + query
-            raise web.found(url)
+                return web.notfound("")
 
         if key == 'isbn':
             value = value.replace("-", "").strip()  # strip hyphens from ISBN
@@ -257,7 +249,7 @@
         elif key == 'ia':
             url = self.get_ia_cover_url(value, size)
             if url:
-                raise web.found(url)
+                return web.found(url)
             else:
                 value = None  # notfound or redirect to default. handled later.
         elif key != 'id':
             value = _query(category, key, value)
@@ -269,7 +261,7 @@
 
         # redirect to archive.org cluster for large size and original images whenever possible
         if size in ("L", "") and self.is_cover_in_cluster(value):
            url = zipview_url_from_id(int(value), size)
-            raise web.found(url)
+            return web.found(url)
 
         # covers_0008 batches [_00, _82] are tar'd / zip'd in archive.org items
         if isinstance(value, int) or value.isnumeric():  # noqa: SIM102
@@ -281,7 +273,7 @@
                 item_file = f"{pid}{'-' + size.upper() if size else ''}"
                 path = f"{item_id}/{item_tar}/{item_file}.jpg"
                 protocol = web.ctx.protocol
-                raise web.found(f"{protocol}://archive.org/download/{path}")
+                return web.found(f"{protocol}://archive.org/download/{path}")
 
         d = self.get_details(value, size.lower())
         if not d:
@@ -291,7 +283,7 @@
         if key == 'id':
             etag = f"{d.id}-{size.lower()}"
             if not web.modified(trim_microsecond(d.created), etag=etag):
-                raise web.notmodified()
+                return web.notmodified()
 
             web.header('Cache-Control', 'public')
             # this image is not going to expire in next 100 years.
@@ -306,14 +298,14 @@
             from openlibrary.coverstore import archive
 
             if d.id >= 8_820_000 and d.uploaded and '.zip' in d.filename:
-                raise web.found(
+                return web.found(
                     archive.Cover.get_cover_url(
                         d.id, size=size, protocol=web.ctx.protocol
                     )
                 )
             return read_image(d, size)
         except OSError:
-            raise web.notfound()
+            return web.notfound()
 
     def get_ia_cover_url(self, identifier, size="M"):
         url = "https://archive.org/metadata/%s/metadata" % identifier