From 15dc0bca09f623cb275a80e74f1df7837d2b98fb Mon Sep 17 00:00:00 2001
From: James Champ
Date: Wed, 10 Jan 2024 10:17:08 -0800
Subject: [PATCH 01/23] Add argument parser and default options

---
 scripts/gh_scripts/config/bot.ini       |  7 +++
 scripts/gh_scripts/issue_comment_bot.py | 82 +++++++++++++++++++++++++
 2 files changed, 89 insertions(+)
 create mode 100644 scripts/gh_scripts/config/bot.ini
 create mode 100755 scripts/gh_scripts/issue_comment_bot.py

diff --git a/scripts/gh_scripts/config/bot.ini b/scripts/gh_scripts/config/bot.ini
new file mode 100644
index 00000000000..d8a6609d1f2
--- /dev/null
+++ b/scripts/gh_scripts/config/bot.ini
@@ -0,0 +1,7 @@
+[tokens]
+slack_token=
+github_token=
+
+[issue_digest]
+default_hours=48
+slack_channel=#team-abc-plus
diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
new file mode 100755
index 00000000000..f708abcb6ee
--- /dev/null
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+"""
+Fetches Open Library GitHub issues that have been commented on
+within some amount of time, in hours.
+
+Writes links to each issue to Slack channel #team-abc-plus
+"""
+import argparse
+import errno
+import sys
+
+from configparser import ConfigParser
+from pathlib import Path
+
+
+config_file = 'config/bot.ini'
+
+def start_job(args: dict):
+    """
+    Starts the new comment digest job.
+    """
+    pass
+
+
+def _get_parser() -> argparse.ArgumentParser:
+    """
+    Creates and returns an ArgumentParser containing default values which were
+    read from the config file.
+    """
+    def get_defaults():
+        """
+        Reads the config file and returns a dict containing the default
+        values for this script's arguments.
+
+        Location of config file is expected to be nested within this file's parent
+        directory. The relative file location is stored as `config_file`.
+        """
+        this_dir = Path(__file__).parent.resolve()
+        config_path = this_dir / Path(config_file)
+        if not config_path.exists():
+            # XXX : Log to file:
+            print(f'{config_file} does not exist.')
+            sys.exit(errno.ENOENT)
+
+        config = ConfigParser()
+        config.read(config_path)
+        return {
+            'slack_token': config['tokens'].get('slack_token', ''),
+            'hours': config['issue_digest'].getint('default_hours', 1),
+            'slack_channel': config['issue_digest'].get('slack_channel', '#team-abc-plus'),
+        }
+
+    defaults = get_defaults()
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        '--hours',
+        metavar='N',
+        help='Fetch issues that have been updated since N hours ago',
+        type=int,
+        default=defaults['hours'],
+    )
+    parser.add_argument(
+        '-c',
+        '--channel',
+        help="Issues will be published to this Slack channel",
+        type=str,
+        default=defaults['slack_channel'],
+    )
+    parser.add_argument(
+        '-t',
+        '--token',
+        help='Slack auth token',
+        type=str,
+        default=defaults['slack_token'],
+    )
+
+    return parser
+
+if __name__ == '__main__':
+    parser = _get_parser()
+    args = parser.parse_args()
+    start_job(args)
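The central pattern in this first patch, INI values becoming argparse defaults so every flag stays optional but overridable, can be hard to see inside the diff. A minimal standalone sketch (file name and section names are illustrative, not part of the patch):

```python
import argparse
from configparser import ConfigParser

config = ConfigParser()
config.read('config/bot.ini')  # a missing file is tolerated; read() just skips it

parser = argparse.ArgumentParser()
parser.add_argument(
    '--hours',
    type=int,
    # fallback covers a missing file, section, or option in one place
    default=config.getint('issue_digest', 'default_hours', fallback=48),
)

args = parser.parse_args([])
print(args.hours)  # 48 unless config/bot.ini overrides it
```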
From 89bc2f1fdbda1433670cce9208d35d3b33369351 Mon Sep 17 00:00:00 2001
From: James Champ
Date: Wed, 10 Jan 2024 10:23:31 -0800
Subject: [PATCH 02/23] Fetch and filter GitHub issues

---
 scripts/gh_scripts/issue_comment_bot.py | 74 ++++++++++++++++++++++++-
 scripts/gh_scripts/requirements.txt     |  1 +
 2 files changed, 74 insertions(+), 1 deletion(-)
 create mode 100644 scripts/gh_scripts/requirements.txt

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index f708abcb6ee..a5075c98e4a 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -10,16 +10,88 @@ import sys
 
 from configparser import ConfigParser
+from datetime import datetime, timedelta
 from pathlib import Path
 
+import requests
 
 config_file = 'config/bot.ini'
 
+# XXX : Configure?
+staff_usernames = ['scottbarnes', 'mekarpeles', 'jimchamp', 'cdrini']
+
+def fetch_issues(updated_since: str):
+    query = f'repo:internetarchive/openlibrary is:open is:issue comments:>1 updated:>{updated_since}'
+    response = requests.get(
+        'https://api.github.com/search/issues?per_page=100',
+        params={
+            'q': query,
+        },
+    )
+    d = response.json()
+    results = d['items']
+
+    # Pagination
+    def get_next_page(url: str):
+        """Returns list of issues and optional url for next page"""
+        resp = requests.get(url)
+        # Get issues
+        d = resp.json()
+        issues = d['items']
+        # Prepare url for next page
+        next = resp.links.get('next', {})
+        next_url = next.get('url', '')
+
+        return issues, next_url
+
+    links = response.links
+    next = links.get('next', {})
+    next_url = next.get('url', '')
+    while next_url:
+        # Make call with next link
+        issues, next_url = get_next_page(next_url)
+        results = results + issues
+
+    return results
+
+def filter_issues(issues: list, since_date: str):
+    """
+    Returns list of issues that were not last responded to by staff.
+    Requires fetching the most recent comments for the given issues.
+    """
+    filtered_issues = []
+
+    for i in issues:
+        comments_url = i.get('comments_url')
+        resp = requests.get(f'{comments_url}?since{since_date}Z')
+        comments = resp.json()
+        last_comment = comments[-1]
+        last_commenter = last_comment['user']['login']
+
+        if last_commenter not in staff_usernames:
+            filtered_issues.append(i)
+
+    return filtered_issues
+
+def publish_digest(issues: list[str], slack_channel: str, slack_token: str):
+    pass
+
+def create_date_string(hours):
+    now = datetime.now()
+    # XXX : Add a minute or two to the delta?
+    since = now - timedelta(hours=hours)
+    return since.strftime(f'%Y-%m-%dT%H:%M:%S')
+
 def start_job(args: dict):
     """
     Starts the new comment digest job.
     """
-    pass
+    print(args)
+    date_string = create_date_string(args.hours)
+    issues = fetch_issues(date_string)
+    filtered_issues = filter_issues(issues, date_string)
+    if filtered_issues:
+        publish_digest(filtered_issues, args.channel, args.token)
 
 
 def _get_parser() -> argparse.ArgumentParser:
diff --git a/scripts/gh_scripts/requirements.txt b/scripts/gh_scripts/requirements.txt
new file mode 100644
index 00000000000..2c24336eb31
--- /dev/null
+++ b/scripts/gh_scripts/requirements.txt
@@ -0,0 +1 @@
+requests==2.31.0
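The pagination in `fetch_issues` leans on `requests` exposing the parsed `Link` response header as `response.links`, so the next page's URL, when one exists, sits at `links['next']['url']`. The same loop in isolation, assuming the GitHub Search API's paging behavior:

```python
import requests

def search_all_issues(query: str) -> list[dict]:
    """Collect every page of a GitHub issue search into one list."""
    results: list[dict] = []
    url = 'https://api.github.com/search/issues'
    params = {'q': query, 'per_page': 100}
    while url:
        resp = requests.get(url, params=params)
        resp.raise_for_status()
        results.extend(resp.json()['items'])
        # requests parses the RFC 5988 Link header into resp.links
        url = resp.links.get('next', {}).get('url', '')
        params = None  # the `next` URL already carries the query string
    return results
```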
From dc81b181d5717d5122feda5e30704074b65fdc9d Mon Sep 17 00:00:00 2001
From: James Champ
Date: Wed, 10 Jan 2024 11:37:42 -0800
Subject: [PATCH 03/23] Publish results to Slack

---
 scripts/gh_scripts/issue_comment_bot.py | 55 +++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 3 deletions(-)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index a5075c98e4a..7c40ae3bb3e 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -8,6 +8,7 @@ import argparse
 import errno
 import sys
+import time
 
 from configparser import ConfigParser
 from datetime import datetime, timedelta
 from pathlib import Path
@@ -74,7 +75,55 @@ def filter_issues(issues: list, since_date: str):
 
     return filtered_issues
 
 def publish_digest(issues: list[str], slack_channel: str, slack_token: str):
-    pass
+    parent_thread_msg = f'There are {len(issues)} issue(s) awaiting response. More details in this thread.'
+    response = requests.post(
+        'https://slack.com/api/chat.postMessage',
+        headers={
+            'Authorization': f"Bearer {slack_token}",
+            'Content-Type': 'application/json; charset=utf-8',
+        },
+        json={
+            'channel': slack_channel,
+            'text': parent_thread_msg,
+        },
+    )
+
+    if response.status_code != 200:
+        # XXX : Log this
+        print(f'Failed to send message to Slack. Status code: {response.status_code}')
+        # XXX : Add retry logic
+        sys.exit(errno.ECOMM)
+
+    d = response.json()
+    # Store timestamp, which, along with the channel, uniquely identifies the parent thread
+    ts = d.get('ts')
+
+    def comment_on_thread(message: str):
+        response = requests.post(
+            'https://slack.com/api/chat.postMessage',
+            headers={
+                'Authorization': f"Bearer {slack_token}",
+                'Content-Type': 'application/json; charset=utf-8',
+            },
+            json={
+                'channel': slack_channel,
+                'text': message,
+                'thread_ts': ts
+            },
+        )
+        if response.status_code != 200:
+            # XXX : Log this
+            print(f'Failed to POST slack message\n Status code: {response.status_code}\n Message: {message}')
+            # XXX : Retry logic?
+
+    for issue in issues:
+        # Slack rate limit is roughly 1 request per second
+        time.sleep(1)
+
+        issue_url = issue.get('html_url')
+        issue_title = issue.get('title')
+        message = f'<{issue_url}|*{issue_title}*>'
+        comment_on_thread(message)
 
 def create_date_string(hours):
     now = datetime.now()
@@ -86,13 +135,13 @@ def start_job(args: dict):
     """
     Starts the new comment digest job.
     """
-    print(args)
     date_string = create_date_string(args.hours)
     issues = fetch_issues(date_string)
     filtered_issues = filter_issues(issues, date_string)
     if filtered_issues:
         publish_digest(filtered_issues, args.channel, args.token)
-
+    # XXX : Log this
+    print('Digest POSTed to Slack')
 
 def _get_parser() -> argparse.ArgumentParser:
     """
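The Slack threading used above reduces to one rule: the first `chat.postMessage` response carries a `ts` timestamp, and each reply passes it back as `thread_ts`. A hedged sketch with placeholder token and channel; note that Slack answers HTTP 200 even on failures, so the JSON `ok` field is the real success flag (a later patch in this series leaves a reminder about exactly that):

```python
import requests

def slack_post(token: str, payload: dict) -> dict:
    resp = requests.post(
        'https://slack.com/api/chat.postMessage',
        headers={'Authorization': f'Bearer {token}'},
        json=payload,
    )
    return resp.json()  # contains 'ok', and 'ts' on success

token = 'xoxb-placeholder'
parent = slack_post(token, {'channel': '#team-abc-plus', 'text': 'Digest'})
if parent.get('ok'):
    slack_post(token, {
        'channel': '#team-abc-plus',
        'text': 'First issue link',
        'thread_ts': parent['ts'],  # threads this message under the parent
    })
```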
From 6d5d9b4b0b29cba1571b038939f1d8d81e833925 Mon Sep 17 00:00:00 2001
From: James Champ
Date: Wed, 10 Jan 2024 17:58:35 -0800
Subject: [PATCH 04/23] Add more information to digest

---
 scripts/gh_scripts/issue_comment_bot.py | 110 ++++++++++++++++++------
 1 file changed, 84 insertions(+), 26 deletions(-)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index 7c40ae3bb3e..b21e7e65f26 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -22,17 +22,29 @@ staff_usernames = ['scottbarnes', 'mekarpeles', 'jimchamp', 'cdrini']
 
 def fetch_issues(updated_since: str):
-    query = f'repo:internetarchive/openlibrary is:open is:issue comments:>1 updated:>{updated_since}'
+    """
+    Fetches all GitHub issues that have been updated since the given date string and have at least one comment.
+
+    GitHub results are paginated. This function appends each result to a list, and does so for all pages.
+    To keep API calls to a minimum, we request the maximum number of results per request (100 per page, as of writing).
+
+    Important: Updated issues need not have a recent comment. Update events include many other things, such as adding a
+    label to an issue, or moving an issue to a milestone. Issues returned by this function will require additional
+    processing in order to determine if they have recent comments.
+    """
+    # Make initial query for updated issues:
+    query = f'repo:internetarchive/openlibrary is:open is:issue comments:>0 updated:>{updated_since}'
     response = requests.get(
-        'https://api.github.com/search/issues?per_page=100',
+        'https://api.github.com/search/issues',
         params={
             'q': query,
+            'per_page': 100,
         },
     )
     d = response.json()
     results = d['items']
 
-    # Pagination
+    # Fetch additional updated issues, if any exist
    def get_next_page(url: str):
         """Returns list of issues and optional url for next page"""
         resp = requests.get(url)
@@ -55,27 +67,63 @@ def get_next_page(url: str):
 
     return results
 
-def filter_issues(issues: list, since_date: str):
+def filter_issues(issues: list, since: datetime):
     """
     Returns list of issues that were not last responded to by staff.
     Requires fetching the most recent comments for the given issues.
     """
-    filtered_issues = []
+    results = []
 
     for i in issues:
+        # Fetch comments using URL from previous GitHub search results
         comments_url = i.get('comments_url')
-        resp = requests.get(f'{comments_url}?since{since_date}Z')
+        resp = requests.get(
+            comments_url,
+            params={
+                'per_page': 100
+            }
+        )
+
+        # Ensure that we have the last page of comments
+        links = resp.links
+        last = links.get('last', {})
+        last_url = last.get('url', '')
+
+        if last_url:
+            resp = requests.get(last_url)
+
+        # Get last comment
         comments = resp.json()
         last_comment = comments[-1]
-        last_commenter = last_comment['user']['login']
 
-        if last_commenter not in staff_usernames:
-            filtered_issues.append(i)
+        # Determine if last comment meets our criteria for Slack notifications
+        # First step: Ensure that the last comment was left after the given `since` datetime
+        created = datetime.fromisoformat(last_comment['created_at'])
+        # Removing timezone info to avoid TypeErrors, which occur when
+        # comparing a timezone-aware datetime with a timezone-naive datetime
+        created = created.replace(tzinfo=None)
+        if created > since:
+            # Next step: Determine if the last commenter is a staff member
+            last_commenter = last_comment['user']['login']
+            if last_commenter not in staff_usernames:
+                results.append({
+                    'comment_url': last_comment['html_url'],
+                    'commenter': last_commenter,
+                    'issue_title': i['title'],
+                })
 
-    return filtered_issues
+    return results
+
+def publish_digest(issues: list[str], slack_channel: str, slack_token: str, hours_passed: int):
+    """
+    Creates a threaded Slack message containing a digest of recently commented GitHub issues.
+
+    Parent Slack message will say how many comments were left, and the timeframe. Each reply
+    will include a link to the comment, as well as addiitonal information.
+    """
+    # Create the parent message
+    parent_thread_msg = f'At least {len(issues)} new comment(s) have been left by contributors in the past {hours_passed} hour(s)'
 
-def publish_digest(issues: list[str], slack_channel: str, slack_token: str):
-    parent_thread_msg = f'There are {len(issues)} issue(s) awaiting response. More details in this thread.'
     response = requests.post(
         'https://slack.com/api/chat.postMessage',
         headers={
@@ -91,7 +139,7 @@ def publish_digest(issues: list[str], slack_channel: str, slack_token: str):
     if response.status_code != 200:
         # XXX : Log this
         print(f'Failed to send message to Slack. Status code: {response.status_code}')
-        # XXX : Add retry logic
+        # XXX : Add retry logic?
         sys.exit(errno.ECOMM)
 
     d = response.json()
@@ -99,6 +147,9 @@ def publish_digest(issues: list[str], slack_channel: str, slack_token: str):
     ts = d.get('ts')
 
     def comment_on_thread(message: str):
+        """
+        Posts the given message as a reply to the parent message.
+        """
         response = requests.post(
             'https://slack.com/api/chat.postMessage',
             headers={
@@ -108,7 +159,7 @@ def comment_on_thread(message: str):
             json={
                 'channel': slack_channel,
                 'text': message,
-                'thread_ts': ts
+                'thread_ts': ts,
             },
         )
         if response.status_code != 200:
@@ -116,30 +167,34 @@ def comment_on_thread(message: str):
             print(f'Failed to POST slack message\n Status code: {response.status_code}\n Message: {message}')
             # XXX : Retry logic?
 
-    for issue in issues:
+    for i in issues:
         # Slack rate limit is roughly 1 request per second
         time.sleep(1)
 
-        issue_url = issue.get('html_url')
-        issue_title = issue.get('title')
-        message = f'<{issue_url}|*{issue_title}*>'
+        comment_url = i['comment_url']
+        issue_title = i['issue_title']
+        commenter = i['commenter']
+        message = f'<{comment_url}|Latest comment for: *{issue_title}*>\n'
+        message += f'Commenter: *{commenter}*'
         comment_on_thread(message)
 
-def create_date_string(hours):
+def time_since(hours):
+    """Returns datetime and string representations of the current time, minus the given hours"""
     now = datetime.now()
-    # XXX : Add a minute or two to the delta?
+    # XXX : Add a minute or two to the delta (to avoid dropping issues)?
     since = now - timedelta(hours=hours)
-    return since.strftime(f'%Y-%m-%dT%H:%M:%S')
+    return since, since.strftime(f'%Y-%m-%dT%H:%M:%S')
 
 def start_job(args: dict):
     """
     Starts the new comment digest job.
     """
-    date_string = create_date_string(args.hours)
+    since, date_string = time_since(args.hours)
     issues = fetch_issues(date_string)
-    filtered_issues = filter_issues(issues, date_string)
+    filtered_issues = filter_issues(issues, since)
 
     if filtered_issues:
-        publish_digest(filtered_issues, args.channel, args.token)
+        publish_digest(filtered_issues, args.channel, args.slack_token, args.hours)
     # XXX : Log this
     print('Digest POSTed to Slack')
@@ -165,6 +220,8 @@ def get_defaults():
 
         config = ConfigParser()
         config.read(config_path)
+        # XXX : Setting the defaults both here in the `get` calls and the config file
+        # is like wearing a belt with suspenders...
         return {
             'slack_token': config['tokens'].get('slack_token', ''),
             'hours': config['issue_digest'].getint('default_hours', 1),
@@ -188,8 +245,8 @@ def get_defaults():
         default=defaults['slack_channel'],
     )
     parser.add_argument(
-        '-t',
-        '--token',
+        '-s',
+        '--slack-token',
         help='Slack auth token',
         type=str,
         default=defaults['slack_token'],
@@ -198,6 +255,7 @@ def get_defaults():
     return parser
 
 if __name__ == '__main__':
+    # Process command-line arguments and starts the notification job
     parser = _get_parser()
     args = parser.parse_args()
     start_job(args)
From 4fae3a36084c2ff3c32a3246ec24e089924e2d56 Mon Sep 17 00:00:00 2001
From: James Champ
Date: Thu, 11 Jan 2024 11:39:30 -0800
Subject: [PATCH 05/23] Remove default configuration

---
 scripts/gh_scripts/config/bot.ini       |  7 ----
 scripts/gh_scripts/issue_comment_bot.py | 50 ++++---------------------
 2 files changed, 7 insertions(+), 50 deletions(-)
 delete mode 100644 scripts/gh_scripts/config/bot.ini

diff --git a/scripts/gh_scripts/config/bot.ini b/scripts/gh_scripts/config/bot.ini
deleted file mode 100644
index d8a6609d1f2..00000000000
--- a/scripts/gh_scripts/config/bot.ini
+++ /dev/null
@@ -1,7 +0,0 @@
-[tokens]
-slack_token=
-github_token=
-
-[issue_digest]
-default_hours=48
-slack_channel=#team-abc-plus
diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index b21e7e65f26..003d5c022b7 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -3,22 +3,17 @@
 Fetches Open Library GitHub issues that have been commented on
 within some amount of time, in hours.
 
-Writes links to each issue to Slack channel #team-abc-plus
+Writes links to each issue to given Slack channel.
 """
 import argparse
 import errno
 import sys
 import time
 
-from configparser import ConfigParser
 from datetime import datetime, timedelta
-from pathlib import Path
 
 import requests
 
-config_file = 'config/bot.ini'
-
-# XXX : Configure?
 staff_usernames = ['scottbarnes', 'mekarpeles', 'jimchamp', 'cdrini']
 
 def fetch_issues(updated_since: str):
@@ -196,60 +191,29 @@ def start_job(args: dict):
     if filtered_issues:
         publish_digest(filtered_issues, args.channel, args.slack_token, args.hours)
     # XXX : Log this
-    print('Digest POSTed to Slack')
+    print('Digest posted to Slack')
 
 def _get_parser() -> argparse.ArgumentParser:
     """
     Creates and returns an ArgumentParser containing default values which were
     read from the config file.
     """
-    def get_defaults():
-        """
-        Reads the config file and returns a dict containing the default
-        values for this script's arguments.
-
-        Location of config file is expected to be nested within this file's parent
-        directory. The relative file location is stored as `config_file`.
-        """
-        this_dir = Path(__file__).parent.resolve()
-        config_path = this_dir / Path(config_file)
-        if not config_path.exists():
-            # XXX : Log to file:
-            print(f'{config_file} does not exist.')
-            sys.exit(errno.ENOENT)
-
-        config = ConfigParser()
-        config.read(config_path)
-        # XXX : Setting the defaults both here in the `get` calls and the config file
-        # is like wearing a belt with suspenders...
-        return {
-            'slack_token': config['tokens'].get('slack_token', ''),
-            'hours': config['issue_digest'].getint('default_hours', 1),
-            'slack_channel': config['issue_digest'].get('slack_channel', '#team-abc-plus'),
-        }
-
-    defaults = get_defaults()
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument(
-        '--hours',
-        metavar='N',
-        help='Fetch issues that have been updated since N hours ago',
+        'hours',
+        help='Fetch issues that have been updated since this many hours ago',
         type=int,
-        default=defaults['hours'],
     )
     parser.add_argument(
-        '-c',
-        '--channel',
+        'channel',
         help="Issues will be published to this Slack channel",
         type=str,
-        default=defaults['slack_channel'],
     )
     parser.add_argument(
-        '-s',
-        '--slack-token',
+        'slack_token',
+        metavar='slack-token',
         help='Slack auth token',
         type=str,
-        default=defaults['slack_token'],
     )
 
     return parser
From 7687c5a14039592d8930d6e410cb05fea3624615 Mon Sep 17 00:00:00 2001
From: James Champ
Date: Thu, 11 Jan 2024 13:40:32 -0800
Subject: [PATCH 06/23] Mention leads in Slack message

---
 scripts/gh_scripts/issue_comment_bot.py | 44 +++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index 003d5c022b7..f7ac502e6e9 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -14,7 +14,21 @@
 
 import requests
 
-staff_usernames = ['scottbarnes', 'mekarpeles', 'jimchamp', 'cdrini']
+lead_label_to_username = {
+    'Lead: @mekarpeles': 'mekarpeles',
+    'Lead: @cdrini': 'cdrini',
+    'Lead: @scottbarnes': 'scottbarnes',
+    'Lead: @seabelis': 'seabelis',
+    'Lead: @jimchamp': 'jimchamp',
+}
+
+username_to_slack_id = {
+    'mekarpeles': '<@mek>',
+    'cdrini': '<@cdrini>',
+    'scottbarnes': '<@U03MNR6T7FH>',
+    'seabelis': '<@UAHQ39ACT>',
+    'jimchamp': '<@U01ARTHG9EV>',
+}
 
 def fetch_issues(updated_since: str):
@@ -100,15 +114,31 @@ def filter_issues(issues: list, since: datetime):
         if created > since:
             # Next step: Determine if the last commenter is a staff member
             last_commenter = last_comment['user']['login']
-            if last_commenter not in staff_usernames:
+            if last_commenter not in username_to_slack_id:
+                lead_label= find_lead_label(i.get('labels', []))
                 results.append({
                     'comment_url': last_comment['html_url'],
                     'commenter': last_commenter,
                     'issue_title': i['title'],
+                    'lead_label': lead_label,
                 })
 
     return results
 
+def find_lead_label(labels: list[str]) -> str:
+    """
+    Finds and returns the name of the first lead label found in the given list of GitHub labels.
+
+    Returns an empty string if no lead label is found
+    """
+    result = ''
+    for label in labels:
+        if label['name'].startswith('Lead:'):
+            result = label['name']
+            break
+
+    return result
+
 def publish_digest(issues: list[str], slack_channel: str, slack_token: str, hours_passed: int):
@@ -170,6 +200,16 @@ def comment_on_thread(message: str):
         comment_url = i['comment_url']
         issue_title = i['issue_title']
         commenter = i['commenter']
         message = f'<{comment_url}|Latest comment for: *{issue_title}*>\n'
+
+        username = lead_label_to_username.get(i['lead_label'], '')
+        slack_id = username_to_slack_id.get(username, '')
+        if slack_id:
+            message += f'Lead: {slack_id}\n'
+        elif i['lead_label']:
+            message += f'i["lead_label"]\n'
+        else:
+            message += 'Lead: N/A\n'
+
         message += f'Commenter: *{commenter}*'
         comment_on_thread(message)
From fac8e060108c61d5b54a53fa0ba6f51e0139f6bb Mon Sep 17 00:00:00 2001
From: James Champ
Date: Thu, 11 Jan 2024 14:06:39 -0800
Subject: [PATCH 07/23] Add more documentation

---
 scripts/gh_scripts/README.md            | 27 +++++++++++++++++++++
 scripts/gh_scripts/issue_comment_bot.py | 11 ++++++++--
 2 files changed, 36 insertions(+), 2 deletions(-)
 create mode 100644 scripts/gh_scripts/README.md

diff --git a/scripts/gh_scripts/README.md b/scripts/gh_scripts/README.md
new file mode 100644
index 00000000000..1549c5fcf4f
--- /dev/null
+++ b/scripts/gh_scripts/README.md
@@ -0,0 +1,27 @@
+# GitHub Project Management Scripts
+
+This directory contains scripts that the Open Library team uses to interact with this GitHub repository.
+
+To quickly see a script's purpose and arguments, run the script with the `-h` or `--help` flag.
+
+## `issue_comment_bot.py`
+
+This script fetches issues that have new comments from contributors within the given number of hours, then posts a message to the team in our Slack channel.
+
+### Usage:
+This script has three positional arguments:
+```
+  hours        Fetch issues that have been updated since this many hours ago
+  channel      Issues will be published to this Slack channel
+  slack-token  Slack authentication token
+```
+
+__Running the script locally:__
+```
+docker compose exec -e PYTHONPATH=. web bash
+
+# Publish digest of new comments from the past day to #openlibrary-g:
+./scripts/gh_scripts/issue_comment_bot.py 24 "#openlibrary-g" "replace-with-slack-token"
+```
+
+__Note:__ When adding arguments, be sure to place any hyphenated values within double quotes.
diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index f7ac502e6e9..f77eeaee901 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -188,6 +188,7 @@ def comment_on_thread(message: str):
         )
         if response.status_code != 200:
+            # XXX : Check "ok" field for errors
             # XXX : Log this
             print(f'Failed to POST slack message\n Status code: {response.status_code}\n Message: {message}')
             # XXX : Retry logic?
@@ -228,10 +229,16 @@ def start_job(args: dict):
     issues = fetch_issues(date_string)
     filtered_issues = filter_issues(issues, since)
 
+    # XXX : If we are only running this script daily, we can remove this condition to
+    # always post a message to Slack. If the digest is ever not published, we'll know
+    # that something is wrong with our script runner.
     if filtered_issues:
         publish_digest(filtered_issues, args.channel, args.slack_token, args.hours)
-    # XXX : Log this
-    print('Digest posted to Slack')
+        # XXX : Log this
+        print('Digest posted to Slack.')
+    else:
+        # XXX : Log this
+        print('No issues needing attention found.')
From 9a535ac8ba229560ba0235ed761aabff23cacb7f Mon Sep 17 00:00:00 2001
From: James Champ
Date: Thu, 11 Jan 2024 14:10:51 -0800
Subject: [PATCH 08/23] Update parent thread message

---
 scripts/gh_scripts/issue_comment_bot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index f77eeaee901..4c773d6b6e8 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -147,7 +147,7 @@ def find_lead_label(labels: list[str]) -> str:
     will include a link to the comment, as well as addiitonal information.
     """
     # Create the parent message
-    parent_thread_msg = f'At least {len(issues)} new comment(s) have been left by contributors in the past {hours_passed} hour(s)'
+    parent_thread_msg = f'{len(issues)} new GitHub comment(s) since {hours_passed} hour(s) ago'
 
     response = requests.post(
         'https://slack.com/api/chat.postMessage',
From 4748e6aa2c32950e3ffe4abe92aef3777a623ff2 Mon Sep 17 00:00:00 2001
From: James Champ
Date: Thu, 11 Jan 2024 14:26:44 -0800
Subject: [PATCH 09/23] Fix string interpolation bug

---
 scripts/gh_scripts/issue_comment_bot.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index 4c773d6b6e8..30af9b21b30 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 """
-Fetches Open Library GitHub issues that have been commented on 
+Fetches Open Library GitHub issues that have been commented on
 within some amount of time, in hours.
 
 Writes links to each issue to given Slack channel.
@@ -207,7 +207,7 @@ def comment_on_thread(message: str):
         if slack_id:
             message += f'Lead: {slack_id}\n'
         elif i['lead_label']:
-            message += f'i["lead_label"]\n'
+            message += f'{i["lead_label"]}\n'
         else:
             message += 'Lead: N/A\n'
@@ -219,7 +219,7 @@ def time_since(hours):
     now = datetime.now()
     # XXX : Add a minute or two to the delta (to avoid dropping issues)?
     since = now - timedelta(hours=hours)
-    return since, since.strftime(f'%Y-%m-%dT%H:%M:%S')
+    return since, since.strftime('%Y-%m-%dT%H:%M:%S')
@@ -230,7 +230,7 @@ def start_job(args: dict):
     filtered_issues = filter_issues(issues, since)
 
     # XXX : If we are only running this script daily, we can remove this condition to
-    # always post a message to Slack. If the digest is ever not published, we'll know 
+    # always post a message to Slack. If the digest is ever not published, we'll know
     # that something is wrong with our script runner.
     if filtered_issues:
         publish_digest(filtered_issues, args.channel, args.slack_token, args.hours)
         # XXX : Log this
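The interpolation bug fixed in this patch generalizes: without braces an f-string emits its text literally, and quoting a dict key inside the braces requires alternating quote styles. Distilled:

```python
i = {'lead_label': 'Lead: @jimchamp'}

broken = f'i["lead_label"]\n'   # no braces: emits the literal text i["lead_label"]
fixed = f'{i["lead_label"]}\n'  # braces plus inner double quotes: emits Lead: @jimchamp
assert broken != fixed
```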
From 449fc9a770c4b308f44856c6c387f13e36624de2 Mon Sep 17 00:00:00 2001
From: James Champ
Date: Thu, 11 Jan 2024 14:46:43 -0800
Subject: [PATCH 10/23] Fix linting issues

---
 scripts/gh_scripts/issue_comment_bot.py | 28 +++++++++++++++++--------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index 30af9b21b30..8cf1492ce96 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -11,6 +11,7 @@ import time
 
 from datetime import datetime, timedelta
+from typing import Any
 
 import requests
 
@@ -30,6 +31,7 @@
     'jimchamp': '<@U01ARTHG9EV>',
 }
 
+
 def fetch_issues(updated_since: str):
@@ -43,12 +45,13 @@ def fetch_issues(updated_since: str):
     """
     # Make initial query for updated issues:
     query = f'repo:internetarchive/openlibrary is:open is:issue comments:>0 updated:>{updated_since}'
+    p: dict[str, str|int] = {
+        'q': query,
+        'per_page': 100,
+    }
     response = requests.get(
         'https://api.github.com/search/issues',
-        params={
-            'q': query,
-            'per_page': 100,
-        },
+        params=p,
     )
     d = response.json()
     results = d['items']
@@ -76,6 +79,7 @@ def get_next_page(url: str):
 
     return results
 
+
 def filter_issues(issues: list, since: datetime):
     """
     Returns list of issues that were not last responded to by staff.
     Requires fetching the most recent comments for the given issues.
@@ -119,7 +123,7 @@ def filter_issues(issues: list, since: datetime):
             # Next step: Determine if the last commenter is a staff member
             last_commenter = last_comment['user']['login']
             if last_commenter not in username_to_slack_id:
-                lead_label= find_lead_label(i.get('labels', []))
+                lead_label = find_lead_label(i.get('labels', []))
                 results.append({
                     'comment_url': last_comment['html_url'],
                     'commenter': last_commenter,
@@ -131,7 +135,8 @@ def filter_issues(issues: list, since: datetime):
 
     return results
 
-def find_lead_label(labels: list[str]) -> str:
+
+def find_lead_label(labels: list[dict[str, Any]]) -> str:
     """
     Finds and returns the name of the first lead label found in the given list of GitHub labels.
@@ -145,7 +150,8 @@ def find_lead_label(labels: list[str]) -> str:
 
     return result
 
-def publish_digest(issues: list[str], slack_channel: str, slack_token: str, hours_passed: int):
+
+def publish_digest(issues: list[dict[str, str]], slack_channel: str, slack_token: str, hours_passed: int):
     """
     Creates a threaded Slack message containing a digest of recently commented GitHub issues.
@@ -215,6 +221,7 @@ def comment_on_thread(message: str):
         message += f'Commenter: *{commenter}*'
         comment_on_thread(message)
 
+
 def time_since(hours):
     """Returns datetime and string representations of the current time, minus the given hours"""
     now = datetime.now()
@@ -224,7 +231,8 @@ def time_since(hours):
     return since, since.strftime('%Y-%m-%dT%H:%M:%S')
 
-def start_job(args: dict):
+
+def start_job(args: argparse.Namespace):
     """
     Starts the new comment digest job.
     """
     since, date_string = time_since(args.hours)
     issues = fetch_issues(date_string)
     filtered_issues = filter_issues(issues, since)
@@ -240,6 +248,7 @@ def start_job(args: argparse.Namespace):
         # XXX : Log this
         print('No issues needing attention found.')
 
+
 def _get_parser() -> argparse.ArgumentParser:
     """
     Creates and returns an ArgumentParser containing default values which were
     read from the config file.
@@ -265,6 +274,7 @@ def _get_parser() -> argparse.ArgumentParser:
 
     return parser
 
+
 if __name__ == '__main__':
     # Process command-line arguments and starts the notification job
     parser = _get_parser()
     args = parser.parse_args()
     start_job(args)
From ed049524d2985fc44f2fd081956c1506ea2ae537 Mon Sep 17 00:00:00 2001
From: Charles Horn
Date: Wed, 17 Jan 2024 14:55:33 +1300
Subject: [PATCH 11/23] ensure bwbsku local_id is uppercased.

Shouldn't be so much of an issue now, but some old records came in
lowercased. Protect against it ever happening again.
---
 scripts/promise_batch_imports.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/promise_batch_imports.py b/scripts/promise_batch_imports.py
index 8a373eea502..3edbc987c08 100644
--- a/scripts/promise_batch_imports.py
+++ b/scripts/promise_batch_imports.py
@@ -44,7 +44,7 @@ def map_book_to_olbook(book, promise_id):
     isbn = book.get('ISBN') or ' '
     sku = book['BookSKUB'] or book['BookSKU'] or book['BookBarcode']
     olbook = {
-        'local_id': [f"urn:bwbsku:{sku}"],
+        'local_id': [f"urn:bwbsku:{sku.upper()}"],
         'identifiers': {
             **({'amazon': [book.get('ASIN')]} if not asin_is_isbn_10 else {}),
             **({'better_world_books': [isbn]} if not is_isbn_13(isbn) else {}),
From 5a2ce149685ec2132ea6053e4d428532e957b580 Mon Sep 17 00:00:00 2001
From: James Champ
Date: Wed, 17 Jan 2024 09:58:12 -0800
Subject: [PATCH 12/23] Exclude Charles' comments from the digest

---
 scripts/gh_scripts/issue_comment_bot.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index 8cf1492ce96..f5618f29996 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -15,6 +15,7 @@
 
 import requests
 
+# Maps lead label to GitHub username
 lead_label_to_username = {
     'Lead: @mekarpeles': 'mekarpeles',
     'Lead: @cdrini': 'cdrini',
@@ -23,12 +24,14 @@
     'Lead: @jimchamp': 'jimchamp',
 }
 
+# Maps GitHub username to Slack ID
 username_to_slack_id = {
     'mekarpeles': '<@mek>',
     'cdrini': '<@cdrini>',
     'scottbarnes': '<@U03MNR6T7FH>',
     'seabelis': '<@UAHQ39ACT>',
     'jimchamp': '<@U01ARTHG9EV>',
+    'hornc': '<@U0EUS8DV0>',
 }
From d64a630c966921d99795682957675bd4b5b815aa Mon Sep 17 00:00:00 2001
From: James Champ
Date: Wed, 17 Jan 2024 13:30:18 -0800
Subject: [PATCH 13/23] Create workflow for comment digest job

---
 .github/workflows/new_comment_digest.yml | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 .github/workflows/new_comment_digest.yml

diff --git a/.github/workflows/new_comment_digest.yml b/.github/workflows/new_comment_digest.yml
new file mode 100644
index 00000000000..ce72caf6430
--- /dev/null
+++ b/.github/workflows/new_comment_digest.yml
@@ -0,0 +1,21 @@
+name: new_comment_digest
+on:
+  schedule: # 08:30 daily
+    - cron: '30 8 * * *'
+  workflow_dispatch:
+permissions:
+  contents: read
+
+jobs:
+  new_comment_digest:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v4
+        with:
+          python-version: 3.x
+      - run: pip install requests
+      - run: scripts/gh_scripts/issue_comment_bot.py 24 "$SLACK_CHANNEL" "$SLACK_TOKEN"
+        env:
+          SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
+          SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL_ABC_TEAM_PLUS }}
From ec89090c79f2d0c838449081a6599741e1bc1162 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 17 Jan 2024 21:31:16 +0000
Subject: [PATCH 14/23] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 scripts/gh_scripts/issue_comment_bot.py | 41 ++++++++++++++-----------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/scripts/gh_scripts/issue_comment_bot.py b/scripts/gh_scripts/issue_comment_bot.py
index f5618f29996..9f12ec0f9c6 100755
--- a/scripts/gh_scripts/issue_comment_bot.py
+++ b/scripts/gh_scripts/issue_comment_bot.py
@@ -48,7 +48,7 @@ def fetch_issues(updated_since: str):
     """
     # Make initial query for updated issues:
     query = f'repo:internetarchive/openlibrary is:open is:issue comments:>0 updated:>{updated_since}'
-    p: dict[str, str|int] = {
+    p: dict[str, str | int] = {
         'q': query,
         'per_page': 100,
     }
@@ -93,12 +93,7 @@ def filter_issues(issues: list, since: datetime):
     for i in issues:
         # Fetch comments using URL from previous GitHub search results
         comments_url = i.get('comments_url')
-        resp = requests.get(
-            comments_url,
-            params={
-                'per_page': 100
-            }
-        )
+        resp = requests.get(comments_url, params={'per_page': 100})
 
         # Ensure that we have the last page of comments
         links = resp.links
@@ -123,12 +118,14 @@ def filter_issues(issues: list, since: datetime):
             last_commenter = last_comment['user']['login']
             if last_commenter not in username_to_slack_id:
                 lead_label = find_lead_label(i.get('labels', []))
-                results.append({
-                    'comment_url': last_comment['html_url'],
-                    'commenter': last_commenter,
-                    'issue_title': i['title'],
-                    'lead_label': lead_label,
-                })
+                results.append(
+                    {
+                        'comment_url': last_comment['html_url'],
+                        'commenter': last_commenter,
+                        'issue_title': i['title'],
+                        'lead_label': lead_label,
+                    }
+                )
 
     return results
@@ -148,7 +145,12 @@ def find_lead_label(labels: list[dict[str, Any]]) -> str:
     return result
 
 
-def publish_digest(issues: list[dict[str, str]], slack_channel: str, slack_token: str, hours_passed: int):
+def publish_digest(
+    issues: list[dict[str, str]],
+    slack_channel: str,
+    slack_token: str,
+    hours_passed: int,
+):
     """
     Creates a threaded Slack message containing a digest of recently commented GitHub issues.
 
     Parent Slack message will say how many comments were left, and the timeframe. Each reply
     will include a link to the comment, as well as additional information.
     """
     # Create the parent message
-    parent_thread_msg = f'{len(issues)} new GitHub comment(s) since {hours_passed} hour(s) ago'
+    parent_thread_msg = (
+        f'{len(issues)} new GitHub comment(s) since {hours_passed} hour(s) ago'
+    )
 
     response = requests.post(
         'https://slack.com/api/chat.postMessage',
@@ -199,7 +203,9 @@ def comment_on_thread(message: str):
         if response.status_code != 200:
             # XXX : Check "ok" field for errors
             # XXX : Log this
-            print(f'Failed to POST slack message\n Status code: {response.status_code}\n Message: {message}')
+            print(
+                f'Failed to POST slack message\n Status code: {response.status_code}\n Message: {message}'
+            )
             # XXX : Retry logic?
@@ -238,12 +244,11 @@ def start_job(args: argparse.Namespace):
     """
     since, date_string = time_since(args.hours)
     issues = fetch_issues(date_string)
-    filtered_issues = filter_issues(issues, since)
 
     # XXX : If we are only running this script daily, we can remove this condition to
     # always post a message to Slack. If the digest is ever not published, we'll know
     # that something is wrong with our script runner.
-    if filtered_issues:
+    if filtered_issues := filter_issues(issues, since):
         publish_digest(filtered_issues, args.channel, args.slack_token, args.hours)
         # XXX : Log this
         print('Digest posted to Slack.')
From b78ab04867c5cf63db59deaa56009a1f71ec18ac Mon Sep 17 00:00:00 2001
From: jimchamp
Date: Thu, 18 Jan 2024 09:30:25 -0800
Subject: [PATCH 15/23] Update Node version in `key`

---
 .github/workflows/javascript_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/javascript_tests.yml b/.github/workflows/javascript_tests.yml
index 14f22bda8d5..7891b7efc38 100644
--- a/.github/workflows/javascript_tests.yml
+++ b/.github/workflows/javascript_tests.yml
@@ -35,7 +35,7 @@ jobs:
           # `node_modules` directly.
           path: 'node_modules'
           # Note the version number in this string -- update it when updating Node!
-          key: ${{ runner.os }}-node16-${{ hashFiles('**/package-lock.json') }}
+          key: ${{ runner.os }}-node20-${{ hashFiles('**/package-lock.json') }}
       - if: steps.npm-cache.outputs.cache-hit != 'true'
         run: npm install --no-audit
       - run: npm run lint
From b18239e358614f292a70ac482384861d8a9a3f8f Mon Sep 17 00:00:00 2001
From: jimchamp
Date: Thu, 18 Jan 2024 09:36:53 -0800
Subject: [PATCH 16/23] Add comment as reminder

---
 .github/workflows/javascript_tests.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/javascript_tests.yml b/.github/workflows/javascript_tests.yml
index 7891b7efc38..12440d142d4 100644
--- a/.github/workflows/javascript_tests.yml
+++ b/.github/workflows/javascript_tests.yml
@@ -24,7 +24,8 @@ jobs:
       - uses: actions/checkout@v4
       - uses: actions/setup-node@v3
         with:
-          node-version: '20' # Should match what's in our Dockerfile
+          # Should match what's in our Dockerfile
+          node-version: '20' # Also update the `key` in the `with` map, below
       - uses: actions/cache@v3
         id: npm-cache
         with:
From 49ee282f2d1ba91638cdc9b213e644da42f82421 Mon Sep 17 00:00:00 2001
From: Mek
Date: Fri, 19 Jan 2024 11:25:07 -0800
Subject: [PATCH 17/23] Add client_id, s3, scope to bulk availability API
 (#7522)

Co-authored-by: Christian Clauss
---
 openlibrary/core/lending.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/openlibrary/core/lending.py b/openlibrary/core/lending.py
index d7e2a1949cb..f59df48a475 100644
--- a/openlibrary/core/lending.py
+++ b/openlibrary/core/lending.py
@@ -308,7 +308,22 @@ def update_availability_schema_to_v2(v1_resp, ocaid):
     url = '{}?{}={}'.format(config_ia_availability_api_v2_url, key, ','.join(ids))
 
     try:
-        response = requests.get(url, timeout=config_http_request_timeout)
+        client_ip = web.ctx.env.get('HTTP_X_FORWARDED_FOR', 'ol-internal')
+        params = {"scope": "printdisabled"}
+        headers = {
+            "x-preferred-client-id": client_ip,
+            "x-application-id": "openlibrary",
+        }
+        if config_ia_ol_metadata_write_s3:
+            headers["authorization"] = "LOW {s3_key}:{s3_secret}".format(
+                **config_ia_ol_metadata_write_s3
+            )
+
+        # Make authenticated request to Bulk Availability API
+        response = requests.get(
+            url, params=params, headers=headers, timeout=config_http_request_timeout
+        )
+
         items = response.json().get('responses', {})
         for pkey in items:
             ocaid = pkey if key == 'identifier' else items[pkey].get('identifier')
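The shape of the authenticated call PATCH 17 builds, sketched standalone. Endpoint and credentials are placeholders (the real URL comes from `config_ia_availability_api_v2_url`, which this diff doesn't show); the `LOW key:secret` authorization scheme and the two `x-` headers are taken from the patch itself:

```python
import requests

s3_config = {'s3_key': 'EXAMPLE_KEY', 's3_secret': 'EXAMPLE_SECRET'}

headers = {
    'x-preferred-client-id': 'ol-internal',
    'x-application-id': 'openlibrary',
    # archive.org's S3-style "LOW <key>:<secret>" authorization header
    'authorization': 'LOW {s3_key}:{s3_secret}'.format(**s3_config),
}

response = requests.get(
    'https://archive.org/example-availability-endpoint',  # placeholder URL
    params={'scope': 'printdisabled', 'identifier': 'some-ocaid'},
    headers=headers,
    timeout=30,
)
print(response.status_code)
```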
Heiman" Date: Fri, 19 Jan 2024 21:19:44 -0600 Subject: [PATCH 18/23] Add Unicode folding to text_title_sort SOLR type --- conf/solr/conf/managed-schema.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/solr/conf/managed-schema.xml b/conf/solr/conf/managed-schema.xml index c74ebff8f0e..b513a8f9c51 100644 --- a/conf/solr/conf/managed-schema.xml +++ b/conf/solr/conf/managed-schema.xml @@ -514,8 +514,8 @@ - + Date: Mon, 22 Jan 2024 13:04:06 -0800 Subject: [PATCH 19/23] [pre-commit.ci] pre-commit autoupdate (#8745) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.1.13 → v0.1.14](https://github.com/astral-sh/ruff-pre-commit/compare/v0.1.13...v0.1.14) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ab3ba6b3b73..ddfcd8a69dc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,7 +32,7 @@ repos: - id: auto-walrus - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.13 + rev: v0.1.14 hooks: - id: ruff From bfc63f1aa819f3cd13426c9257d1a59526e8da8a Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Mon, 22 Jan 2024 16:06:38 -0500 Subject: [PATCH 20/23] Fix share modal iframe url (#8738) --- openlibrary/macros/ShareModal.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openlibrary/macros/ShareModal.html b/openlibrary/macros/ShareModal.html index ede2b16d511..8ec6e488ec6 100644 --- a/openlibrary/macros/ShareModal.html +++ b/openlibrary/macros/ShareModal.html @@ -6,7 +6,7 @@ $def embed_iframe(page_url): - +
$:link_markup From cb380d7fa2fd1de80083b4f9451f4d0f2456ac79 Mon Sep 17 00:00:00 2001 From: Rebecca Shoptaw Date: Tue, 23 Jan 2024 09:00:37 -0500 Subject: [PATCH 21/23] Switch Portuguese translations for 3 phrases. --- openlibrary/i18n/pt/messages.po | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openlibrary/i18n/pt/messages.po b/openlibrary/i18n/pt/messages.po index fafca1c8542..bfd77928985 100644 --- a/openlibrary/i18n/pt/messages.po +++ b/openlibrary/i18n/pt/messages.po @@ -638,7 +638,7 @@ msgstr "Pedir emprestado \"%(title)s\"" #: ReadButton.html:15 type/list/embed.html:79 widget.html:39 msgid "Borrow" -msgstr "Emprestar" +msgstr "Empréstimo" #: widget.html:45 #, python-format @@ -3172,7 +3172,7 @@ msgstr "Explorar o Centro de Desenvolvimento da Open Library" #: lib/nav_foot.html:37 lib/nav_head.html:25 msgid "Developer Center" -msgstr "Centro de Desenvolvedor" +msgstr "Central de desenvolvedor" #: lib/nav_foot.html:38 msgid "Explore Open Library APIs" @@ -3204,7 +3204,7 @@ msgstr "Adicionar um novo livro à Open Library" #: lib/nav_foot.html:47 msgid "Help Center" -msgstr "Centro de ajuda" +msgstr "Central de ajuda" #: lib/nav_foot.html:48 msgid "Problems" From e9df97983f8a4e0453a5667e4bf2e46e337b2705 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Tue, 9 Jan 2024 14:15:22 -0500 Subject: [PATCH 22/23] Add sentry transactions to covers server --- conf/coverstore.yml | 1 + openlibrary/plugins/openlibrary/sentry.py | 4 +- openlibrary/utils/sentry.py | 66 ++++++++++++++--------- 3 files changed, 43 insertions(+), 28 deletions(-) diff --git a/conf/coverstore.yml b/conf/coverstore.yml index 2baaebdec59..44c538f481c 100644 --- a/conf/coverstore.yml +++ b/conf/coverstore.yml @@ -12,4 +12,5 @@ sentry: enabled: false # Dummy endpoint; where sentry logs are sent to dsn: 'https://examplePublicKey@o0.ingest.sentry.io/0' + traces_sample_rate: 1.0 environment: 'local' diff --git a/openlibrary/plugins/openlibrary/sentry.py b/openlibrary/plugins/openlibrary/sentry.py index e21ef55c4bd..ef729885c6e 100644 --- a/openlibrary/plugins/openlibrary/sentry.py +++ b/openlibrary/plugins/openlibrary/sentry.py @@ -1,6 +1,6 @@ import infogami from infogami.utils import delegate -from openlibrary.utils.sentry import Sentry, SentryProcessor +from openlibrary.utils.sentry import Sentry, InfogamiSentryProcessor sentry: Sentry | None = None @@ -12,4 +12,4 @@ def setup(): if sentry.enabled: sentry.init() delegate.add_exception_hook(lambda: sentry.capture_exception_webpy()) - delegate.app.add_processor(SentryProcessor()) + delegate.app.add_processor(InfogamiSentryProcessor(delegate.app)) diff --git a/openlibrary/utils/sentry.py b/openlibrary/utils/sentry.py index 8de2c91a923..cd17f266abb 100644 --- a/openlibrary/utils/sentry.py +++ b/openlibrary/utils/sentry.py @@ -76,6 +76,7 @@ def capture_exception(): return _internalerror() app.internalerror = capture_exception + app.add_processor(WebPySentryProcessor(app)) def capture_exception_webpy(self): with sentry_sdk.push_scope() as scope: @@ -97,12 +98,48 @@ def to_sentry_name(self) -> str: ) -class SentryProcessor: +class WebPySentryProcessor: + def __init__(self, app: web.application): + self.app = app + + def find_route_name(self) -> str: + handler, groups = self.app._match(self.app.mapping, web.ctx.path) + web.debug('ROUTE HANDLER', handler, groups) + return handler or '' + + def __call__(self, handler): + route_name = self.find_route_name() + hub = sentry_sdk.Hub.current + with sentry_sdk.Hub(hub) as hub: + with hub.configure_scope() as scope: + 
scope.clear_breadcrumbs() + scope.add_event_processor(add_web_ctx_to_event) + + environ = dict(web.ctx.env) + # Don't forward cookies to Sentry + if 'HTTP_COOKIE' in environ: + del environ['HTTP_COOKIE'] + + transaction = Transaction.continue_from_environ( + environ, + op="http.server", + name=route_name, + source=TRANSACTION_SOURCE_ROUTE, + ) + + with hub.start_transaction(transaction): + try: + return handler() + finally: + transaction.set_http_status(int(web.ctx.status.split()[0])) + + +class InfogamiSentryProcessor(WebPySentryProcessor): """ Processor to profile the webpage and send a transaction to Sentry. """ - def __call__(self, handler): + def find_route_name(self) -> str: def find_type() -> tuple[str, str] | None: return next( ( @@ -134,27 +171,4 @@ def find_route() -> InfogamiRoute: return result - route = find_route() - hub = sentry_sdk.Hub.current - with sentry_sdk.Hub(hub) as hub: - with hub.configure_scope() as scope: - scope.clear_breadcrumbs() - scope.add_event_processor(add_web_ctx_to_event) - - environ = dict(web.ctx.env) - # Don't forward cookies to Sentry - if 'HTTP_COOKIE' in environ: - del environ['HTTP_COOKIE'] - - transaction = Transaction.continue_from_environ( - environ, - op="http.server", - name=route.to_sentry_name(), - source=TRANSACTION_SOURCE_ROUTE, - ) - - with hub.start_transaction(transaction): - try: - return handler() - finally: - transaction.set_http_status(int(web.ctx.status.split()[0])) + return find_route().to_sentry_name() From a014cedcbfe8b06e5be076983ae5de9ff1ce9279 Mon Sep 17 00:00:00 2001 From: Drini Cami Date: Tue, 9 Jan 2024 14:30:28 -0500 Subject: [PATCH 23/23] Switch covers.GET to use return instead of raise This lets the requests correctly be reflected in Sentry as non-erroring --- openlibrary/coverstore/code.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/openlibrary/coverstore/code.py b/openlibrary/coverstore/code.py index 0d34b7fb976..9900f2bc8eb 100644 --- a/openlibrary/coverstore/code.py +++ b/openlibrary/coverstore/code.py @@ -239,17 +239,9 @@ def notfound(): ): return read_file(config.default_image) elif is_valid_url(i.default): - raise web.seeother(i.default) + return web.seeother(i.default) else: - raise web.notfound("") - - def redirect(id): - size_part = size and ("-" + size) or "" - url = f"/{category}/id/{id}{size_part}.jpg" - - if query := web.ctx.env.get('QUERY_STRING'): - url += '?' + query - raise web.found(url) + return web.notfound("") if key == 'isbn': value = value.replace("-", "").strip() # strip hyphens from ISBN @@ -257,7 +249,7 @@ def redirect(id): elif key == 'ia': url = self.get_ia_cover_url(value, size) if url: - raise web.found(url) + return web.found(url) else: value = None # notfound or redirect to default. handled later. 
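PATCH 22 hinges on web.py's processor hook: `app.add_processor` wraps every request, which is what lets the new class open a Sentry transaction around each handler. The hook itself, stripped down to a minimal timing example with no Sentry involved (a standalone sketch):

```python
import time
import web

urls = ('/hello', 'Hello')

class Hello:
    def GET(self):
        return 'hi'

app = web.application(urls, globals())

def timing_processor(handler):
    start = time.perf_counter()
    try:
        return handler()  # run the matched route handler
    finally:
        elapsed = time.perf_counter() - start
        print(f'{web.ctx.path} took {elapsed:.4f}s')

app.add_processor(timing_processor)

if __name__ == '__main__':
    app.run()
```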
From a014cedcbfe8b06e5be076983ae5de9ff1ce9279 Mon Sep 17 00:00:00 2001
From: Drini Cami
Date: Tue, 9 Jan 2024 14:30:28 -0500
Subject: [PATCH 23/23] Switch covers.GET to use return instead of raise

This lets the requests correctly be reflected in Sentry as non-erroring
---
 openlibrary/coverstore/code.py | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/openlibrary/coverstore/code.py b/openlibrary/coverstore/code.py
index 0d34b7fb976..9900f2bc8eb 100644
--- a/openlibrary/coverstore/code.py
+++ b/openlibrary/coverstore/code.py
@@ -239,17 +239,9 @@ def notfound():
             ):
                 return read_file(config.default_image)
             elif is_valid_url(i.default):
-                raise web.seeother(i.default)
+                return web.seeother(i.default)
             else:
-                raise web.notfound("")
-
-        def redirect(id):
-            size_part = size and ("-" + size) or ""
-            url = f"/{category}/id/{id}{size_part}.jpg"
-
-            if query := web.ctx.env.get('QUERY_STRING'):
-                url += '?' + query
-            raise web.found(url)
+                return web.notfound("")
 
         if key == 'isbn':
             value = value.replace("-", "").strip()  # strip hyphens from ISBN
@@ -257,7 +249,7 @@
         elif key == 'ia':
             url = self.get_ia_cover_url(value, size)
             if url:
-                raise web.found(url)
+                return web.found(url)
             else:
                 value = None  # notfound or redirect to default. handled later.
         elif key != 'id':
             value = _query(category, key, value)
@@ -269,7 +261,7 @@
 
         # redirect to archive.org cluster for large size and original images whenever possible
         if size in ("L", "") and self.is_cover_in_cluster(value):
            url = zipview_url_from_id(int(value), size)
-            raise web.found(url)
+            return web.found(url)
 
         # covers_0008 batches [_00, _82] are tar'd / zip'd in archive.org items
         if isinstance(value, int) or value.isnumeric():  # noqa: SIM102
@@ -281,7 +273,7 @@
                 item_file = f"{pid}{'-' + size.upper() if size else ''}"
                 path = f"{item_id}/{item_tar}/{item_file}.jpg"
                 protocol = web.ctx.protocol
-                raise web.found(f"{protocol}://archive.org/download/{path}")
+                return web.found(f"{protocol}://archive.org/download/{path}")
 
         d = self.get_details(value, size.lower())
         if not d:
@@ -291,7 +283,7 @@
         if key == 'id':
             etag = f"{d.id}-{size.lower()}"
             if not web.modified(trim_microsecond(d.created), etag=etag):
-                raise web.notmodified()
+                return web.notmodified()
 
             web.header('Cache-Control', 'public')
             # this image is not going to expire in next 100 years.
@@ -306,14 +298,14 @@
             from openlibrary.coverstore import archive
 
             if d.id >= 8_820_000 and d.uploaded and '.zip' in d.filename:
-                raise web.found(
+                return web.found(
                     archive.Cover.get_cover_url(
                         d.id, size=size, protocol=web.ctx.protocol
                     )
                 )
             return read_image(d, size)
         except OSError:
-            raise web.notfound()
+            return web.notfound()
 
     def get_ia_cover_url(self, identifier, size="M"):
         url = "https://archive.org/metadata/%s/metadata" % identifier