Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci(reviewBot): implement review bot in python to verify reviews #3235

Merged
merged 13 commits into from
May 29, 2024
46 changes: 46 additions & 0 deletions .github/workflows/docs-review-bot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
name: 📖 Review Documentation Bot

# Triggers: manual runs (optionally as a dry run) and a weekly cron schedule.
on:
  workflow_dispatch:
    inputs:
      dryRun:
        description: 'Run the bot without sending notifications'
        required: false
        default: 'false'

  schedule:
    # Every Monday at 08:00.
    - cron: '0 8 * * 1'

# Workflow-wide defaults; the dry-run step below overrides DRY_RUN.
env:
  SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
  DRY_RUN: false

jobs:
  review-docs:
    runs-on: ubuntu-latest

    steps:
      # v2 of checkout/setup-python run on a deprecated Node runtime; use
      # currently maintained major versions.
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # Manual run with dryRun == 'true': the script prints the message but
      # does not post it, because it checks DRY_RUN == "true".
      - name: Run Documentation Review Bot in dry run mode
        if: github.event_name == 'workflow_dispatch' && github.event.inputs.dryRun == 'true'
        env:
          DRY_RUN: true
        run: |
          python bin/check-review-dates.py

      # Scheduled runs, and manual runs without dryRun, post to Slack.
      - name: Run Documentation Review Bot in normal mode
        if: github.event_name != 'workflow_dispatch' || github.event.inputs.dryRun != 'true'
        run: |
          python bin/check-review-dates.py
126 changes: 126 additions & 0 deletions bin/check-review-dates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import os
import logging
from slack_sdk import WebClient
from datetime import timedelta, date, datetime

# Fallback validation frequency, in months, used when a document does not
# declare its own validation-frequency.
DEFAULT_VAL_FREQ = 6

def convert_to_date_and_delta(val_date, val_freq):
    """Convert a validation date and frequency into ``date`` / ``timedelta`` form.

    ``val_date`` is a ``YYYY-MM-DD`` string (trailing newlines are tolerated)
    and ``val_freq`` is a number of months, each approximated as 30.4 days.

    Returns a ``(date, timedelta)`` pair, or ``(None, None)`` when either
    value is not in the expected format.
    """
    try:
        parsed = datetime.strptime(val_date.rstrip('\n'), '%Y-%m-%d')
        months = int(val_freq)
    except ValueError:
        # The document's validation metadata is malformed.
        return None, None
    return parsed.date(), timedelta(days=months * 30.4)

def needs_review(val_date, val_freq):
    """Return True when a doc is due for review.

    A doc is due when the time elapsed since its validation date is at least
    its validation frequency. Docs whose date or frequency cannot be parsed
    are reported as not needing review.
    """
    last_validated, review_interval = convert_to_date_and_delta(val_date, val_freq)
    if last_validated is not None and review_interval is not None:
        # Time elapsed since the last validation, compared to the allowed interval.
        return date.today() - last_validated >= review_interval
    return False

def extract_metadata(filepath):
    """Extract the validation date and validation frequency from a document.

    The file is scanned line by line; scanning stops once two lines
    containing ``---`` have been seen, which is treated as the end of the
    metadata section.

    Args:
        filepath: Path of the document to read (decoded as UTF-8).

    Returns:
        A ``(has_val_date, val_date, val_freq)`` tuple. ``val_date`` is the
        stripped text following ``validation: `` or ``None`` when no such
        line was found. ``val_freq`` is the stripped text following
        ``validation-frequency:``, or ``DEFAULT_VAL_FREQ`` when the document
        does not declare one.
    """
    freq_key = "validation-frequency:"
    val_date = None
    val_freq = None
    meta_limiters = 0

    with open(filepath, encoding="utf-8") as doc:
        for line in doc:
            if "validation: " in line:
                val_date = line.split(": ", 1)[1].strip()
            if freq_key in line:
                # Split on the key that was matched rather than on ": ", so
                # "validation-frequency:12" (no space) no longer raises IndexError.
                val_freq = line.split(freq_key, 1)[1].strip()
            if "---" in line:
                meta_limiters += 1
                # Two delimiters seen: the metadata section is over.
                if meta_limiters >= 2:
                    break

    if val_freq is None:
        val_freq = DEFAULT_VAL_FREQ
    return val_date is not None, val_date, val_freq

def process_files(directory):
    """Walk ``directory`` and collect the ``.mdx`` files that need review.

    A file is collected when it declares a validation date and
    ``needs_review`` reports it as due. Returns the list of matching
    filepaths, in the order they were encountered.
    """
    print("Processing files to check for those needing review")
    overdue = []
    for root, _dirs, names in os.walk(directory):
        for name in names:
            path = os.path.join(root, name)
            if not path.endswith(".mdx"):
                continue
            found, validated_on, frequency = extract_metadata(path)
            if found and needs_review(validated_on, frequency):
                overdue.append(path)
    return overdue

def get_doc_cat_name(filepath):
    """Return a document's category and tidied-up filepath from its raw filepath.

    Args:
        filepath: Raw path of a document. The first two and last four
            characters are dropped, which matches a leading "./" and a
            trailing ".mdx".

    Returns:
        A ``(category, trimmed_filepath)`` tuple. The category is
        "tutorials" for anything under ``tutorials``, the second path segment
        for anything under ``faq``, and otherwise the first two path segments
        joined by a space.
    """
    trimmed_filepath = filepath[2:-4]
    parts = trimmed_filepath.split("/")
    top = parts[0]

    if top == "tutorials":
        category = top
    elif top == "faq" and len(parts) > 1:
        category = parts[1]
    else:
        # Also covers a lone "faq" segment, which has no second segment to
        # use and previously raised IndexError.
        category = ' '.join(parts[0:2])

    return category, trimmed_filepath

def organize_docs_by_category(docs_to_review):
    """Group the docs to review by category.

    Returns a dictionary mapping each category to the list of tidied-up
    filepaths in it, with categories in alphabetical order.
    """
    print("Organizing docs by category")
    grouped = {}

    for raw_path in docs_to_review:
        category, tidy_path = get_doc_cat_name(raw_path)
        grouped.setdefault(category, []).append(tidy_path)

    # Rebuild the dictionary so that iteration follows sorted category names.
    return dict(sorted(grouped.items()))

def prep_message(docs_to_review_by_cat):
    """Build the Slack message listing the docs to review, grouped by category.

    Each category is rendered as a bold, title-cased heading followed by one
    doc per line and a blank line. The message is printed and returned.
    """
    print("Preparing message")
    chunks = [":wave: Hi doc team, here are some docs to review: \n \n"]

    for category, docs in docs_to_review_by_cat.items():
        chunks.append("*" + category.title() + "*" + "\n")
        chunks.extend(doc + "\n" for doc in docs)
        chunks.append("\n")

    message = "".join(chunks)
    print(message)
    return message

def send_message(message):
    """Post the docs-to-review message to the Slack channel.

    The Slack token is read from the ``SLACK_BOT_TOKEN`` environment
    variable; a missing variable raises ``KeyError``.
    """
    print("Sending message")
    slack = WebClient(token=os.environ['SLACK_BOT_TOKEN'])
    payload = {
        "channel": "#review-doc",
        "text": message,
        "username": "DocReviewBot",
    }
    slack.chat_postMessage(**payload)

def main():
    """Find the docs needing review, build the message, and send it to Slack.

    The message is always built (and printed by ``prep_message``); it is only
    sent when the ``DRY_RUN`` environment variable is not ``"true"``.
    """
    overdue = process_files(".")
    message = prep_message(organize_docs_by_category(overdue))

    dry_run = os.environ.get("DRY_RUN") == "true"
    if not dry_run:
        send_message(message)


if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
slack_sdk == 3.27.2
Loading