feat: add status command, rate limit handling and --dry-run (#21)
Fix #10.
bfabio authored Jul 23, 2024
1 parent cf4468e commit dc3e45a
Showing 1 changed file: main.py (181 additions, 69 deletions).
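From the argparse options added in this diff, the new behaviour would be exercised roughly as follows (the invocation lines are illustrative; only the --since and -n/--dry-run flags, the status subcommand, and the BOT_GITHUB_TOKEN environment variable come from the code below):

    python main.py --since 7 --dry-run   # scan the last 7 days of logs, print planned actions only
    python main.py status                # list the issues the bot has opened, sorted by last update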
@@ -2,25 +2,25 @@
"""Open issues in repos for errors in publiccode.yml."""

import argparse
import os
import hashlib
import re
import requests
import logging

import jinja2
import github

import os
import re
import sys
import time
from collections import namedtuple
from datetime import datetime, timezone, timedelta
from datetime import datetime, timedelta, timezone
from urllib.parse import urlparse

API_BASEURL = os.getenv('API_BASEURL', 'https://api.developers.italia.it/v1')
GITHUB_USERNAME = os.getenv('GITHUB_USERNAME', 'publiccode-validator-bot')
import github
import jinja2
import requests

API_BASEURL = os.getenv("API_BASEURL", "https://api.developers.italia.it/v1")
GITHUB_USERNAME = os.getenv("GITHUB_USERNAME", "publiccode-validator-bot")

SoftwareLog = namedtuple("SoftwareLog", "log_url software formatted_error_output")

# logging.basicConfig(level=logging.DEBUG)

def to_markdown(error: str, repo_url: str) -> str:
out = "| |Message|\n|-|-|\n"
@@ -30,21 +30,48 @@ def to_markdown(error: str, repo_url: str) -> str:
}

for line in error.splitlines():
m = re.match(r"^publiccode.yml:(?P<line_num>\d+):\d+: (?P<level>.*?): (?P<msg>.*)", line)
m = re.match(
r"^publiccode.yml:(?P<line_num>\d+):\d+: (?P<level>.*?): (?P<msg>.*)", line
)
if m is None:
out += f"|❌|`{line}`|\n"
continue

line_num = m.group('line_num')
level = m.group('level')
msg = m.group('msg')
line_num = m.group("line_num")
level = m.group("level")
msg = m.group("msg")

repo_url = repo_url.removesuffix('.git')
repo_url = repo_url.removesuffix(".git")

out += f"|{icons.get(level, '')} [`publiccode.yml:{line_num}`]({repo_url}/blob/HEAD/publiccode.yml#L{line_num})| `{msg}`|\n"

return out
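# Editor's illustration, not part of the commit: given a validator line such as
#   publiccode.yml:7:3: error: developmentStatus: missing mandatory key
# and repo_url "https://github.com/example/repo.git", the loop above emits a row like
#   |<icon for "error">| [`publiccode.yml:7`](https://github.com/example/repo/blob/HEAD/publiccode.yml#L7)| `developmentStatus: missing mandatory key`|
# whereas lines that do not match the pattern fall back to a plain |❌|`<line>`| row.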


def software():
software = []

page = True
page_after = ""

while page:
res = requests.get(
f"{API_BASEURL}/software{page_after}",
params={
"page[size]": 100,
},
)
res.raise_for_status()

body = res.json()
software += body["data"]

page_after = body["links"]["next"]
page = bool(page_after)

return software


def software_logs(days):
logs = []

@@ -55,56 +82,75 @@ def software_logs(days):

while page:
res = requests.get(
f'{API_BASEURL}/logs{page_after}',
f"{API_BASEURL}/logs{page_after}",
params={
'from': begin.strftime('%Y-%m-%dT%H:%M:%SZ'),
'page[size]': 100,
}
"from": begin.strftime("%Y-%m-%dT%H:%M:%SZ"),
"page[size]": 100,
},
)
res.raise_for_status()

body = res.json()
logs += body['data']
logs += body["data"]

page_after = body['links']['next']
page_after = body["links"]["next"]
page = bool(page_after)

# XXX: get last for repo

for log in logs:
match = re.search(r"^\[(?P<repo>.+?)\] BAD publiccode.yml: (?P<parser_output>.*)", log['message'], re.MULTILINE|re.DOTALL)
match = re.search(
r"^\[(?P<repo>.+?)\] BAD publiccode.yml: (?P<parser_output>.*)",
log["message"],
re.MULTILINE | re.DOTALL,
)
if match:
entity = log.get('entity')
entity = log.get("entity")

if not entity or entity == "//":
continue

res = requests.get(f'{API_BASEURL}{entity}')
res = requests.get(f"{API_BASEURL}{entity}")
res.raise_for_status()

software = res.json()

yield SoftwareLog(f'{API_BASEURL}/logs/{log["id"]}', software, to_markdown(match.group('parser_output'), software['url']))
yield SoftwareLog(
f'{API_BASEURL}/logs/{log["id"]}',
software,
to_markdown(match.group("parser_output"), software["url"]),
)


def issues(gh: github.Github, repo: str) -> list[github.Issue.Issue]:
issues = gh.search_issues(
"publiccode.yml in:title state:open is:issue",
"updated",
"desc",
repo=repo,
author=GITHUB_USERNAME
author=GITHUB_USERNAME,
)

return [issue for issue in issues]


def issues_by_state(
issues: list[github.Issue.Issue], state: str
) -> list[github.Issue.Issue]:
return list(filter(lambda issue: issue.state == state, issues))


def has_issue(issues: list[github.Issue.Issue]) -> github.Issue.Issue | None:
return next(iter(issues), False)


def has_open_issue(issues: list[github.Issue.Issue]) -> github.Issue.Issue | None:
return next(filter(lambda issue: issue.state == 'open', issues), None)
return next(filter(lambda issue: issue.state == "open", issues), None)


def has_closed_issue(issues: list[github.Issue.Issue]) -> github.Issue.Issue | None:
return next(filter(lambda issue: issue.state == 'closed', issues), None)
return next(filter(lambda issue: issue.state == "closed", issues), None)


def should_update_issue(sha1sum: str, issue: github.Issue.Issue) -> bool:
compact = list(filter(None, issue.body.splitlines()))
@@ -115,62 +161,128 @@ def should_update_issue(sha1sum: str, issue: github.Issue.Issue) -> bool:
print(f"WARNING: can't find hidden_fields in issue {issue.html_url}")
return False

data = re.split("[,=]", m.group('data'))
data = re.split("[,=]", m.group("data"))
hidden_fields = dict(zip(data[::2], data[1::2]))

return issue.state == 'open' or hidden_fields['sha1sum'] != sha1sum
return issue.state == "open" or hidden_fields["sha1sum"] != sha1sum

def main():
parser = argparse.ArgumentParser(
description="Open issues in repos for errors in publiccode.yml from the logs in Developers Italia API.",
)
parser.add_argument(
'--since', action="store", dest="since", type=int,
default=1, help="Number of days to go back for the analyzed logs (default: 1)"
)
args = parser.parse_args()

gh = github.Github(os.getenv("BOT_GITHUB_TOKEN"))
# github.enable_console_debug_logging()
def status(gh):
issues = gh.search_issues(f"author:{GITHUB_USERNAME} type:issue sort:updated")

for issue in issues:
print(f"{issue.state}\t{issue.updated_at.isoformat()}\t{issue.html_url}")


def run(gh, since, dry_run):
environment = jinja2.Environment(loader=jinja2.FileSystemLoader("templates/"))
template = environment.get_template("github/issue.it.tpl.txt")

for log in software_logs(args.since):
if not log.software['url'].startswith("https://github.com/"):
print(f"{log.software['url']} is not a GitHub repo. Only GitHub is supported for now.")
issues_created = 0
issues_updated = 0
issues_untouched = 0

for log in software_logs(since):
url = log.software["url"].lower()
if not url.startswith("https://github.com/"):
print(f"🚫 {url} is not a GitHub repo. Only GitHub is supported for now.")
continue

if log.software['url'] != 'https://github.com/teamdigitale/dati-semantic-lode.git' and \
log.software['url'] != 'https://github.com/KnowageLabs/Knowage-Server.git':
print(f"Skipping {log.software['url']}")
if "codiceIPA is " in log.formatted_error_output:
print(f"skipping {url} (codiceIPA is...)")
continue

debug = urlparse(log.software['url']).path
sha1sum = hashlib.sha1(log.formatted_error_output.encode('utf-8')).hexdigest()
debug = urlparse(url).path
sha1sum = hashlib.sha1(log.formatted_error_output.encode("utf-8")).hexdigest()

content = template.render(
{"formatted_error_output": log.formatted_error_output,
"debug": debug,
"api_log_url": log.log_url,
"hidden_fields": {
"sha1sum": sha1sum
}},
{
"formatted_error_output": log.formatted_error_output,
"debug": debug,
"api_log_url": log.log_url,
"hidden_fields": {"sha1sum": sha1sum},
},
)

repo_path = urlparse(log.software['url']).path[1:].removesuffix('.git')
iss = issues(gh, repo_path)

issue = has_issue(iss)
if not issue:
repo = gh.get_repo(f"{repo_path}", lazy=True)
print(f"Creating issue for {log.software['url']}...")
repo.create_issue(title="Errors in publiccode.yml file", body=content)
elif should_update_issue(sha1sum, issue):
print(f"Updating issue for {log.software['url']}...")
issue.edit(body=content)
else:
print("Doing nothing")
repo_path = urlparse(url).path[1:].removesuffix(".git")

try:
iss = issues(gh, repo_path)

issue = has_issue(iss)
if not issue:
repo = gh.get_repo(f"{repo_path}", lazy=True)
print(f"➕ Creating issue for {url}...")
if not dry_run:
repo.create_issue(
title="Errors in publiccode.yml file", body=content
)

issues_created += 1
elif should_update_issue(sha1sum, issue):
print(f"🔄 Updating issue for {url}...")
if not dry_run:
issue.edit(body=content)

issues_updated += 1
else:
print(f"== Issue is open and unchanged, doing nothing ({url})")
issues_untouched += 1
except github.GithubException as e:
if isinstance(e, github.RateLimitExceededException):
reset_timestamp = int(e.headers.get("x-ratelimit-reset", 0))
sleep_duration = max(0, reset_timestamp - int(time.time()) + 10)

print(f"Rate limit exceeded. Sleeping for {sleep_duration} seconds...", end="")
sys.stdout.flush()
time.sleep(sleep_duration)
print("done")
else:
print(f"Error in the GitHub request, repo=${url}: {e}")
continue

print("==== Summary ====")
print(f"➕ Created issues:\t{issues_created}")
print(f"🔄 Updated issues:\t{issues_updated}")
print(f"== Untouched issues:\t{issues_untouched}")


def main():
parser = argparse.ArgumentParser(
description="Open issues in repos for errors in publiccode.yml from the logs in Developers Italia API.",
)
parser.add_argument(
"--since",
action="store",
dest="since",
type=int,
default=1,
help="Number of days to go back for the analyzed logs (default: 1)",
)
parser.add_argument(
"-n",
"--dry-run",
action="store_true",
dest="dry_run",
default=False,
help="Don't actually create or update issues, just print",
)

subparsers = parser.add_subparsers(dest="command")
subparsers.add_parser("status")

args = parser.parse_args()

gh = github.Github(os.getenv("BOT_GITHUB_TOKEN"))

# logging.basicConfig(level=logging.DEBUG)
# github.enable_console_debug_logging()

if args.command == "status":
status(gh)
else:
run(gh, args.since, args.dry_run)


if __name__ == "__main__":
main()
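
For reference, a minimal sketch of the deduplication mechanism used by should_update_issue(): the SHA-1 of the formatted error output is rendered into the issue body as a hidden field and parsed back out on the next run to decide whether the issue needs rewriting. The marker format below is an assumption (the Jinja template is not part of this diff); only the hashing and the re.split("[,=]") parsing mirror main.py.

import hashlib
import re

formatted_error_output = "| |Message|\n|-|-|\n|❌| `example error`|\n"
sha1sum = hashlib.sha1(formatted_error_output.encode("utf-8")).hexdigest()

# Assumed shape of the marker the template embeds in the issue body.
issue_body = f"Errors were found in publiccode.yml.\n\nhidden_fields: sha1sum={sha1sum}\n"

m = re.search(r"hidden_fields: (?P<data>.*)", issue_body)
data = re.split("[,=]", m.group("data"))
hidden_fields = dict(zip(data[::2], data[1::2]))

# An open issue, or one whose stored checksum differs, gets rewritten;
# a closed issue with an unchanged checksum is left alone.
print(hidden_fields["sha1sum"] == sha1sum)  # True -> nothing changed since last run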
