Skip to content

Commit

Permalink
Add manage_crawls management command
Browse files Browse the repository at this point in the history
  • Loading branch information
chosak committed Sep 16, 2024
1 parent dd92cdc commit bf307ae
Showing 1 changed file with 53 additions and 0 deletions.
53 changes: 53 additions & 0 deletions crawler/management/commands/manage_crawls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from django.db.models import OuterRef, Subquery

import djclick as click

from crawler.models import Crawl


@click.group()
def cli():
    """Inspect and remove crawls stored in the database."""
    # The docstring above is what click shows as the group's --help text;
    # the subcommands attach themselves through @cli.command().


@cli.command()
def list():
    """Print every crawl in the database."""
    # NOTE: click derives the subcommand name from this function's name, so
    # it stays `list` even though that shadows the builtin in this module.
    # Avoid calling `list(...)` as the builtin elsewhere in this file.
    for crawl in Crawl.objects.all():
        click.secho(crawl)


@cli.command()
@click.argument("crawl_id", type=int)
@click.option(
    "--dry-run",
    is_flag=True,
    help="Report what would be deleted without deleting anything",
)
def delete(crawl_id, dry_run):
    """Delete the crawl whose primary key is CRAWL_ID.

    Raises click.ClickException when no crawl has that primary key.
    """
    try:
        crawl = Crawl.objects.get(pk=crawl_id)
    except Crawl.DoesNotExist as exc:
        # Report an unknown ID as an ordinary CLI error instead of letting
        # the ORM exception escape as a traceback.
        raise click.ClickException(f"Crawl {crawl_id} does not exist") from exc

    click.secho(f"Deleting {crawl}")

    if dry_run:
        click.secho("Dry run, skipping deletion")
        return

    crawl.delete()


@cli.command()
@click.option(
    "--keep",
    # Negative values are rejected up front; Django querysets do not support
    # negative slicing, so they would otherwise fail deep inside the ORM.
    type=click.IntRange(min=0),
    help="Keep this many crawls of each status",
    default=1,
    show_default=True,
)
@click.option(
    "--dry-run",
    is_flag=True,
    help="Report what would be deleted without deleting anything",
)
def clean(keep, dry_run):
    """Delete all but the most recently started crawls of each status."""
    # Correlated subquery: for the status of the outer row, select the
    # primary keys of the `keep` most recently started crawls. The `-pk`
    # tie-breaker makes the selection deterministic when `started` values tie.
    crawls_to_keep = (
        Crawl.objects.filter(status=OuterRef("status"))
        .order_by("-started", "-pk")
        .values("pk")[:keep]
    )

    # Everything that is not among the kept crawls for its own status.
    crawls_to_delete = Crawl.objects.exclude(pk__in=Subquery(crawls_to_keep))

    click.secho(f"Deleting {crawls_to_delete.count()} crawls")
    for crawl in crawls_to_delete:
        click.secho(crawl)

    if dry_run:
        click.secho("Dry run, skipping deletion")
        return

    crawls_to_delete.delete()

0 comments on commit bf307ae

Please sign in to comment.