Skip to content

Commit

Permalink
Do garbage collect in batches [RHELDST-20725]
Browse files Browse the repository at this point in the history
Scheduled garbage collection jobs were failing because of out-of-memory
exceptions. This change makes garbage collection run in batches to limit
the amount of resources used at once.
  • Loading branch information
amcmahon-rh committed Oct 20, 2023
1 parent 67d7b54 commit 9dcd945
Showing 1 changed file with 24 additions and 5 deletions.
29 changes: 24 additions & 5 deletions pubtools/_pulp/tasks/garbage_collect.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import logging
from datetime import datetime, timedelta

Expand All @@ -10,6 +11,8 @@
# Logger registered under the "pubtools.pulp" name.
LOG = logging.getLogger("pubtools.pulp")
# Shorthand alias for PulpTask.step.
step = PulpTask.step

# Value passed as the ``limit`` argument of each remove_content call during
# garbage collection. Read once at import time from the
# PULP_GC_UNASSOCIATE_BATCH_LIMIT environment variable (default: 10000).
UNASSOCIATE_BATCH_LIMIT = int(
    os.environ.get("PULP_GC_UNASSOCIATE_BATCH_LIMIT", "10000")
)

class GarbageCollect(PulpClientService, PulpTask):
"""Perform garbage collection on Pulp data.
Expand Down Expand Up @@ -96,16 +99,32 @@ def clean_all_rpm_content(self):
LOG.info("No repos found for cleaning.")
return
arc_repo = clean_repos[0]
deleted_arc = list(arc_repo.remove_content(criteria=criteria))

deleted_content = []
for task in deleted_arc:
if task.repo_id == "all-rpm-content":

while True:
delete_arc_tasks = list(
arc_repo.remove_content(criteria=criteria,
limit=UNASSOCIATE_BATCH_LIMIT)
)
# The pulp client returns task(s) wrapped in a list
arc_tasks = list(filter(lambda x: x.repo_id == "all-rpm-content", delete_arc_tasks))
for task in arc_tasks:
for unit in task.units:
LOG.info("Old all-rpm-content deleted: %s", unit.name)
deleted_content.append(unit)
if not deleted_content:
LOG.info("No all-rpm-content found older than %s", arc_threshold)

# This assumes the client can return multiple (Python) tasks.
# If every task deleted fewer units than the batch limit, this implies
# that all relevant units have been unassociated.
if not arc_tasks or \
not any([t for t in arc_tasks
if len(t.units) < UNASSOCIATE_BATCH_LIMIT]):
break

if not deleted_content:
LOG.info("No all-rpm-content found older than %s",
arc_threshold)

def entry_point():
with GarbageCollect() as instance:
Expand Down

0 comments on commit 9dcd945

Please sign in to comment.