Skip to content

Commit

Permalink
Update upload_repo for arc split [RHELDST-23264]
Browse files Browse the repository at this point in the history
Currently, all rpm and errata content is pushed to all-rpm-content repo in each environment. Accumulation in the repos is generating concern for performance. To alleviate the build-up, we'll be splitting content between several repos; 'all-rpm-content-XX' for rpms and 'all-erratum-content-YYYY' for errata.

This change refactors the upload_repo and upload_context to reflect this
split. It does not include validation for the errata year range, as that
feels like a specific implimentation detail.
  • Loading branch information
amcmahon-rh committed Apr 8, 2024
1 parent dc7caa9 commit 6ded1b3
Show file tree
Hide file tree
Showing 18 changed files with 208 additions and 63 deletions.
5 changes: 5 additions & 0 deletions pubtools/_pulp/services/fakepulp.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ def load_initial(self):
self.ctrl.insert_repository(FileRepository(id="redhat-maintenance"))
self.ctrl.insert_repository(FileRepository(id="all-iso-content"))
self.ctrl.insert_repository(YumRepository(id="all-rpm-content"))
# Repos required for common test data (RHELDST-23264)
self.ctrl.insert_repository(YumRepository(id="all-rpm-content-e8"))
self.ctrl.insert_repository(YumRepository(id="all-rpm-content-54"))
self.ctrl.insert_repository(YumRepository(id="all-erratum-content-2019"))
self.ctrl.insert_repository(YumRepository(id="all-erratum-content-2020"))

def load(self):
"""Load data into the fake from previously serialized state (if any).
Expand Down
3 changes: 2 additions & 1 deletion pubtools/_pulp/tasks/push/items/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from .base import PulpPushItem, State
from .rpm import PulpRpmPushItem
from .file import PulpFilePushItem
from .erratum import PulpErratumPushItem
from .erratum import PulpErratumPushItem, ErratumPushItemException
from .modulemd import PulpModuleMdPushItem
from .comps import PulpCompsXmlPushItem
from .productid import PulpProductIdPushItem

__all__ = [
"ErratumPushItemException",
"PulpPushItem",
"PulpRpmPushItem",
"PulpFilePushItem",
Expand Down
25 changes: 21 additions & 4 deletions pubtools/_pulp/tasks/push/items/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,24 @@ class PulpPushItem(object):
to a single unit (e.g. modulemd YAML files; comps.xml files).
"""

MULTI_UPLOAD_CONTEXT = False
"""
Can the push item class create a different upload context based on the data
it holds. e.g PulpRpmPushItem has checksum separated upload repos/contexts.
If true, subclass should override upload_repo.
"""

@property
def upload_repo(self):
"""Given a PulpPushItem, returns the repo its pushsource.PushItem
should be uploaded to.
Subclasses MAY override this to return a variable default destination.
Subclasses must set MULTI_UPLOAD_CONTEXT to True for this to be used.
"""
return "" # pragma: no cover

@classmethod
def for_item(cls, pushsource_item, **kwargs):
"""Given a pushsource.PushItem, returns an instance of a PulpPushItem wrapper
Expand Down Expand Up @@ -136,12 +154,11 @@ def match_items_units(cls, items, units):

return klass.match_items_units(items, units)

@classmethod
def upload_context(cls, pulp_client):
def upload_context(self, pulp_client):
"""Return a context object used during uploads.
The context object will be shared across all uploads for a specific content
type.
The context object should be shared across all uploads for a specific
content type. Subclasses classes may introduce multiple contexts.
Subclasses MAY override this to provide their own context
(e.g. to cache a value rather than recalculating per upload).
Expand Down
43 changes: 31 additions & 12 deletions pubtools/_pulp/tasks/push/items/erratum.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging

import re
import attr
from pushsource import ErratumPushItem
from pubtools.pulplib import (
Expand All @@ -18,24 +18,44 @@
class ErratumUploadContext(UploadContext):
"""A custom context for Erratum uploads.
This context object avoids having to query the UPLOAD_REPO repeatedly.
This context object avoids having to query the PushItem's repo repeatedly.
"""

upload_repo = attr.ib(default=None)


class ErratumPushItemException(BaseException):
"""
Custom exception for PulpErratumPushItem specific issues.
Mainly used for when a year value can't be parsed from the advisory name.
"""


@supports_type(ErratumPushItem)
@attr.s(frozen=True, slots=True)
class PulpErratumPushItem(PulpPushItem):
"""Handler for errata."""

# Erratums are always uploaded to this repo first.
# Pulp stores the erratum pkglist for (erratum_id, repo) pair. If an erratum is
# uploaded for an already existing ID, the pkglist is overwritten if the repo is
# same, else is appended. To keep the pkglist consistent, all the erratums are
# uploaded to the same repo instead of a random repo from the list of destinations.
# For more details, refer RHELDST-12782.
UPLOAD_REPO = "all-rpm-content"
MULTI_UPLOAD_CONTEXT = True
ADVISORY_PATTERN = re.compile(r"RH.A-(\d{4})", re.IGNORECASE)
CONTENT_SPLIT_RANGES = [(2014, 2040), (8014, 8040)]

@property
def upload_repo(self):
# Split errata into different repos by year, assuming the advisory name
# is formatted like RHXA-YYYY
name_match = self.ADVISORY_PATTERN.match(self.pushsource_item.name)
if not name_match:
LOG.error("Bad Advisory name: '%s' does not contain a reasonable year value.",
self.pushsource_item.name)
raise ErratumPushItemException
year = int(name_match.group(1))
if not any([r[0] <= year <= r[1] for r in self.CONTENT_SPLIT_RANGES]):
LOG.warning("%s was not in a valid date range for repo content splitting, using the default.",
self.pushsource_item.name )
year = "0000"
return "all-erratum-content-%s" % year

@property
def unit_type(self):
Expand Down Expand Up @@ -89,11 +109,10 @@ def with_unit(self, unit):

return out

@classmethod
def upload_context(cls, pulp_client):
def upload_context(self, pulp_client):
return ErratumUploadContext(
client=pulp_client,
upload_repo=pulp_client.get_repository(cls.UPLOAD_REPO),
upload_repo=pulp_client.get_repository(self.upload_repo),
)

@classmethod
Expand Down
21 changes: 13 additions & 8 deletions pubtools/_pulp/tasks/push/items/rpm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@

@attr.s(frozen=True, slots=True)
class RpmUploadContext(UploadContext):
"""A custom context for RPM uploads.
"""
A custom context for RPM uploads.
This context object avoids having to query the all-rpm-content repo repeatedly.
This context object lets us avoid querying the all-rpm-content-XX repos
repeatedly.
"""

upload_repo = attr.ib(default=None)
Expand All @@ -22,8 +24,12 @@ class RpmUploadContext(UploadContext):
class PulpRpmPushItem(PulpPushItem):
"""Handler for RPMs."""

# RPMs are always uploaded to this repo first.
UPLOAD_REPO = "all-rpm-content"
MULTI_UPLOAD_CONTEXT = True

@property
def upload_repo(self):
# Split RPMs into different repos by checksum
return "all-rpm-content-%s" % self.pushsource_item.sha256sum[0:2]

@property
def unit_type(self):
Expand Down Expand Up @@ -89,16 +95,15 @@ def match_items_units(cls, items, units):
for item in items:
yield item.with_unit(units_by_sum.get(item.pushsource_item.sha256sum))

@classmethod
def upload_context(cls, pulp_client):
def upload_context(self, pulp_client):
return RpmUploadContext(
client=pulp_client,
upload_repo=pulp_client.get_repository(cls.UPLOAD_REPO),
upload_repo=pulp_client.get_repository(self.upload_repo),
)

@property
def can_pre_push(self):
# We support pre-push by uploading to all-rpm-content first.
# We support pre-push by uploading to all-rpm-content-XX first.
return True

@property
Expand Down
18 changes: 11 additions & 7 deletions pubtools/_pulp/tasks/push/phase/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,17 @@ def run(self):
else:
# This item is not in Pulp, or otherwise needs a reupload.
item_type = type(item)
if item_type not in upload_context:
upload_context[item_type] = item_type.upload_context(
self.pulp_client
)

ctx = upload_context[item_type]

if item.MULTI_UPLOAD_CONTEXT:
if item_type not in upload_context:
upload_context[item_type] = {}
if item.upload_repo not in upload_context[item_type]:
upload_context[item_type][item.upload_repo] = \
item.upload_context(self.pulp_client)
ctx = upload_context[item_type][item.upload_repo]
else:
if item_type not in upload_context:
upload_context[item_type] = item.upload_context(self.pulp_client)
ctx = upload_context[item_type]
uploading += 1
self.put_future_output(item.ensure_uploaded(ctx))

Expand Down
5 changes: 4 additions & 1 deletion tests/fake/test_fake_persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ def test_state_persisted(tmpdir, data_path):
# It should already have a few repos since there is some default
# state.
repo_ids = sorted([repo.id for repo in client.search_repository()])
assert repo_ids == ["all-iso-content", "all-rpm-content", "redhat-maintenance"]
assert repo_ids == ["all-erratum-content-2019", "all-erratum-content-2020",
"all-iso-content", "all-rpm-content",
"all-rpm-content-54", "all-rpm-content-e8",
"redhat-maintenance"]

# Now add a bit more state.
# We use modules here because that's one of the more complex types
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
repos:
- _class: YumRepository
id: all-erratum-content-2019
- _class: YumRepository
id: all-erratum-content-2020
- _class: FileRepository
id: all-iso-content
- _class: YumRepository
id: all-rpm-content
- _class: YumRepository
id: all-rpm-content-54
- _class: YumRepository
id: all-rpm-content-e8
- _class: YumRepository
arch: x86_64
eng_product_id: 123
Expand Down Expand Up @@ -131,7 +139,7 @@ units:
type: other
release: '0'
repository_memberships:
- all-rpm-content
- all-erratum-content-2020
- dest1
rights: Copyright 2020 Red Hat Inc
severity: None
Expand Down Expand Up @@ -2757,7 +2765,7 @@ units:
type: other
release: '0'
repository_memberships:
- all-rpm-content
- all-erratum-content-2019
- dest1
rights: Copyright 2019 Red Hat Inc
severity: None
Expand Down Expand Up @@ -2903,7 +2911,7 @@ units:
type: other
release: '0'
repository_memberships:
- all-rpm-content
- all-erratum-content-2020
- dest1
rights: Copyright 2020 Red Hat Inc
severity: Important
Expand Down Expand Up @@ -7876,7 +7884,7 @@ units:
version: '5.21'
release: '1'
repository_memberships:
- all-rpm-content
- all-rpm-content-e8
- dest2
requires:
- _class: RpmDependency
Expand Down Expand Up @@ -7906,7 +7914,7 @@ units:
provides: []
release: '1'
repository_memberships:
- all-rpm-content
- all-rpm-content-54
- dest1
requires:
- _class: RpmDependency
Expand Down
8 changes: 8 additions & 0 deletions tests/logs/push/test_push/test_empty_push.pulp.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
repos:
- _class: YumRepository
id: all-erratum-content-2019
- _class: YumRepository
id: all-erratum-content-2020
- _class: FileRepository
id: all-iso-content
- _class: YumRepository
id: all-rpm-content
- _class: YumRepository
id: all-rpm-content-54
- _class: YumRepository
id: all-rpm-content-e8
- _class: YumRepository
arch: x86_64
eng_product_id: 123
Expand Down
18 changes: 13 additions & 5 deletions tests/logs/push/test_push/test_nopublish_push.pulp.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
repos:
- _class: YumRepository
id: all-erratum-content-2019
- _class: YumRepository
id: all-erratum-content-2020
- _class: FileRepository
id: all-iso-content
- _class: YumRepository
id: all-rpm-content
- _class: YumRepository
id: all-rpm-content-54
- _class: YumRepository
id: all-rpm-content-e8
- _class: YumRepository
arch: x86_64
eng_product_id: 123
Expand Down Expand Up @@ -131,7 +139,7 @@ units:
type: other
release: '0'
repository_memberships:
- all-rpm-content
- all-erratum-content-2020
- dest1
rights: Copyright 2020 Red Hat Inc
severity: None
Expand Down Expand Up @@ -2757,7 +2765,7 @@ units:
type: other
release: '0'
repository_memberships:
- all-rpm-content
- all-erratum-content-2019
- dest1
rights: Copyright 2019 Red Hat Inc
severity: None
Expand Down Expand Up @@ -2903,7 +2911,7 @@ units:
type: other
release: '0'
repository_memberships:
- all-rpm-content
- all-erratum-content-2020
- dest1
rights: Copyright 2020 Red Hat Inc
severity: Important
Expand Down Expand Up @@ -7876,7 +7884,7 @@ units:
version: '5.21'
release: '1'
repository_memberships:
- all-rpm-content
- all-rpm-content-e8
- dest1
- dest2
requires:
Expand Down Expand Up @@ -7907,7 +7915,7 @@ units:
provides: []
release: '1'
repository_memberships:
- all-rpm-content
- all-rpm-content-54
- dest1
requires:
- _class: RpmDependency
Expand Down
Loading

0 comments on commit 6ded1b3

Please sign in to comment.