From 4c37617f6e19f6333e4398288a4339abf6b30398 Mon Sep 17 00:00:00 2001 From: Aidan McMahon-Smith Date: Tue, 2 Apr 2024 12:09:20 +0200 Subject: [PATCH] Update upload_repo for arc split [] Currently, all rpm and errata content is pushed to all-rpm-content repo in each environment. Accumulation in the repos is generating concern for performance. To alleviate the build-up, we'll be splitting content between several repos; 'all-rpm-content-XX' for rpms and 'all-erratum-content-YYYY' for errata. This change refactors the upload_repo and upload_context to reflect this split. It does not include validation for the errata year range, as that feels like a specific implimentation detail. --- pubtools/_pulp/services/fakepulp.py | 5 ++++ pubtools/_pulp/tasks/push/items/base.py | 9 +++++-- pubtools/_pulp/tasks/push/items/erratum.py | 24 ++++++++++--------- pubtools/_pulp/tasks/push/items/rpm.py | 21 +++++++++------- pubtools/_pulp/tasks/push/phase/upload.py | 21 +++++++++------- tests/fake/test_fake_persistence.py | 5 +++- .../test_association_race.pulp.yaml | 18 ++++++++++---- .../push/test_push/test_empty_push.pulp.yaml | 8 +++++++ .../test_push/test_nopublish_push.pulp.yaml | 18 ++++++++++---- .../test_push/test_typical_push.pulp.yaml | 18 ++++++++++---- .../push/test_push/test_update_push.pulp.yaml | 16 +++++++++---- .../test_push_prepush/test_pre_push.pulp.yaml | 12 ++++++++-- .../test_pre_push_no_dest.pulp.yaml | 10 +++++++- tests/push/test_push.py | 2 +- tests/push/test_push_prepush.py | 10 ++++---- tests/push/test_upload_sharing.py | 3 ++- 16 files changed, 141 insertions(+), 59 deletions(-) diff --git a/pubtools/_pulp/services/fakepulp.py b/pubtools/_pulp/services/fakepulp.py index 6c30544e..caef88be 100644 --- a/pubtools/_pulp/services/fakepulp.py +++ b/pubtools/_pulp/services/fakepulp.py @@ -136,6 +136,11 @@ def load_initial(self): self.ctrl.insert_repository(FileRepository(id="redhat-maintenance")) self.ctrl.insert_repository(FileRepository(id="all-iso-content")) self.ctrl.insert_repository(YumRepository(id="all-rpm-content")) + # Repos required for common test data (RHELDST-23264) + self.ctrl.insert_repository(YumRepository(id="all-rpm-content-e8")) + self.ctrl.insert_repository(YumRepository(id="all-rpm-content-54")) + self.ctrl.insert_repository(YumRepository(id="all-erratum-content-2019")) + self.ctrl.insert_repository(YumRepository(id="all-erratum-content-2020")) def load(self): """Load data into the fake from previously serialized state (if any). diff --git a/pubtools/_pulp/tasks/push/items/base.py b/pubtools/_pulp/tasks/push/items/base.py index 96a2ecbc..0b1596ca 100644 --- a/pubtools/_pulp/tasks/push/items/base.py +++ b/pubtools/_pulp/tasks/push/items/base.py @@ -95,6 +95,11 @@ class PulpPushItem(object): to a single unit (e.g. modulemd YAML files; comps.xml files). """ + MULTI_CONTEXT = False + """ + Can the push item class create a different upload context based on the data + it holds. e.g PulpRpmPushItem has checksum separated upload repos/contexts + """ @classmethod def for_item(cls, pushsource_item, **kwargs): """Given a pushsource.PushItem, returns an instance of a PulpPushItem wrapper @@ -140,8 +145,8 @@ def match_items_units(cls, items, units): def upload_context(cls, pulp_client): """Return a context object used during uploads. - The context object will be shared across all uploads for a specific content - type. + The context object should be shared across all uploads for a specific + content type. Subclasses classes may introduce multiple contexts for performance. Subclasses MAY override this to provide their own context (e.g. to cache a value rather than recalculating per upload). diff --git a/pubtools/_pulp/tasks/push/items/erratum.py b/pubtools/_pulp/tasks/push/items/erratum.py index bb9dbfa2..bb0043db 100644 --- a/pubtools/_pulp/tasks/push/items/erratum.py +++ b/pubtools/_pulp/tasks/push/items/erratum.py @@ -18,7 +18,7 @@ class ErratumUploadContext(UploadContext): """A custom context for Erratum uploads. - This context object avoids having to query the UPLOAD_REPO repeatedly. + This context object avoids having to query the PushItem's repo repeatedly. """ upload_repo = attr.ib(default=None) @@ -29,13 +29,16 @@ class ErratumUploadContext(UploadContext): class PulpErratumPushItem(PulpPushItem): """Handler for errata.""" - # Erratums are always uploaded to this repo first. - # Pulp stores the erratum pkglist for (erratum_id, repo) pair. If an erratum is - # uploaded for an already existing ID, the pkglist is overwritten if the repo is - # same, else is appended. To keep the pkglist consistent, all the erratums are - # uploaded to the same repo instead of a random repo from the list of destinations. - # For more details, refer RHELDST-12782. - UPLOAD_REPO = "all-rpm-content" + MULTI_CONTEXT = True + # To improve performance, we split the errata into different repos by year + # Assuming the advisory name is formatted like RHXA-YYYY[extravalues] + @property + def upload_repo(self): + try: + return "all-erratum-content-%s" % self.pushsource_item.name[5:9] + except KeyError: + # Poorly formatted errata name, use default + return "all-erratum-content-0000" @property def unit_type(self): @@ -89,11 +92,10 @@ def with_unit(self, unit): return out - @classmethod - def upload_context(cls, pulp_client): + def upload_context(self, pulp_client): return ErratumUploadContext( client=pulp_client, - upload_repo=pulp_client.get_repository(cls.UPLOAD_REPO), + upload_repo=pulp_client.get_repository(self.upload_repo), ) @classmethod diff --git a/pubtools/_pulp/tasks/push/items/rpm.py b/pubtools/_pulp/tasks/push/items/rpm.py index b6cd8dda..60bb461f 100644 --- a/pubtools/_pulp/tasks/push/items/rpm.py +++ b/pubtools/_pulp/tasks/push/items/rpm.py @@ -9,9 +9,10 @@ @attr.s(frozen=True, slots=True) class RpmUploadContext(UploadContext): - """A custom context for RPM uploads. - - This context object avoids having to query the all-rpm-content repo repeatedly. + """ + A custom context for RPM uploads. + This context object lets us avoid querying the all-rpm-content-XX repos + repeatedly. """ upload_repo = attr.ib(default=None) @@ -22,8 +23,11 @@ class RpmUploadContext(UploadContext): class PulpRpmPushItem(PulpPushItem): """Handler for RPMs.""" - # RPMs are always uploaded to this repo first. - UPLOAD_REPO = "all-rpm-content" + MULTI_CONTEXT = True + + @property + def upload_repo(self): + return "all-rpm-content-%s" % self.pushsource_item.sha256sum[0:2] @property def unit_type(self): @@ -89,16 +93,15 @@ def match_items_units(cls, items, units): for item in items: yield item.with_unit(units_by_sum.get(item.pushsource_item.sha256sum)) - @classmethod - def upload_context(cls, pulp_client): + def upload_context(self, pulp_client): return RpmUploadContext( client=pulp_client, - upload_repo=pulp_client.get_repository(cls.UPLOAD_REPO), + upload_repo=pulp_client.get_repository(self.upload_repo), ) @property def can_pre_push(self): - # We support pre-push by uploading to all-rpm-content first. + # We support pre-push by uploading to all-rpm-content-XX first. return True @property diff --git a/pubtools/_pulp/tasks/push/phase/upload.py b/pubtools/_pulp/tasks/push/phase/upload.py index b8f65a3f..f96dd306 100644 --- a/pubtools/_pulp/tasks/push/phase/upload.py +++ b/pubtools/_pulp/tasks/push/phase/upload.py @@ -58,15 +58,20 @@ def run(self): prepush_skipped += 1 self.put_output(item) else: - # This item is not in Pulp, or otherwise needs a reupload. item_type = type(item) - if item_type not in upload_context: - upload_context[item_type] = item_type.upload_context( - self.pulp_client - ) - - ctx = upload_context[item_type] - + if item_type.MULTI_CONTEXT: + if item_type not in upload_context: + upload_context[item_type] = {} + upload_context[item_type][item.upload_repo] = \ + item.upload_context(self.pulp_client) + elif item.upload_repo not in upload_context[item_type]: + upload_context[item_type][item.upload_repo] = \ + item.upload_context( self.pulp_client) + ctx = upload_context[item_type][item.upload_repo] + else: + if item_type not in upload_context: + upload_context[item_type] = item.upload_context(self.pulp_client) + ctx = upload_context[item_type] uploading += 1 self.put_future_output(item.ensure_uploaded(ctx)) diff --git a/tests/fake/test_fake_persistence.py b/tests/fake/test_fake_persistence.py index bb7f248a..e5ed28b2 100644 --- a/tests/fake/test_fake_persistence.py +++ b/tests/fake/test_fake_persistence.py @@ -24,7 +24,10 @@ def test_state_persisted(tmpdir, data_path): # It should already have a few repos since there is some default # state. repo_ids = sorted([repo.id for repo in client.search_repository()]) - assert repo_ids == ["all-iso-content", "all-rpm-content", "redhat-maintenance"] + assert repo_ids == ["all-erratum-content-2019", "all-erratum-content-2020", + "all-iso-content", "all-rpm-content", + "all-rpm-content-54", "all-rpm-content-e8", + "redhat-maintenance"] # Now add a bit more state. # We use modules here because that's one of the more complex types diff --git a/tests/logs/push/test_association_race/test_association_race.pulp.yaml b/tests/logs/push/test_association_race/test_association_race.pulp.yaml index d731f31a..343d0d6a 100644 --- a/tests/logs/push/test_association_race/test_association_race.pulp.yaml +++ b/tests/logs/push/test_association_race/test_association_race.pulp.yaml @@ -1,8 +1,16 @@ repos: +- _class: YumRepository + id: all-erratum-content-2019 +- _class: YumRepository + id: all-erratum-content-2020 - _class: FileRepository id: all-iso-content - _class: YumRepository id: all-rpm-content +- _class: YumRepository + id: all-rpm-content-54 +- _class: YumRepository + id: all-rpm-content-e8 - _class: YumRepository arch: x86_64 eng_product_id: 123 @@ -131,7 +139,7 @@ units: type: other release: '0' repository_memberships: - - all-rpm-content + - all-erratum-content-2020 - dest1 rights: Copyright 2020 Red Hat Inc severity: None @@ -2757,7 +2765,7 @@ units: type: other release: '0' repository_memberships: - - all-rpm-content + - all-erratum-content-2019 - dest1 rights: Copyright 2019 Red Hat Inc severity: None @@ -2903,7 +2911,7 @@ units: type: other release: '0' repository_memberships: - - all-rpm-content + - all-erratum-content-2020 - dest1 rights: Copyright 2020 Red Hat Inc severity: Important @@ -7876,7 +7884,7 @@ units: version: '5.21' release: '1' repository_memberships: - - all-rpm-content + - all-rpm-content-e8 - dest2 requires: - _class: RpmDependency @@ -7906,7 +7914,7 @@ units: provides: [] release: '1' repository_memberships: - - all-rpm-content + - all-rpm-content-54 - dest1 requires: - _class: RpmDependency diff --git a/tests/logs/push/test_push/test_empty_push.pulp.yaml b/tests/logs/push/test_push/test_empty_push.pulp.yaml index 8a22d3ac..fb52b268 100644 --- a/tests/logs/push/test_push/test_empty_push.pulp.yaml +++ b/tests/logs/push/test_push/test_empty_push.pulp.yaml @@ -1,8 +1,16 @@ repos: +- _class: YumRepository + id: all-erratum-content-2019 +- _class: YumRepository + id: all-erratum-content-2020 - _class: FileRepository id: all-iso-content - _class: YumRepository id: all-rpm-content +- _class: YumRepository + id: all-rpm-content-54 +- _class: YumRepository + id: all-rpm-content-e8 - _class: YumRepository arch: x86_64 eng_product_id: 123 diff --git a/tests/logs/push/test_push/test_nopublish_push.pulp.yaml b/tests/logs/push/test_push/test_nopublish_push.pulp.yaml index 9b274ddd..bb097060 100644 --- a/tests/logs/push/test_push/test_nopublish_push.pulp.yaml +++ b/tests/logs/push/test_push/test_nopublish_push.pulp.yaml @@ -1,8 +1,16 @@ repos: +- _class: YumRepository + id: all-erratum-content-2019 +- _class: YumRepository + id: all-erratum-content-2020 - _class: FileRepository id: all-iso-content - _class: YumRepository id: all-rpm-content +- _class: YumRepository + id: all-rpm-content-54 +- _class: YumRepository + id: all-rpm-content-e8 - _class: YumRepository arch: x86_64 eng_product_id: 123 @@ -131,7 +139,7 @@ units: type: other release: '0' repository_memberships: - - all-rpm-content + - all-erratum-content-2020 - dest1 rights: Copyright 2020 Red Hat Inc severity: None @@ -2757,7 +2765,7 @@ units: type: other release: '0' repository_memberships: - - all-rpm-content + - all-erratum-content-2019 - dest1 rights: Copyright 2019 Red Hat Inc severity: None @@ -2903,7 +2911,7 @@ units: type: other release: '0' repository_memberships: - - all-rpm-content + - all-erratum-content-2020 - dest1 rights: Copyright 2020 Red Hat Inc severity: Important @@ -7876,7 +7884,7 @@ units: version: '5.21' release: '1' repository_memberships: - - all-rpm-content + - all-rpm-content-e8 - dest1 - dest2 requires: @@ -7907,7 +7915,7 @@ units: provides: [] release: '1' repository_memberships: - - all-rpm-content + - all-rpm-content-54 - dest1 requires: - _class: RpmDependency diff --git a/tests/logs/push/test_push/test_typical_push.pulp.yaml b/tests/logs/push/test_push/test_typical_push.pulp.yaml index 65dc67b8..99f8d59f 100644 --- a/tests/logs/push/test_push/test_typical_push.pulp.yaml +++ b/tests/logs/push/test_push/test_typical_push.pulp.yaml @@ -1,8 +1,16 @@ repos: +- _class: YumRepository + id: all-erratum-content-2019 +- _class: YumRepository + id: all-erratum-content-2020 - _class: FileRepository id: all-iso-content - _class: YumRepository id: all-rpm-content +- _class: YumRepository + id: all-rpm-content-54 +- _class: YumRepository + id: all-rpm-content-e8 - _class: YumRepository arch: x86_64 eng_product_id: 123 @@ -131,7 +139,7 @@ units: type: other release: '0' repository_memberships: - - all-rpm-content + - all-erratum-content-2020 - dest1 rights: Copyright 2020 Red Hat Inc severity: None @@ -2757,7 +2765,7 @@ units: type: other release: '0' repository_memberships: - - all-rpm-content + - all-erratum-content-2019 - dest1 rights: Copyright 2019 Red Hat Inc severity: None @@ -2903,7 +2911,7 @@ units: type: other release: '0' repository_memberships: - - all-rpm-content + - all-erratum-content-2020 - dest1 rights: Copyright 2020 Red Hat Inc severity: Important @@ -7880,7 +7888,7 @@ units: version: '5.21' release: '1' repository_memberships: - - all-rpm-content + - all-rpm-content-e8 - dest1 - dest2 requires: @@ -7912,7 +7920,7 @@ units: provides: [] release: '1' repository_memberships: - - all-rpm-content + - all-rpm-content-54 - dest1 requires: - _class: RpmDependency diff --git a/tests/logs/push/test_push/test_update_push.pulp.yaml b/tests/logs/push/test_push/test_update_push.pulp.yaml index 4e83d71c..c42421f5 100644 --- a/tests/logs/push/test_push/test_update_push.pulp.yaml +++ b/tests/logs/push/test_push/test_update_push.pulp.yaml @@ -1,8 +1,16 @@ repos: +- _class: YumRepository + id: all-erratum-content-2019 +- _class: YumRepository + id: all-erratum-content-2020 - _class: FileRepository id: all-iso-content - _class: YumRepository id: all-rpm-content +- _class: YumRepository + id: all-rpm-content-54 +- _class: YumRepository + id: all-rpm-content-e8 - _class: YumRepository arch: x86_64 eng_product_id: 123 @@ -131,7 +139,7 @@ units: type: other release: '0' repository_memberships: - - all-rpm-content + - all-erratum-content-2020 - dest1 rights: Copyright 2020 Red Hat Inc severity: None @@ -2757,7 +2765,7 @@ units: type: other release: '0' repository_memberships: - - all-rpm-content + - all-erratum-content-2019 - dest1 rights: Copyright 2019 Red Hat Inc severity: None @@ -2903,7 +2911,7 @@ units: type: other release: '0' repository_memberships: - - all-rpm-content + - all-erratum-content-2020 - dest1 rights: Copyright 2020 Red Hat Inc severity: Important @@ -7881,7 +7889,7 @@ units: version: '5.21' release: '1' repository_memberships: - - all-rpm-content + - all-rpm-content-e8 - dest1 - dest2 requires: diff --git a/tests/logs/push/test_push_prepush/test_pre_push.pulp.yaml b/tests/logs/push/test_push_prepush/test_pre_push.pulp.yaml index 8a5af83b..65701d74 100644 --- a/tests/logs/push/test_push_prepush/test_pre_push.pulp.yaml +++ b/tests/logs/push/test_push_prepush/test_pre_push.pulp.yaml @@ -1,8 +1,16 @@ repos: +- _class: YumRepository + id: all-erratum-content-2019 +- _class: YumRepository + id: all-erratum-content-2020 - _class: FileRepository id: all-iso-content - _class: YumRepository id: all-rpm-content +- _class: YumRepository + id: all-rpm-content-54 +- _class: YumRepository + id: all-rpm-content-e8 - _class: YumRepository arch: x86_64 eng_product_id: 123 @@ -33,7 +41,7 @@ units: version: '5.21' release: '1' repository_memberships: - - all-rpm-content + - all-rpm-content-e8 requires: - _class: RpmDependency epoch: '0' @@ -62,7 +70,7 @@ units: provides: [] release: '1' repository_memberships: - - all-rpm-content + - all-rpm-content-54 requires: - _class: RpmDependency epoch: '0' diff --git a/tests/logs/push/test_push_prepush/test_pre_push_no_dest.pulp.yaml b/tests/logs/push/test_push_prepush/test_pre_push_no_dest.pulp.yaml index 758689be..6ecbacdf 100644 --- a/tests/logs/push/test_push_prepush/test_pre_push_no_dest.pulp.yaml +++ b/tests/logs/push/test_push_prepush/test_pre_push_no_dest.pulp.yaml @@ -1,8 +1,16 @@ repos: +- _class: YumRepository + id: all-erratum-content-2019 +- _class: YumRepository + id: all-erratum-content-2020 - _class: FileRepository id: all-iso-content - _class: YumRepository id: all-rpm-content +- _class: YumRepository + id: all-rpm-content-54 +- _class: YumRepository + id: all-rpm-content-e8 - _class: YumRepository arch: x86_64 eng_product_id: 123 @@ -33,7 +41,7 @@ units: version: '5.21' release: '1' repository_memberships: - - all-rpm-content + - all-rpm-content-e8 requires: - _class: RpmDependency epoch: '0' diff --git a/tests/push/test_push.py b/tests/push/test_push.py index dc6f35d7..de919929 100644 --- a/tests/push/test_push.py +++ b/tests/push/test_push.py @@ -131,7 +131,7 @@ def test_typical_push( ) # It should have invoked hook(s). - assert len(hookspy) == 25 + assert len(hookspy) == 29 (hook_name, hook_kwargs) = hookspy[0] assert hook_name == "task_start" (hook_name, hook_kwargs) = hookspy[1] diff --git a/tests/push/test_push_prepush.py b/tests/push/test_push_prepush.py index 4b7e1b9f..8c1f4ce9 100644 --- a/tests/push/test_push_prepush.py +++ b/tests/push/test_push_prepush.py @@ -57,14 +57,16 @@ def test_pre_push( # It should have uploaded some stuff assert units + # RPMs are separated into different arc repos by checksum + expected_repo_mapping = {"test-srpm01": ["all-rpm-content-54"], + "walrus": ["all-rpm-content-e8"]} + for unit in units: # The only type of content is RPMs, because that's all we support for # pre-push right now assert isinstance(unit, RpmUnit) - # And the only repo containing those RPMs should be all-rpm-content, - # because that's how pre-push works - assert unit.repository_memberships == ["all-rpm-content"] + assert unit.repository_memberships == expected_repo_mapping[unit.name] def test_pre_push_no_dest( @@ -124,4 +126,4 @@ def test_pre_push_no_dest( assert isinstance(units[0], RpmUnit) # Only to this repo - assert units[0].repository_memberships == ["all-rpm-content"] + assert units[0].repository_memberships == ["all-rpm-content-e8"] diff --git a/tests/push/test_upload_sharing.py b/tests/push/test_upload_sharing.py index f3b52c30..0d27b9e3 100644 --- a/tests/push/test_upload_sharing.py +++ b/tests/push/test_upload_sharing.py @@ -39,8 +39,9 @@ def test_uploads_shared(data_path): """Upload phase allows for uploads of identical content to be reused.""" pulp_ctrl = FakeController() + pulp_ctrl.insert_repository(YumRepository(id="all-rpm-content-54")) + pulp_ctrl.insert_repository(YumRepository(id="all-rpm-content-e8")) - pulp_ctrl.insert_repository(YumRepository(id="all-rpm-content")) pulp_ctrl.insert_repository(YumRepository(id="repo1")) pulp_ctrl.insert_repository(YumRepository(id="repo2")) pulp_ctrl.insert_repository(YumRepository(id="repo3"))