From 1d4469c20425e03a57eee15b80f7cf8807b9696c Mon Sep 17 00:00:00 2001 From: Mia Altieri <32723809+MiaAltieri@users.noreply.github.com> Date: Mon, 15 Apr 2024 17:10:15 +0200 Subject: [PATCH] [DPE-3940] Support Upgrade (#397) ## Issue `juju refresh` is not supported ## Solution support `juju refresh` by implementing functions from upgrade lib --------- Co-authored-by: Mehdi Bendriss --- lib/charms/data_platform_libs/v0/upgrade.py | 231 +++--------------- .../mongodb/v0/config_server_interface.py | 7 +- lib/charms/mongodb/v0/mongodb.py | 4 + lib/charms/mongodb/v0/mongodb_tls.py | 26 +- lib/charms/mongodb/v0/upgrade.py | 127 +++++++++- lib/charms/mongodb/v1/mongodb_backups.py | 15 +- lib/charms/mongodb/v1/mongodb_provider.py | 13 +- lib/charms/mongodb/v1/shards_interface.py | 32 ++- requirements.txt | 2 + src/charm.py | 76 ++++-- src/config.py | 12 +- 11 files changed, 299 insertions(+), 246 deletions(-) diff --git a/lib/charms/data_platform_libs/v0/upgrade.py b/lib/charms/data_platform_libs/v0/upgrade.py index 0db6f63bc..ef74644de 100644 --- a/lib/charms/data_platform_libs/v0/upgrade.py +++ b/lib/charms/data_platform_libs/v0/upgrade.py @@ -263,8 +263,9 @@ def restart(self, event) -> None: import json import logging from abc import ABC, abstractmethod -from typing import List, Literal, Optional, Set, Tuple +from typing import Dict, List, Literal, Optional, Set, Tuple +import poetry.core.constraints.version as poetry_version from ops.charm import ( ActionEvent, CharmBase, @@ -284,199 +285,31 @@ def restart(self, event) -> None: # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 10 +LIBPATCH = 16 -PYDEPS = ["pydantic>=1.10,<2"] +PYDEPS = ["pydantic>=1.10,<2", "poetry-core"] logger = logging.getLogger(__name__) # --- DEPENDENCY RESOLUTION FUNCTIONS --- -def build_complete_sem_ver(version: str) -> list[int]: - """Builds complete major.minor.patch version from version string. 
- - Returns: - List of major.minor.patch version integers - """ - versions = [int(ver) if ver != "*" else 0 for ver in str(version).split(".")] - - # padding with 0s until complete major.minor.patch - return (versions + 3 * [0])[:3] - - -def verify_caret_requirements(version: str, requirement: str) -> bool: - """Verifies version requirements using carats. - - Args: - version: the version currently in use - requirement: the requirement version - - Returns: - True if `version` meets defined `requirement`. Otherwise False - """ - if not requirement.startswith("^"): - return True - - requirement = requirement[1:] - - sem_version = build_complete_sem_ver(version) - sem_requirement = build_complete_sem_ver(requirement) - - # caret uses first non-zero character, not enough to just count '.' - if sem_requirement[0] == 0: - max_version_index = requirement.count(".") - for i, semver in enumerate(sem_requirement): - if semver != 0: - max_version_index = i - break - else: - max_version_index = 0 - - for i in range(3): - # version higher than first non-zero - if (i <= max_version_index) and (sem_version[i] != sem_requirement[i]): - return False - - # version either higher or lower than first non-zero - if (i > max_version_index) and (sem_version[i] < sem_requirement[i]): - return False - - return True - - -def verify_tilde_requirements(version: str, requirement: str) -> bool: - """Verifies version requirements using tildes. - - Args: - version: the version currently in use - requirement: the requirement version - - Returns: - True if `version` meets defined `requirement`. 
Otherwise False - """ - if not requirement.startswith("~"): - return True - - requirement = requirement[1:] - - sem_version = build_complete_sem_ver(version) - sem_requirement = build_complete_sem_ver(requirement) - - max_version_index = min(1, requirement.count(".")) - - for i in range(3): - # version higher before requirement level - if (i < max_version_index) and (sem_version[i] > sem_requirement[i]): - return False - - # version either higher or lower at requirement level - if (i == max_version_index) and (sem_version[i] != sem_requirement[i]): - return False - - # version lower after requirement level - if (i > max_version_index) and (sem_version[i] < sem_requirement[i]): - return False - - # must be valid - return True - - -def verify_wildcard_requirements(version: str, requirement: str) -> bool: - """Verifies version requirements using wildcards. - - Args: - version: the version currently in use - requirement: the requirement version - - Returns: - True if `version` meets defined `requirement`. Otherwise False - """ - if "*" not in requirement: - return True - - sem_version = build_complete_sem_ver(version) - sem_requirement = build_complete_sem_ver(requirement) - - max_version_index = requirement.count(".") - - for i in range(3): - # version not the same before wildcard - if (i < max_version_index) and (sem_version[i] != sem_requirement[i]): - return False - - # version not higher after wildcard - if (i == max_version_index) and (sem_version[i] < sem_requirement[i]): - return False - - # must be valid - return True - - -def verify_inequality_requirements(version: str, requirement: str) -> bool: - """Verifies version requirements using inequalities. - - Args: - version: the version currently in use - requirement: the requirement version - - Returns: - True if `version` meets defined `requirement`. 
Otherwise False - """ - if not any(char for char in [">", ">="] if requirement.startswith(char)): - return True - - raw_requirement = requirement.replace(">", "").replace("=", "") - - sem_version = build_complete_sem_ver(version) - sem_requirement = build_complete_sem_ver(raw_requirement) - - max_version_index = raw_requirement.count(".") or 0 - - for i in range(3): - # valid at same requirement level - if ( - (i == max_version_index) - and ("=" in requirement) - and (sem_version[i] == sem_requirement[i]) - ): - return True - - # version not increased at any point - if sem_version[i] < sem_requirement[i]: - return False - - # valid - if sem_version[i] > sem_requirement[i]: - return True - - # must not be valid - return False - - def verify_requirements(version: str, requirement: str) -> bool: - """Verifies a specified version against defined requirements. + """Verifies a specified version against defined constraint. - Supports caret (`^`), tilde (`~`), wildcard (`*`) and greater-than inequalities (`>`, `>=`) + Supports Poetry version constraints + https://python-poetry.org/docs/dependency-specification/#version-constraints Args: version: the version currently in use - requirement: the requirement version + requirement: Poetry version constraint Returns: True if `version` meets defined `requirement`. 
Otherwise False """ - if not all( - [ - verify_inequality_requirements(version=version, requirement=requirement), - verify_caret_requirements(version=version, requirement=requirement), - verify_tilde_requirements(version=version, requirement=requirement), - verify_wildcard_requirements(version=version, requirement=requirement), - ] - ): - return False - - return True + return poetry_version.parse_constraint(requirement).allows( + poetry_version.Version.parse(version) + ) # --- DEPENDENCY MODEL TYPES --- @@ -513,7 +346,7 @@ class KafkaDependenciesModel(BaseModel): print(model.dict()) # exporting back validated deps """ - dependencies: dict[str, str] + dependencies: Dict[str, str] name: str upgrade_supported: str version: str @@ -521,19 +354,14 @@ class KafkaDependenciesModel(BaseModel): @validator("dependencies", "upgrade_supported", each_item=True) @classmethod def dependencies_validator(cls, value): - """Validates values with dependencies for multiple special characters.""" + """Validates version constraint.""" if isinstance(value, dict): deps = value.values() else: deps = [value] - chars = ["~", "^", ">", "*"] - for dep in deps: - if (count := sum([dep.count(char) for char in chars])) != 1: - raise ValueError( - f"Value uses greater than 1 special character (^ ~ > *). Found {count}." - ) + poetry_version.parse_constraint(dep) return value @@ -673,7 +501,7 @@ class DataUpgrade(Object, ABC): STATES = ["recovery", "failed", "idle", "ready", "upgrading", "completed"] - on = UpgradeEvents() # pyright: ignore [reportGeneralTypeIssues] + on = UpgradeEvents() # pyright: ignore [reportAssignmentType] def __init__( self, @@ -778,6 +606,21 @@ def upgrade_stack(self, stack: List[int]) -> None: self.peer_relation.data[self.charm.app].update({"upgrade-stack": json.dumps(stack)}) self._upgrade_stack = stack + @property + def other_unit_states(self) -> list: + """Current upgrade state for other units. + + Returns: + Unsorted list of upgrade states for other units. 
+ """ + if not self.peer_relation: + return [] + + return [ + self.peer_relation.data[unit].get("state", "") + for unit in list(self.peer_relation.units) + ] + @property def unit_states(self) -> list: """Current upgrade state for all units. @@ -1067,6 +910,10 @@ def _on_upgrade_charm(self, event: UpgradeCharmEvent) -> None: self.charm.unit.status = WaitingStatus("other units upgrading first...") self.peer_relation.data[self.charm.unit].update({"state": "ready"}) + if self.charm.app.planned_units() == 1: + # single unit upgrade, emit upgrade_granted event right away + getattr(self.on, "upgrade_granted").emit() + else: # for k8s run version checks only on highest ordinal unit if ( @@ -1093,9 +940,9 @@ def on_upgrade_changed(self, event: EventBase) -> None: logger.debug("Cluster failed to upgrade, exiting...") return - if self.cluster_state == "recovery": - logger.debug("Cluster in recovery, deferring...") - event.defer() + if self.substrate == "vm" and self.cluster_state == "recovery": + # skip run while in recovery. The event will be retriggered when the cluster is ready + logger.debug("Cluster in recovery, skip...") return # if all units completed, mark as complete @@ -1116,8 +963,7 @@ def on_upgrade_changed(self, event: EventBase) -> None: logger.debug("upgrade-changed event handled before pre-checks, exiting...") return - logger.debug("Did not find upgrade-stack or completed cluster state, deferring...") - event.defer() + logger.debug("Did not find upgrade-stack or completed cluster state, skipping...") return # upgrade ongoing, set status for waiting units @@ -1147,6 +993,7 @@ def on_upgrade_changed(self, event: EventBase) -> None: self.charm.unit == top_unit and top_state in ["ready", "upgrading"] and self.cluster_state == "ready" + and "upgrading" not in self.other_unit_states ): logger.debug( f"{top_unit.name} is next to upgrade, emitting `upgrade_granted` event and upgrading..." 
diff --git a/lib/charms/mongodb/v0/config_server_interface.py b/lib/charms/mongodb/v0/config_server_interface.py index acb23df00..dadf4199f 100644 --- a/lib/charms/mongodb/v0/config_server_interface.py +++ b/lib/charms/mongodb/v0/config_server_interface.py @@ -42,7 +42,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 11 +LIBPATCH = 12 class ClusterProvider(Object): @@ -82,6 +82,11 @@ def pass_hook_checks(self, event: EventBase) -> bool: ) return False + if not self.charm.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return False + if not self.charm.unit.is_leader(): return False diff --git a/lib/charms/mongodb/v0/mongodb.py b/lib/charms/mongodb/v0/mongodb.py index 47dab18cb..2b2495e96 100644 --- a/lib/charms/mongodb/v0/mongodb.py +++ b/lib/charms/mongodb/v0/mongodb.py @@ -286,6 +286,10 @@ def remove_replset_member(self, hostname: str) -> None: logger.debug("rs_config: %r", dumps(rs_config["config"])) self.client.admin.command("replSetReconfig", rs_config["config"]) + def step_down_primary(self) -> None: + """Steps down the current primary, forcing a re-election.""" + self.client.admin.command("replSetStepDown", {"stepDownSecs": "60"}) + def create_user(self, config: MongoDBConfiguration): """Create user. 
diff --git a/lib/charms/mongodb/v0/mongodb_tls.py b/lib/charms/mongodb/v0/mongodb_tls.py index f64cd4638..c061de7f3 100644 --- a/lib/charms/mongodb/v0/mongodb_tls.py +++ b/lib/charms/mongodb/v0/mongodb_tls.py @@ -38,7 +38,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 12 +LIBPATCH = 13 logger = logging.getLogger(__name__) @@ -73,6 +73,10 @@ def is_tls_enabled(self, internal: bool): def _on_set_tls_private_key(self, event: ActionEvent) -> None: """Set the TLS private key, which will be used for requesting the certificate.""" + if not self.charm.upgrade.idle: + event.fail("Cannot set TLS key - upgrade is in progress.") + return + logger.debug("Request to set TLS private key received.") if self.charm.is_role(Config.Role.MONGOS) and not self.charm.has_config_server(): logger.error( @@ -141,11 +145,21 @@ def _on_tls_relation_joined(self, event: RelationJoinedEvent) -> None: event.defer() return + if not self.charm.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return + self.request_certificate(None, internal=True) self.request_certificate(None, internal=False) def _on_tls_relation_broken(self, event: RelationBrokenEvent) -> None: """Disable TLS when TLS relation broken.""" + if not self.charm.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return + logger.debug("Disabling external and internal TLS for unit: %s", self.charm.unit.name) for internal in [True, False]: @@ -165,6 +179,11 @@ def _on_tls_relation_broken(self, event: RelationBrokenEvent) -> None: def _on_certificate_available(self, event: CertificateAvailableEvent) -> None: """Enable TLS when TLS certificate available.""" + if not self.charm.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return + if self.charm.is_role(Config.Role.MONGOS) and not 
self.charm.config_server_db: logger.debug( "mongos requires config-server in order to start, do not restart with TLS until integrated to config-server" @@ -232,6 +251,11 @@ def waiting_for_certs(self): def _on_certificate_expiring(self, event: CertificateExpiringEvent) -> None: """Request the new certificate when old certificate is expiring.""" + if not self.charm.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return + if self.charm.is_role(Config.Role.MONGOS) and not self.charm.has_config_server(): logger.info( "mongos is not running (not integrated to config-server) deferring renewal of certificates." diff --git a/lib/charms/mongodb/v0/upgrade.py b/lib/charms/mongodb/v0/upgrade.py index def9efbfb..88a889c86 100644 --- a/lib/charms/mongodb/v0/upgrade.py +++ b/lib/charms/mongodb/v0/upgrade.py @@ -15,9 +15,11 @@ UpgradeGrantedEvent, ) from charms.mongodb.v0.mongodb import MongoDBConfiguration, MongoDBConnection +from charms.operator_libs_linux.v1 import snap from ops.charm import CharmBase from ops.model import ActiveStatus from pydantic import BaseModel +from tenacity import Retrying, retry, stop_after_attempt, wait_fixed from typing_extensions import override from config import Config @@ -25,6 +27,7 @@ logger = logging.getLogger(__name__) WRITE_KEY = "write_value" +MONGOD_SERVICE = "mongod" # The unique Charmhub library identifier, never change it LIBID = "aad46b9f0ddb4cb392982a52a596ec9b" @@ -34,7 +37,16 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 2 +LIBPATCH = 3 + +ROLLBACK_INSTRUCTIONS = """Unit failed to upgrade and requires manual rollback to previous stable version. + 1. Re-run `pre-upgrade-check` action on the leader unit to enter 'recovery' state + 2. 
Run `juju refresh` to the previously deployed charm revision +""" + + +class FailedToElectNewPrimaryError(Exception): + """Raised when a new primary isn't elected after stepping down.""" class MongoDBDependencyModel(BaseModel): @@ -51,6 +63,15 @@ def __init__(self, charm: CharmBase, **kwargs): super().__init__(charm, **kwargs) self.charm = charm + @property + def idle(self) -> bool: + """Checks if cluster has completed upgrade. + + Returns: + True if cluster has completed upgrade. Otherwise False + """ + return not bool(self.upgrade_stack) + @override def pre_upgrade_check(self) -> None: """Verifies that an upgrade can be done on the MongoDB deployment.""" @@ -58,37 +79,119 @@ def pre_upgrade_check(self) -> None: if self.charm.is_role(Config.Role.SHARD): raise ClusterNotReadyError( - message=default_message, cause="Cannot run pre-upgrade check on shards" + message=default_message, + cause="Cannot run pre-upgrade check on shards", + resolution="Run this action on config-server.", ) if not self.is_cluster_healthy(): - raise ClusterNotReadyError(message=default_message, cause="Cluster is not healthy") + raise ClusterNotReadyError( + message=default_message, + cause="Cluster is not healthy", + resolution="Please check juju status for information", + ) if not self.is_cluster_able_to_read_write(): - raise ClusterNotReadyError(message=default_message, cause="Cluster cannot read/write") + raise ClusterNotReadyError( + message=default_message, cause="Cluster cannot read/write - please check logs" + ) # Future PR - sharding based checks + @retry( + stop=stop_after_attempt(20), + wait=wait_fixed(1), + reraise=True, + ) + def post_upgrade_check(self) -> None: + """Runs necessary checks validating the unit is in a healthy state after upgrade.""" + if not self.is_cluster_able_to_read_write(): + raise ClusterNotReadyError( + message="post-upgrade check failed and cannot safely upgrade", + cause="Cluster cannot read/write", + ) + @override def build_upgrade_stack(self) -> 
list[int]: + """Builds an upgrade stack, specifying the order of nodes to upgrade.""" + if self.charm.is_role(Config.Role.CONFIG_SERVER): + # TODO implement in a future PR a stack for shards and config server + pass + elif self.charm.is_role(Config.Role.REPLICATION): + return self.get_replica_set_upgrade_stack() + + def get_replica_set_upgrade_stack(self) -> list[int]: """Builds an upgrade stack, specifying the order of nodes to upgrade. - TODO Implement in DPE-3940 + MongoDB Specific: The primary should be upgraded last, so the unit with the primary is + put at the very bottom of the stack. """ + upgrade_stack = [] + units = set([self.charm.unit] + list(self.charm.peers.units)) # type: ignore[reportOptionalMemberAccess] + primary_unit_id = None + for unit in units: + unit_id = int(unit.name.split("/")[-1]) + if unit.name == self.charm.primary: + primary_unit_id = unit_id + continue + + upgrade_stack.append(unit_id) + + upgrade_stack.insert(0, primary_unit_id) + return upgrade_stack @override def log_rollback_instructions(self) -> None: - """Logs the rollback instructions in case of failure to upgrade. - - TODO Implement in DPE-3940 - """ + """Logs the rollback instructions in case of failure to upgrade.""" + logger.critical(ROLLBACK_INSTRUCTIONS) @override def _on_upgrade_granted(self, event: UpgradeGrantedEvent) -> None: - """Execute a series of upgrade steps. 
+ """Execute a series of upgrade steps.""" + # TODO: Future PR - check compatibility of new mongod version with current mongos versions + self.charm.stop_charm_services() + + try: + self.charm.install_snap_packages(packages=Config.SNAP_PACKAGES) + except snap.SnapError: + logger.error("Unable to install Snap") + self.set_unit_failed() + return + + if self.charm.unit.name == self.charm.primary: + logger.debug("Stepping down current primary, before upgrading service...") + self.step_down_primary_and_wait_reelection() + + logger.info(f"{self.charm.unit.name} upgrading service...") + self.charm.restart_charm_services() + + try: + logger.debug("Running post-upgrade check...") + self.post_upgrade_check() + + logger.debug("Marking unit completed...") + self.set_unit_completed() + + # ensures leader gets its own relation-changed when it upgrades + if self.charm.unit.is_leader(): + logger.debug("Re-emitting upgrade-changed on leader...") + self.on_upgrade_changed(event) + + except ClusterNotReadyError as e: + logger.error(e.cause) + self.set_unit_failed() + + def step_down_primary_and_wait_reelection(self) -> bool: + """Steps down the current primary and waits for a new one to be elected.""" + old_primary = self.charm.primary + with MongoDBConnection(self.charm.mongodb_config) as mongod: + mongod.step_down_primary() - TODO Implement in DPE-3940 - """ + for attempt in Retrying(stop=stop_after_attempt(30), wait=wait_fixed(1), reraise=True): + with attempt: + new_primary = self.charm.primary + if new_primary != old_primary: + raise FailedToElectNewPrimaryError() def is_cluster_healthy(self) -> bool: """Returns True if all nodes in the cluster/replcia set are healthy.""" diff --git a/lib/charms/mongodb/v1/mongodb_backups.py b/lib/charms/mongodb/v1/mongodb_backups.py index ee287df34..9760553e9 100644 --- a/lib/charms/mongodb/v1/mongodb_backups.py +++ b/lib/charms/mongodb/v1/mongodb_backups.py @@ -18,6 +18,7 @@ from charms.data_platform_libs.v0.s3 import CredentialsChangedEvent, 
S3Requirer from charms.mongodb.v1.helpers import current_pbm_op, process_pbm_status from charms.operator_libs_linux.v1 import snap +from ops.charm import RelationJoinedEvent from ops.framework import Object from ops.model import BlockedStatus, MaintenanceStatus, StatusBase, WaitingStatus from ops.pebble import ExecError @@ -40,7 +41,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 5 +LIBPATCH = 6 logger = logging.getLogger(__name__) @@ -122,8 +123,13 @@ def __init__(self, charm): self.framework.observe(self.charm.on.list_backups_action, self._on_list_backups_action) self.framework.observe(self.charm.on.restore_action, self._on_restore_action) - def on_s3_relation_joined(self, _) -> None: + def on_s3_relation_joined(self, event: RelationJoinedEvent) -> None: """Checks for valid integration for s3-integrations.""" + if not self.charm.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return False + if not self.is_valid_s3_integration(): logger.debug( "Shard does not support s3 relations, please relate s3-integrator to config-server only." @@ -323,6 +329,11 @@ def _pass_sanity_checks(self, event, action) -> bool: No matter what backup-action is being run, these requirements must be met. 
""" + if not self.charm.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return False + if not self.is_valid_s3_integration(): self._fail_action_with_error_log( event, diff --git a/lib/charms/mongodb/v1/mongodb_provider.py b/lib/charms/mongodb/v1/mongodb_provider.py index 7d8c9b340..1b59ea7ab 100644 --- a/lib/charms/mongodb/v1/mongodb_provider.py +++ b/lib/charms/mongodb/v1/mongodb_provider.py @@ -16,7 +16,7 @@ from charms.data_platform_libs.v0.data_interfaces import DatabaseProvides from charms.mongodb.v0.mongodb import MongoDBConfiguration, MongoDBConnection from charms.mongodb.v1.helpers import generate_password -from ops.charm import CharmBase, RelationBrokenEvent, RelationChangedEvent +from ops.charm import CharmBase, EventBase, RelationBrokenEvent, RelationChangedEvent from ops.framework import Object from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, Relation from pymongo.errors import PyMongoError @@ -31,7 +31,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 4 +LIBPATCH = 5 logger = logging.getLogger(__name__) REL_NAME = "database" @@ -85,10 +85,15 @@ def __init__(self, charm: CharmBase, substrate="k8s", relation_name: str = "data self.database_provides.on.database_requested, self._on_relation_event ) - def pass_hook_checks(self) -> bool: + def pass_hook_checks(self, event: EventBase) -> bool: """Runs the pre-hooks checks for MongoDBProvider, returns True if all pass.""" # We shouldn't try to create or update users if the database is not # initialised. We will create users as part of initialisation. + if not self.charm.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return False + if not self.charm.db_initialised: return False @@ -115,7 +120,7 @@ def _on_relation_event(self, event): data. 
As a result, related charm gets credentials for accessing the MongoDB database. """ - if not self.pass_hook_checks(): + if not self.pass_hook_checks(event): logger.info("Skipping %s: hook checks did not pass", type(event)) return diff --git a/lib/charms/mongodb/v1/shards_interface.py b/lib/charms/mongodb/v1/shards_interface.py index 9253e1450..fda0315bd 100644 --- a/lib/charms/mongodb/v1/shards_interface.py +++ b/lib/charms/mongodb/v1/shards_interface.py @@ -31,7 +31,13 @@ ShardNotPlannedForRemovalError, ) from charms.mongodb.v1.users import BackupUser, MongoDBUser, OperatorUser -from ops.charm import CharmBase, EventBase, RelationBrokenEvent, RelationChangedEvent +from ops.charm import ( + CharmBase, + EventBase, + RelationBrokenEvent, + RelationChangedEvent, + RelationJoinedEvent, +) from ops.framework import Object from ops.model import ( ActiveStatus, @@ -56,7 +62,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 13 +LIBPATCH = 14 KEYFILE_KEY = "key-file" HOSTS_KEY = "host" OPERATOR_PASSWORD_KEY = MongoDBUser.get_password_key_name_for_user(OperatorUser.get_username()) @@ -140,6 +146,11 @@ def _on_relation_joined(self, event): def pass_hook_checks(self, event: EventBase) -> bool: """Runs the pre-hooks checks for ShardingProvider, returns True if all pass.""" + if not self.charm.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return False + if not self.charm.db_initialised: logger.info("Deferring %s. db is not initialised.", str(type(event))) event.defer() @@ -536,6 +547,11 @@ def _handle_changed_secrets(self, event) -> None: Changes in secrets do not re-trigger a relation changed event, so it is necessary to listen to secret changes events. 
""" + if not self.charm.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return False + if ( not self.charm.unit.is_leader() or not event.secret.label @@ -637,8 +653,13 @@ def sync_cluster_passwords( # after updating the password of the backup user, restart pbm with correct password self.charm._connect_pbm_agent() - def _on_relation_joined(self, _): + def _on_relation_joined(self, event: RelationJoinedEvent): """Sets status and flags in relation data relevant to sharding.""" + if not self.charm.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return + # if re-using an old shard, re-set flags. self.charm.unit_peer_data["drained"] = json.dumps(False) self.charm.unit.status = MaintenanceStatus("Adding shard to config-server") @@ -687,6 +708,11 @@ def _on_relation_changed(self, event): def pass_hook_checks(self, event): """Runs the pre-hooks checks for ConfigServerRequirer, returns True if all pass.""" + if not self.charm.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return False + if not self.charm.db_initialised: logger.info("Deferring %s. 
db is not initialised.", str(type(event))) event.defer() diff --git a/requirements.txt b/requirements.txt index b816291fc..73c27b208 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,4 +17,6 @@ zipp==3.11.0 pyOpenSSL==22.1.0 typing-extensions==4.5.0 parameterized==0.9.0 +# Future PR - use poetry in MongoDB Charm +poetry==1.8.2 pydantic==1.10.7 diff --git a/src/charm.py b/src/charm.py index bf5519904..1a8de1791 100755 --- a/src/charm.py +++ b/src/charm.py @@ -76,7 +76,7 @@ from pymongo.errors import OperationFailure, ServerSelectionTimeoutError from tenacity import Retrying, before_log, retry, stop_after_attempt, wait_fixed -from config import Config +from config import Config, Package from exceptions import AdminUserCreationError, ApplicationHostNotFoundError from machine_helpers import MONGO_USER, ROOT_USER_GID, update_mongod_service @@ -184,7 +184,7 @@ def primary(self) -> str: return self.unit.name # check if peer unit matches primary ip - for unit in self._peers.units: + for unit in self.peers.units: if primary_ip == self._unit_ip(unit): return unit.name @@ -207,8 +207,8 @@ def _unit_ips(self) -> List[str]: a list of IP address associated with MongoDB application. 
""" peer_addresses = [] - if self._peers: - peer_addresses = [self._unit_ip(unit) for unit in self._peers.units] + if self.peers: + peer_addresses = [self._unit_ip(unit) for unit in self.peers.units] logger.debug("peer addresses: %s", peer_addresses) self_address = self._unit_ip(self.unit) @@ -260,21 +260,21 @@ def backup_config(self) -> MongoDBConfiguration: @property def unit_peer_data(self) -> Dict: """Peer relation data object.""" - if not self._peers: + if not self.peers: return {} - return self._peers.data[self.unit] + return self.peers.data[self.unit] @property def app_peer_data(self) -> Dict: """Peer relation data object.""" - if not self._peers: + if not self.peers: return {} - return self._peers.data[self.app] + return self.peers.data[self.app] @property - def _peers(self) -> Optional[Relation]: + def peers(self) -> Optional[Relation]: """Fetch the peer relation. Returns: @@ -330,7 +330,7 @@ def _on_install(self, event: InstallEvent) -> None: """Handle the install event (fired on startup).""" self.unit.status = MaintenanceStatus("installing MongoDB") try: - self._install_snap_packages(packages=Config.SNAP_PACKAGES) + self.install_snap_packages(packages=Config.SNAP_PACKAGES) except snap.SnapError: self.unit.status = BlockedStatus("couldn't install MongoDB") @@ -368,8 +368,8 @@ def _on_config_changed(self, event: ConfigChangedEvent) -> None: unresponsive therefore causing a cluster failure, error the component. This prevents it from executing other hooks with a new role. 
""" - # TODO in the future (24.04) support migration of components - if self.is_role_changed(): + if self.upgrade.idle and self.is_role_changed(): + # TODO in the future (24.04) support migration of components logger.error( f"cluster migration currently not supported, cannot change from { self.model.config['role']} to {self.role}" ) @@ -390,7 +390,7 @@ def _on_start(self, event: StartEvent) -> None: try: logger.debug("starting MongoDB.") self.unit.status = MaintenanceStatus("starting MongoDB") - self.start_mongod_service() + self.start_charm_services() self.unit.status = ActiveStatus() except snap.SnapError as e: logger.error("An exception occurred when starting mongod agent, error: %s.", str(e)) @@ -442,6 +442,11 @@ def _on_relation_joined(self, event: RelationJoinedEvent) -> None: if not self.unit.is_leader(): return + if not self.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return + self._on_relation_handler(event) self._update_related_hosts(event) @@ -452,6 +457,11 @@ def _on_relation_handler(self, event: RelationEvent) -> None: Args: event: The triggering relation joined/changed event. """ + if not self.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return + # changing the monitor password will lead to non-leader units receiving a relation changed # event. 
We must update the monitor and pbm URI if the password changes so that COS/pbm # can continue to work @@ -495,6 +505,11 @@ def _on_relation_handler(self, event: RelationEvent) -> None: def _on_leader_elected(self, event: LeaderElectedEvent) -> None: """Generates necessary keyfile and updates replica hosts.""" + if not self.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return + if not self.get_secret(APP_SCOPE, Config.Secrets.SECRET_KEYFILE_NAME): self._generate_secrets() @@ -510,6 +525,11 @@ def _on_relation_departed(self, event: RelationDepartedEvent) -> None: if not self.unit.is_leader() or event.departing_unit == self.unit: return + if not self.upgrade.idle: + logger.info("cannot process %s, upgrade is in progress", event) + event.defer() + return + self._update_hosts(event) def _on_storage_detaching(self, event: StorageDetachingEvent) -> None: @@ -559,6 +579,10 @@ def _on_storage_detaching(self, event: StorageDetachingEvent) -> None: logger.error("Failed to remove %s from replica set, error=%r", self.unit.name, e) def _on_update_status(self, event: UpdateStatusEvent): + if not self.upgrade.idle: + logger.info("Processing upgrade, wait to check status") + return + # user-made mistakes might result in other incorrect statues. Prioritise informing users of # their mistake. 
invalid_integration_status = self.get_invalid_integration_status() @@ -608,6 +632,10 @@ def _on_get_password(self, event: ActionEvent) -> None: def _on_set_password(self, event: ActionEvent) -> None: """Set the password for the admin user.""" + if not self.upgrade.idle: + event.fail("Cannot set password, upgrade is in progress.") + return + # check conditions for setting the password and fail if necessary if not self.pass_pre_set_password_checks(event): return @@ -943,7 +971,7 @@ def _open_ports_tcp(self, ports: int) -> None: logger.exception("failed opening port: %s", str(e)) raise - def _install_snap_packages(self, packages: List[str]) -> None: + def install_snap_packages(self, packages: List[Package]) -> None: """Installs package(s) to container. Args: @@ -1142,7 +1170,7 @@ def _initialise_replica_set(self, event: StartEvent) -> None: try: logger.info("Replica Set initialization") direct_mongo.init_replset() - self._peers.data[self.app]["replica_set_hosts"] = json.dumps( + self.peers.data[self.app]["replica_set_hosts"] = json.dumps( [self._unit_ip(self.unit)] ) @@ -1179,8 +1207,8 @@ def _unit_ip(self, unit: Unit) -> str: if unit == self.unit: return str(self.model.get_binding(Config.Relations.PEERS).network.bind_address) # check if host is a peer - elif unit in self._peers.data: - return str(self._peers.data[unit].get("private-address")) + elif unit in self.peers.data: + return str(self.peers.data[unit].get("private-address")) # raise exception if host not found else: raise ApplicationHostNotFoundError @@ -1232,7 +1260,7 @@ def remove_secret(self, scope, key) -> None: content[key] = Config.Secrets.SECRET_DELETED_LABEL secret.set_content(content) - def start_mongod_service(self): + def start_charm_services(self): """Starts the mongod service and if necessary starts mongos. 
Raises: @@ -1246,7 +1274,7 @@ def start_mongod_service(self): if self.is_role(Config.Role.CONFIG_SERVER): mongodb_snap.start(services=["mongos"], enable=True) - def stop_mongod_service(self): + def stop_charm_services(self): """Stops the mongod service and if necessary stops mongos. Raises: @@ -1266,14 +1294,14 @@ def restart_charm_services(self, auth=None): auth = self.auth_enabled() try: - self.stop_mongod_service() + self.stop_charm_services() update_mongod_service( auth, self._unit_ip(self.unit), config=self.mongodb_config, role=self.role, ) - self.start_mongod_service() + self.start_charm_services() except snap.SnapError as e: logger.error("An exception occurred when starting mongod agent, error: %s.", str(e)) self.unit.status = BlockedStatus("couldn't start MongoDB") @@ -1338,10 +1366,10 @@ def _scope_obj(self, scope: Scopes): return self.unit def _peer_data(self, scope: Scopes): - if not self._peers: + if not self.peers: return {}.setdefault(scope, {}) scope_obj = self._scope_obj(scope) - return self._peers.data[scope_obj] + return self.peers.data[scope_obj] def check_relation_broken_or_scale_down(self, event: RelationDepartedEvent) -> None: """Checks relation departed event is the result of removed relation or scale down. @@ -1403,7 +1431,7 @@ def _generate_relation_departed_key(rel_id: int) -> str: @property def _is_removing_last_replica(self) -> bool: """Returns True if the last replica (juju unit) is getting removed.""" - return self.app.planned_units() == 0 and len(self._peers.units) == 0 + return self.app.planned_units() == 0 and len(self.peers.units) == 0 def get_invalid_integration_status(self) -> Optional[StatusBase]: """Returns a status if an invalid integration is present.""" diff --git a/src/config.py b/src/config.py index 76508c650..dd9a398f7 100644 --- a/src/config.py +++ b/src/config.py @@ -4,7 +4,9 @@ # See LICENSE file for licensing details. 
-from typing import Literal +from typing import Literal, TypeAlias + +Package: TypeAlias = tuple[str, str, str] class Config: @@ -22,14 +24,10 @@ class Config: "mongod_service": { "dependencies": {}, "name": "mongod", - # this should be">4,<8" - but we get pydantic.error_wrappers.ValidationError - resolve - # in DPE-3940 - "upgrade_supported": ">4", - # this should be "6.0.6-5" - but we get pydantic.error_wrappers.ValidationError - - # resolve in DPE-3940 + "upgrade_supported": "^6.0.0,<7", "version": "6.0.6", }, - # TODO: Future PR DPE-3940 - implements mongos if necessary + # TODO: Future PR - implement mongos deps when supporting sharding upgrades } # Keep these alphabetically sorted