Skip to content

Commit

Permalink
[DPE-4906] - Make libs k8s friendly (#438)
Browse files Browse the repository at this point in the history
## Issue
1. MongoDB K8s charm cannot use some of the shared code in the lib
2. MongoDB K8s charm is copying over an excessive amount of the status
code

## Solution
1. use a universal name to retrieve hosts across charms
2. move shared status code to `set_status` lib

## Lib versions
Many of the libraries' API and patch versions are currently out of date,
so we do not bump the versions here; [this PR](#437) will resolve them
once it lands.

---------

Co-authored-by: Nehalenniæ Oudin <[email protected]>
  • Loading branch information
MiaAltieri and Gu1nness authored Jul 26, 2024
1 parent eeac0fd commit 437b04a
Show file tree
Hide file tree
Showing 12 changed files with 181 additions and 161 deletions.
2 changes: 1 addition & 1 deletion charm_internal_version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3
4
2 changes: 1 addition & 1 deletion lib/charms/mongodb/v0/config_server_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def generate_config_server_db(self) -> str:
"""Generates the config server database for mongos to connect to."""
replica_set_name = self.charm.app.name
hosts = []
for host in self.charm.unit_ips:
for host in self.charm.app_hosts:
hosts.append(f"{host}:{Config.MONGODB_PORT}")

hosts = ",".join(hosts)
Expand Down
9 changes: 1 addition & 8 deletions lib/charms/mongodb/v0/mongodb_tls.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
)
from ops.charm import ActionEvent, RelationBrokenEvent, RelationJoinedEvent
from ops.framework import Object
from ops.model import ActiveStatus, MaintenanceStatus, Unit, WaitingStatus
from ops.model import ActiveStatus, MaintenanceStatus, WaitingStatus

from config import Config

Expand Down Expand Up @@ -333,13 +333,6 @@ def get_tls_files(self, internal: bool) -> Tuple[Optional[str], Optional[str]]:

return ca_file, pem_file

def get_host(self, unit: Unit):
    """Return the address used to reach ``unit`` for the current substrate.

    On the "vm" substrate this is whatever ``charm.unit_ip`` returns for the
    unit; on any other substrate it is the value of
    ``charm.get_hostname_for_unit``.
    """
    if self.substrate != "vm":
        return self.charm.get_hostname_for_unit(unit)

    return self.charm.unit_ip(unit)

def set_tls_secret(self, internal: bool, label_name: str, contents: str) -> None:
"""Sets TLS secret, based on whether or not it is related to internal connections."""
scope = "int" if internal else "ext"
Expand Down
108 changes: 107 additions & 1 deletion lib/charms/mongodb/v0/set_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
import json
import logging
from typing import Tuple

from charms.mongodb.v0.mongodb import MongoDBConfiguration, MongoDBConnection
from ops.charm import CharmBase
from ops.framework import Object
from ops.model import ActiveStatus, StatusBase, WaitingStatus
from ops.model import ActiveStatus, BlockedStatus, StatusBase, WaitingStatus
from pymongo.errors import AutoReconnect, OperationFailure, ServerSelectionTimeoutError

from config import Config

Expand All @@ -20,6 +24,13 @@
# to 0 if you are raising the major API version
LIBPATCH = 3

AUTH_FAILED_CODE = 18
UNAUTHORISED_CODE = 13
TLS_CANNOT_FIND_PRIMARY = 133


logger = logging.getLogger(__name__)


class MongoDBStatusHandler(Object):
"""Verifies versions across multiple integrated applications."""
Expand Down Expand Up @@ -150,4 +161,99 @@ def is_unit_status_ready_for_upgrade(self) -> bool:

return False

def process_statuses(self) -> StatusBase:
    """Collect the charm's process statuses and return the highest-priority one.

    Some errors raised while collecting statuses are non-fatal; those are
    translated into a ``WaitingStatus`` and returned instead of being raised:

    * ``OperationFailure`` with an unauthorised / auth-failed code: waiting
      for passwords to sync.
    * ``OperationFailure`` with the "TLS cannot find primary" code, or a
      ``ServerSelectionTimeoutError``: waiting for internal membership to sync.

    Any other ``OperationFailure`` is re-raised unchanged.

    TODO: add more status handling here for other cases: i.e. TLS, or resetting a status that
    should not be reset
    """
    # the wording of the waiting messages depends on how this charm is deployed
    if self.charm.is_role(Config.Role.REPLICATION):
        mode = "replica set"
    else:
        mode = "cluster"

    try:
        collected = self.get_statuses()
    except OperationFailure as e:
        if e.code in (UNAUTHORISED_CODE, AUTH_FAILED_CODE):
            return WaitingStatus(f"Waiting to sync passwords across the {mode}")
        if e.code == TLS_CANNOT_FIND_PRIMARY:
            return WaitingStatus(f"Waiting to sync internal membership across the {mode}")
        # unknown failure code: let the caller see it
        raise
    except ServerSelectionTimeoutError:
        return WaitingStatus(f"Waiting to sync internal membership across the {mode}")

    return self.prioritize_statuses(collected)

def get_statuses(self) -> Tuple:
    """Gather the statuses reported by each process running inside the unit.

    Returns:
        A 4-tuple ``(mongodb, shard, config_server, pbm)`` in that order, which
        is the order ``prioritize_statuses`` unpacks.
    """
    charm = self.charm
    return (
        build_unit_status(charm.mongodb_config, charm.unit_host(charm.unit)),
        charm.shard.get_shard_status(),
        charm.config_server.get_config_server_status(),
        charm.backups.get_pbm_status(),
    )

def prioritize_statuses(self, statuses: Tuple) -> StatusBase:
    """Pick the most important status out of mongod, sharding, and backups.

    Args:
        statuses: 4-tuple ``(mongodb, shard, config_server, pbm)``.

    Returns:
        The mongodb status if it is not active; otherwise the first of the
        shard, config-server, and pbm statuses that is set and not active;
        otherwise the (active) mongodb status.
    """
    mongod, shard, config_server, pbm = statuses

    # a failure in mongodb takes precedence over everything else
    if not isinstance(mongod, ActiveStatus):
        return mongod

    # remaining statuses are optional; check them in priority order
    for candidate in (shard, config_server, pbm):
        if candidate and not isinstance(candidate, ActiveStatus):
            return candidate

    # everything is active: report the mongodb status rather than a sharding one
    return mongod


def build_unit_status(mongodb_config: MongoDBConfiguration, unit_host: str) -> StatusBase:
    """Translate the replica-set state mongod reports for a unit into a charm status.

    Args:
        mongodb_config: configuration used to open the MongoDB connection.
        unit_host: key under which the unit is looked up in the replica-set status.

    Returns:
        ``ActiveStatus`` for PRIMARY / SECONDARY members, ``WaitingStatus`` while
        the member is missing, syncing, being removed, or the database cannot be
        reached, and ``BlockedStatus`` carrying the raw state for anything else.
    """
    try:
        with MongoDBConnection(mongodb_config) as mongo:
            members = mongo.get_replset_status()

            if unit_host not in members:
                return WaitingStatus("Member being added..")

            state = members[unit_host]

            if state == "PRIMARY":
                return ActiveStatus("Primary")
            if state == "SECONDARY":
                return ActiveStatus("")
            if state in ("STARTUP", "STARTUP2", "ROLLBACK", "RECOVERING"):
                return WaitingStatus("Member is syncing...")
            if state == "REMOVED":
                return WaitingStatus("Member is removing...")

            # any state not recognised above is surfaced verbatim
            return BlockedStatus(state)
    except ServerSelectionTimeoutError as e:
        # ServerSelectionTimeoutError is commonly due to ReplicaSetNoPrimary
        logger.debug("Got error: %s, while checking replica set status", str(e))
        return WaitingStatus("Waiting for primary re-election..")
    except AutoReconnect as e:
        # AutoReconnect is raised when a connection to the database is lost and an attempt to
        # auto-reconnect will be made by pymongo.
        logger.debug("Got error: %s, while checking replica set status", str(e))
        return WaitingStatus("Waiting to reconnect to unit..")

# END: Helpers
43 changes: 2 additions & 41 deletions lib/charms/mongodb/v1/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,8 @@
import subprocess
from typing import List

from charms.mongodb.v0.mongodb import MongoDBConfiguration, MongoDBConnection
from ops.model import (
ActiveStatus,
BlockedStatus,
MaintenanceStatus,
StatusBase,
WaitingStatus,
)
from pymongo.errors import AutoReconnect, ServerSelectionTimeoutError
from charms.mongodb.v0.mongodb import MongoDBConfiguration
from ops.model import ActiveStatus, MaintenanceStatus, StatusBase, WaitingStatus

from config import Config

Expand Down Expand Up @@ -273,38 +266,6 @@ def generate_keyfile() -> str:
return "".join([secrets.choice(choices) for _ in range(1024)])


def build_unit_status(mongodb_config: MongoDBConfiguration, unit_ip: str) -> StatusBase:
    """Map the replica-set state mongod reports for a unit onto a charm status.

    Args:
        mongodb_config: configuration used to open the MongoDB connection.
        unit_ip: key under which the unit is looked up in the replica-set status.

    Returns:
        ``ActiveStatus`` for PRIMARY / SECONDARY members, ``WaitingStatus`` while
        the member is missing, syncing, being removed, or the database cannot be
        reached, and ``BlockedStatus`` carrying the raw state for anything else.
    """
    try:
        with MongoDBConnection(mongodb_config) as mongo:
            members = mongo.get_replset_status()

            if unit_ip not in members:
                return WaitingStatus("Member being added..")

            state = members[unit_ip]

            if state in ("PRIMARY", "SECONDARY"):
                # only the primary gets a message; secondaries are active with no text
                return ActiveStatus("Primary" if state == "PRIMARY" else "")

            if state == "REMOVED":
                return WaitingStatus("Member is removing...")

            if state in ("STARTUP", "STARTUP2", "ROLLBACK", "RECOVERING"):
                return WaitingStatus("Member is syncing...")

            # any state not recognised above is surfaced verbatim
            return BlockedStatus(state)
    except ServerSelectionTimeoutError as e:
        # ServerSelectionTimeoutError is commonly due to ReplicaSetNoPrimary
        logger.debug("Got error: %s, while checking replica set status", str(e))
        return WaitingStatus("Waiting for primary re-election..")
    except AutoReconnect as e:
        # AutoReconnect is raised when a connection to the database is lost and an attempt to
        # auto-reconnect will be made by pymongo.
        logger.debug("Got error: %s, while checking replica set status", str(e))
        return WaitingStatus("Waiting to reconnect to unit..")


def copy_licenses_to_unit():
"""Copies licenses packaged in the snap to the charm's licenses directory."""
os.makedirs("src/licenses", exist_ok=True)
Expand Down
2 changes: 1 addition & 1 deletion lib/charms/mongodb/v1/mongodb_backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@ def retrieve_error_message(self, pbm_status: Dict) -> str:

for host_info in cluster["nodes"]:
replica_info = (
f"mongodb/{self.charm.unit_ip(self.charm.unit)}:{Config.MONGOS_PORT}"
f"mongodb/{self.charm.unit_host(self.charm.unit)}:{Config.MONGOS_PORT}"
)
if host_info["host"] == replica_info:
break
Expand Down
32 changes: 24 additions & 8 deletions lib/charms/mongodb/v1/shards_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,15 @@ class ShardingProvider(Object):
"""Manage relations between the config server and the shard, on the config-server's side."""

def __init__(
self, charm: CharmBase, relation_name: str = Config.Relations.CONFIG_SERVER_RELATIONS_NAME
self,
charm: CharmBase,
relation_name: str = Config.Relations.CONFIG_SERVER_RELATIONS_NAME,
substrate="k8s",
) -> None:
"""Constructor for ShardingProvider object."""
self.relation_name = relation_name
self.charm = charm
self.substrate = substrate
self.database_provides = DatabaseProvides(self.charm, relation_name=self.relation_name)

super().__init__(charm, self.relation_name)
Expand Down Expand Up @@ -133,7 +137,7 @@ def _on_relation_joined(self, event):
KEYFILE_KEY: self.charm.get_secret(
Config.Relations.APP_SCOPE, Config.Secrets.SECRET_KEYFILE_NAME
),
HOSTS_KEY: json.dumps(self.charm.unit_ips),
HOSTS_KEY: json.dumps(self.charm.app_hosts),
}

# if tls enabled
Expand Down Expand Up @@ -343,7 +347,7 @@ def update_mongos_hosts(self):
return

for relation in self.charm.model.relations[self.relation_name]:
self._update_relation_data(relation.id, {HOSTS_KEY: json.dumps(self.charm.unit_ips)})
self._update_relation_data(relation.id, {HOSTS_KEY: json.dumps(self.charm.app_hosts)})

def update_ca_secret(self, new_ca: str) -> None:
"""Updates the new CA for all related shards."""
Expand Down Expand Up @@ -437,12 +441,20 @@ def _get_shard_hosts(self, shard_name) -> List[str]:
"""Retrieves the hosts for a specified shard."""
relations = self.model.relations[self.relation_name]
for relation in relations:
if self._get_shard_name_from_relation(relation) == shard_name:
hosts = []
for unit in relation.units:
if self._get_shard_name_from_relation(relation) != shard_name:
continue

hosts = []
for unit in relation.units:
if self.substrate == "k8s":
unit_name = unit.name.split("/")[0]
unit_id = unit.name.split("/")[1]
host_name = f"{unit_name}-{unit_id}.{unit_name}-endpoints"
hosts.append(host_name)
else:
hosts.append(relation.data[unit].get("private-address"))

return hosts
return hosts

def _get_shard_name_from_relation(self, relation):
"""Returns the name of a shard for a specified relation."""
Expand Down Expand Up @@ -483,7 +495,7 @@ def get_unreachable_shards(self) -> List[str]:

def is_mongos_running(self) -> bool:
"""Returns true if mongos service is running."""
mongos_hosts = ",".join(self.charm.unit_ips)
mongos_hosts = ",".join(self.charm.app_hosts)
uri = f"mongodb://{mongos_hosts}"
with MongosConnection(None, uri) as mongo:
return mongo.is_ready
Expand Down Expand Up @@ -1088,6 +1100,10 @@ def _is_mongos_reachable(self, with_auth=False) -> bool:
def _is_added_to_cluster(self) -> bool:
"""Returns True if the shard has been added to the cluster."""
try:
# edge cases: not integrated to config-server or not yet received enough information
# to be added
if not self.get_config_server_name() or not self.get_mongos_hosts():
return False
cluster_shards = self.get_shard_members()
return self.charm.app.name in cluster_shards
except OperationFailure as e:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ markers = ["unstable"]
# Formatting tools configuration
[tool.black]
line-length = 99
target-version = ["py38"]
target-version = ["py310"]

[tool.isort]
profile = "black"
Expand Down
Loading

0 comments on commit 437b04a

Please sign in to comment.