Point In Time Recovery #551

Open · wants to merge 7 commits into base: main
3 changes: 3 additions & 0 deletions actions.yaml
@@ -55,6 +55,9 @@ restore:
backup-id:
type: string
description: A backup-id to identify the backup to restore (format = %Y-%m-%dT%H:%M:%SZ)
restore-to-time:
type: string
description: Point-in-time-recovery target in MySQL timestamp format.

pre-upgrade-check:
description: Run necessary pre-upgrade checks and preparations before executing a charm refresh.
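
Not part of the diff: a minimal Python sketch of the two timestamp formats involved. The backup-id format is taken from the description above, while the restore-to-time value is assumed to be a standard MySQL DATETIME literal (YYYY-MM-DD HH:MM:SS); the exact format the restore helper accepts is not spelled out in this hunk.

import datetime

now = datetime.datetime.now(datetime.timezone.utc)

# backup-id format, as documented above: %Y-%m-%dT%H:%M:%SZ
backup_id = now.strftime("%Y-%m-%dT%H:%M:%SZ")       # e.g. 2024-01-15T10:30:00Z

# restore-to-time: assumed MySQL timestamp literal (verify against the helper's docs)
restore_to_time = now.strftime("%Y-%m-%d %H:%M:%S")  # e.g. 2024-01-15 10:30:00

print(backup_id, restore_to_time)
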
216 changes: 203 additions & 13 deletions lib/charms/mysql/v0/backups.py
@@ -46,12 +46,18 @@ def is_unit_blocked(self) -> bool:
"""

import datetime
import io
import logging
import pathlib
import typing
from typing import Dict, List, Optional, Tuple

from charms.data_platform_libs.v0.s3 import S3Requirer
import yaml
from charms.data_platform_libs.v0.s3 import (
CredentialsChangedEvent,
CredentialsGoneEvent,
S3Requirer,
)
from charms.mysql.v0.mysql import (
MySQLConfigureInstanceError,
MySQLCreateClusterError,
@@ -67,6 +73,7 @@ def is_unit_blocked(self) -> bool:
MySQLPrepareBackupForRestoreError,
MySQLRescanClusterError,
MySQLRestoreBackupError,
MySQLRestorePitrError,
MySQLRetrieveBackupWithXBCloudError,
MySQLServiceNotRunningError,
MySQLSetInstanceOfflineModeError,
@@ -76,6 +83,7 @@ def is_unit_blocked(self) -> bool:
MySQLUnableToGetMemberStateError,
)
from charms.mysql.v0.s3_helpers import (
ensure_s3_compatible_group_replication_id,
fetch_and_check_existence_of_s3_path,
list_backups_in_s3_path,
upload_content_to_s3,
@@ -85,7 +93,12 @@ def is_unit_blocked(self) -> bool:
from ops.jujuversion import JujuVersion
from ops.model import BlockedStatus, MaintenanceStatus

from constants import MYSQL_DATA_DIR
from constants import (
MYSQL_BINLOGS_COLLECTOR_CONFIG_FILE,
MYSQL_DATA_DIR,
SERVER_CONFIG_PASSWORD_KEY,
SERVER_CONFIG_USERNAME,
)

logger = logging.getLogger(__name__)

@@ -102,6 +115,10 @@ def is_unit_blocked(self) -> bool:
# to 0 if you are raising the major API version
LIBPATCH = 12

ANOTHER_S3_CLUSTER_REPOSITORY_ERROR_MESSAGE = "S3 repository claimed by another cluster"
MOVE_RESTORED_CLUSTER_TO_ANOTHER_S3_REPOSITORY_ERROR = (
"Move restored cluster to another S3 repository"
)

if typing.TYPE_CHECKING:
from charm import MySQLOperatorCharm
@@ -119,6 +136,13 @@ def __init__(self, charm: "MySQLOperatorCharm", s3_integrator: S3Requirer) -> No
self.framework.observe(self.charm.on.create_backup_action, self._on_create_backup)
self.framework.observe(self.charm.on.list_backups_action, self._on_list_backups)
self.framework.observe(self.charm.on.restore_action, self._on_restore)
self.framework.observe(
self.s3_integrator.on.credentials_changed, self._on_s3_credentials_changed
)
self.framework.observe(self.charm.on.leader_elected, self._on_s3_credentials_changed)
self.framework.observe(
self.s3_integrator.on.credentials_gone, self._on_s3_credentials_gone
)

# ------------------ Helpers ------------------
@property
@@ -235,18 +259,33 @@ def _on_list_backups(self, event: ActionEvent) -> None:

# ------------------ Create Backup ------------------

def _on_create_backup(self, event: ActionEvent) -> None:
"""Handle the create backup action."""
logger.info("A backup has been requested on unit")
def _pre_create_backup_checks(self, event: ActionEvent) -> bool:
"""Run some checks before creating the backup.

Returns: a boolean indicating whether operation should be run.
"""
if not self._s3_integrator_relation_exists:
logger.error("Backup failed: missing relation with S3 integrator charm")
event.fail("Missing relation with S3 integrator charm")
return
return False

if "s3-block-message" in self.charm.app_peer_data:
logger.error("Backup failed: S3 relation is blocked for write")
event.fail("S3 relation is blocked for write")
return False

if not self.charm._mysql.is_mysqld_running():
logger.error(f"Backup failed: process mysqld is not running on {self.charm.unit.name}")
event.fail("Process mysqld not running")
return False

return True

def _on_create_backup(self, event: ActionEvent) -> None:
"""Handle the create backup action."""
logger.info("A backup has been requested on unit")

if not self._pre_create_backup_checks(event):
return

datetime_backup_requested = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
@@ -479,7 +518,7 @@ def _pre_restore_checks(self, event: ActionEvent) -> bool:

return True

def _on_restore(self, event: ActionEvent) -> None:
def _on_restore(self, event: ActionEvent) -> None: # noqa: C901
"""Handle the restore backup action event.

Restore a backup from S3 (parameters for which can retrieved from the
Expand All @@ -489,7 +528,12 @@ def _on_restore(self, event: ActionEvent) -> None:
return

backup_id = event.params["backup-id"].strip().strip("/")
logger.info(f"A restore with backup-id {backup_id} has been requested on unit")
restore_to_time = event.params.get("restore-to-time")
logger.info(
f"A restore with backup-id {backup_id}"
f"{f' to time point {restore_to_time}' if restore_to_time else ''}"
f" has been requested on the unit"
)

# Retrieve and validate missing S3 parameters
s3_parameters, missing_parameters = self._retrieve_s3_parameters()
@@ -519,14 +563,41 @@ def _on_restore(self, event: ActionEvent) -> None:
if not success:
logger.error(f"Restore failed: {error_message}")
event.fail(error_message)

if recoverable:
self._clean_data_dir_and_start_mysqld()
else:
self.charm.app_peer_data.update({
"s3-block-message": MOVE_RESTORED_CLUSTER_TO_ANOTHER_S3_REPOSITORY_ERROR,
"binlogs-collecting": "",
Comment on lines 563 to +571
Contributor

question: if the restore fails and is not recoverable, does this instruct the user to change the s3 repository?

if so, is that the safest & most relevant action for the user to take?

Author

I think it's the most convenient way to report a restore error with an event fail and a blocked unit status simultaneously, while keeping s3-block-message as the application status.
It's important to mention that while the unit is blocked, the application status can't transition to the s3-block-message, because MySQLOperatorCharm._on_update_status -> if not self.cluster_initialized -> if self._mysql.cluster_metadata_exists(self.get_unit_address(unit)) will fail like:

unit-mysql-0: 01:50:14 WARNING unit.mysql/0.juju-log Failed to check if cluster metadata exists from_instance='10.88.216.67'
unit-mysql-0: 01:50:14 INFO unit.mysql/0.juju-log skip status update when not initialized

To summarize, if this particular check fails, the application status will be the following:
[screenshot: application status after the failed restore]
Also, right below the code you referenced, there is the following code block:

self.charm.app_peer_data.update({
"s3-block-message": MOVE_RESTORED_CLUSTER_TO_ANOTHER_S3_REPOSITORY_ERROR,
"binlogs-collecting": "",
})

^ it applies the same logic I'm talking about if any of the later restore steps fail, such as _clean_data_dir_and_start_mysqld, _pitr_restore or _post_restore.

Contributor

I don't think I understand. To clarify, my question was referring to lines 569-571

while keeping s3-block-message as the application status

if the restore fails and is not recoverable, what should the user do?

while the unit is blocked, the application status can't transition to the s3-block-message

what happens if the user removes that unit?

Author

I don't think I understand. To clarify, my question was referring to lines 569-571

Yes, I'm also referring to lines 569-571. I'm just pointing to lines 578-581 to show that the same logic (as when _restore fails and is not recoverable) will be applied to the later steps like _pitr_restore and _post_restore, assuming that these operations are not recoverable.

if the restore fails and is not recoverable, what should the user do?

When it's not recoverable, the user will receive a failed event notification and see a single blocked unit. You can see this in the screenshot attached to the previous message.

The best course of action for the user in this situation is to:

  • investigate the underlying issue in the debug-log and resolve it (for example, the user can re-create the cluster; see the documentation section "Migrate a cluster")
  • run the same restore again / run another restore

Maybe it would be a good idea to add a note about possible restore failures directly in the documentation.

what happens if the user removes that unit?

  1. Before restore:
    unit = active / idle
    application = active
  2. Restore failed:
    unit = blocked / idle
    application = active
  3. User removes unit:
    application = active
  4. User adds unit:
    unit = active / idle
    application = S3 repository claimed by another cluster
  5. Restore succeeded:
    unit = active / idle
    application = Move restored cluster to another S3 repository

})
if not self.charm._mysql.start_stop_binlogs_collecting():
logger.error("Failed to stop binlogs collecting after failed restore")
self.charm.unit.status = BlockedStatus(error_message)
return

self.charm.app_peer_data.update({
"s3-block-message": MOVE_RESTORED_CLUSTER_TO_ANOTHER_S3_REPOSITORY_ERROR,
"binlogs-collecting": "",
})
if not self.charm._mysql.start_stop_binlogs_collecting():
logger.error("Failed to stop binlogs collecting prior to restore")

success, error_message = self._clean_data_dir_and_start_mysqld()
if not success:
logger.error(f"Restore failed: {error_message}")
self.charm.unit.status = BlockedStatus(error_message)
event.fail(error_message)
return

if restore_to_time is not None:
self.charm.unit.status = MaintenanceStatus("Running point-in-time-recovery operations")
success, error_message = self._pitr_restore(restore_to_time, s3_parameters)
if not success:
logger.error(f"Restore failed: {error_message}")
event.fail(error_message)
self.charm.unit.status = BlockedStatus(error_message)
return

# Run post-restore operations
Contributor

suggest moving logic (from lines 585 to 600) into the _restore() method as that is where all the restore logic is located. the _on_restore action handler is just coordinating the restore

this will make it easier to trace the code since all the restore logic is in one method (_restore())
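
Not part of the diff: a self-contained toy sketch of the control flow that suggestion implies, with each step stubbed out. The helper names come from this PR; the simplified return contract and the omission of recoverable/status handling are assumptions for illustration, not the charm's actual API.

from typing import Optional, Tuple

class RestoreFlowSketch:
    """Toy stand-in for the backups handler; only the coordination logic matters here."""

    def _restore(self, backup_id: str) -> Tuple[bool, bool, str]:        # stub
        return True, True, ""

    def _clean_data_dir_and_start_mysqld(self) -> Tuple[bool, str]:      # stub
        return True, ""

    def _pitr_restore(self, restore_to_time: str) -> Tuple[bool, str]:   # stub
        return True, ""

    def _post_restore(self) -> Tuple[bool, str]:                         # stub
        return True, ""

    def restore(self, backup_id: str, restore_to_time: Optional[str] = None) -> Tuple[bool, str]:
        """All restore steps in one place; the action handler only reports the outcome."""
        success, _recoverable, error = self._restore(backup_id)
        if not success:
            return False, error
        success, error = self._clean_data_dir_and_start_mysqld()
        if not success:
            return False, error
        if restore_to_time is not None:
            success, error = self._pitr_restore(restore_to_time)
            if not success:
                return False, error
        return self._post_restore()
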

self.charm.unit.status = MaintenanceStatus("Running post-restore operations")
success, error_message = self._post_restore()
@@ -636,15 +707,29 @@ def _clean_data_dir_and_start_mysqld(self) -> Tuple[bool, str]:

return True, ""

def _pitr_restore(
self, restore_to_time: str, s3_parameters: Dict[str, str]
) -> Tuple[bool, str]:
try:
logger.info("Restoring point-in-time-recovery")
stdout, stderr = self.charm._mysql.restore_pitr(
host=self.charm.get_unit_address(self.charm.unit),
mysql_user=SERVER_CONFIG_USERNAME,
password=self.charm.get_secret("app", SERVER_CONFIG_PASSWORD_KEY),
s3_parameters=s3_parameters,
restore_to_time=restore_to_time,
)
logger.debug(f"Stdout of mysql-pitr-helper restore command: {stdout}")
logger.debug(f"Stderr of mysql-pitr-helper restore command: {stderr}")
except MySQLRestorePitrError:
return False, f"Failed to restore point-in-time-recovery to the {restore_to_time}"
return True, ""

def _post_restore(self) -> Tuple[bool, str]:
"""Run operations required after restoring a backup.

Returns: tuple of (success, error_message)
"""
success, error_message = self._clean_data_dir_and_start_mysqld()
if not success:
return success, error_message

try:
logger.info("Configuring instance to be part of an InnoDB cluster")
self.charm._mysql.configure_instance(create_cluster_admin=False)
@@ -674,3 +759,108 @@ def _post_restore(self) -> Tuple[bool, str]:
return False, "Failed to rescan the cluster"

return True, ""

def _on_s3_credentials_changed(self, event: CredentialsChangedEvent) -> None:
if not self.charm.unit.is_leader():
logger.debug("Early exit on _on_s3_credentials_changed: unit is not a leader")
return

if not self._s3_integrator_relation_exists:
logger.debug(
"Early exit on _on_s3_credentials_changed: s3 integrator relation does not exist"
)
return

if (
not self.charm._mysql.is_mysqld_running()
or not self.charm.unit_initialized
or not self.charm.upgrade.idle
):
logger.debug(
"Deferring _on_s3_credentials_changed: mysql cluster is not started yet or upgrade is occurring"
)
event.defer()
return

logger.info("Retrieving s3 parameters from the s3-integrator relation")
s3_parameters, missing_parameters = self._retrieve_s3_parameters()
if missing_parameters:
logger.error(f"Missing S3 parameters: {missing_parameters}")
return

logger.info("Ensuring compatibility with the provided S3 repository")
if ensure_s3_compatible_group_replication_id(
self.charm._mysql.get_current_group_replication_id(), s3_parameters
):
self.charm.app_peer_data.update({
"s3-block-message": "",
"binlogs-collecting": "true",
})
else:
self.charm.app_peer_data.update({
"s3-block-message": ANOTHER_S3_CLUSTER_REPOSITORY_ERROR_MESSAGE,
"binlogs-collecting": "",
})

if not self.charm._mysql.start_stop_binlogs_collecting(True):
logger.error("Failed to restart binlogs collecting after S3 relation update")

def _on_s3_credentials_gone(self, event: CredentialsGoneEvent) -> None:
if not self.charm.unit.is_leader():
logger.debug("Early exit on _on_s3_credentials_gone: unit is not a leader")
return
self.charm.app_peer_data.update({
"s3-block-message": "",
"binlogs-collecting": "",
})
if not self.charm._mysql.start_stop_binlogs_collecting():
logger.error("Failed to stop binlogs collecting after S3 relation depart")

def update_binlogs_collector_config(self) -> bool:
"""Update binlogs collector service config file.

Returns: whether this operation was successful.
"""
if not self._s3_integrator_relation_exists:
Contributor

it might be useful to delete the config if the s3 integrator relation is removed—but deferring to @paulomach

logger.error(
"Cannot update binlogs collector config: s3 integrator relation does not exist"
)
return False

logger.info("Retrieving s3 parameters from the s3-integrator relation")
s3_parameters, missing_parameters = self._retrieve_s3_parameters()
if missing_parameters:
logger.error(
f"Cannot update binlogs collector config: Missing S3 parameters: {missing_parameters}"
)
return False

bucket_url = (
f"{s3_parameters['bucket']}/{s3_parameters['path']}binlogs"
if s3_parameters["path"][-1] == "/"
else f"{s3_parameters['bucket']}/{s3_parameters['path']}/binlogs"
)

with io.StringIO() as string_io:
yaml.dump(
{
"endpoint": s3_parameters["endpoint"],
"hosts": self.charm._mysql.get_cluster_members(),
"user": SERVER_CONFIG_USERNAME,
"pass": self.charm.get_secret("app", SERVER_CONFIG_PASSWORD_KEY),
"storage_type": "s3",
"s3": {
"access_key_id": s3_parameters["access-key"],
"secret_access_key": s3_parameters["secret-key"],
"bucket_url": bucket_url,
"default_region": s3_parameters["region"],
},
},
string_io,
)
self.charm._mysql.write_content_to_file(
path=MYSQL_BINLOGS_COLLECTOR_CONFIG_FILE,
content=string_io.getvalue(),
)

return True
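
For reference, not part of the diff: a standalone sketch of the shape of the config file this method renders, using obviously fake placeholder values. The key names mirror the yaml.dump call above; the endpoint, hosts, credentials, and bucket path shown here are illustrative only.

import io

import yaml

example_config = {
    "endpoint": "https://s3.example.com",                 # placeholder S3 endpoint
    "hosts": ["10.0.0.11", "10.0.0.12"],                  # cluster member addresses
    "user": "<SERVER_CONFIG_USERNAME>",                   # placeholder for the server-config user
    "pass": "********",
    "storage_type": "s3",
    "s3": {
        "access_key_id": "<access-key>",
        "secret_access_key": "<secret-key>",
        "bucket_url": "my-bucket/mysql/backups/binlogs",  # <bucket>/<path>/binlogs
        "default_region": "us-east-1",
    },
}

with io.StringIO() as string_io:
    yaml.dump(example_config, string_io)
    print(string_io.getvalue())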