Skip to content

Commit

Permalink
refactor: update manager: logs and monitor (ansible#454)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Alex-Izquierdo committed Dec 11, 2023
1 parent 55d152e commit 1fe4255
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 23 deletions.
2 changes: 1 addition & 1 deletion src/aap_eda/services/activation/engine/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def stop(self, container_id: str, logger: LogHandler) -> None:
raise exceptions.ContainerStopError(e) from e

@abstractmethod
def update_logs(self, container_id: str, logger: LogHandler) -> None:
def update_logs(self, container_id: str, log_handler: LogHandler) -> None:
try:
# Implementation
...
Expand Down
10 changes: 5 additions & 5 deletions src/aap_eda/services/activation/engine/kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,9 @@ def _cleanup(self, job_name: str, log_handler: LogHandler):
self._delete_services()
self._delete_job()

def update_logs(self, job_name: str, log_handler: LogHandler) -> None:
def update_logs(self, container_id: str, log_handler: LogHandler) -> None:
try:
pod = self._get_job_pod(job_name)
pod = self._get_job_pod(container_id)
container_status = pod.status.container_statuses[0]
if (
container_status.state.running
Expand Down Expand Up @@ -172,13 +172,13 @@ def update_logs(self, job_name: str, log_handler: LogHandler) -> None:
log_handler.flush()
log_handler.set_log_read_at(dt)
else:
LOGGER.warning(f"Pod with label {job_name} not found.")
LOGGER.warning(f"Pod with label {container_id} not found.")
log_handler.write(
f"Pod with label {job_name} not found.", True
f"Pod with label {container_id} not found.", True
)
except ApiException as e:
LOGGER.exception(
"Failed to fetch pod logs: " f"{job_name}; error: {str(e)}"
"Failed to fetch pod logs: " f"{container_id}; error: {str(e)}"
)
raise

Expand Down
4 changes: 4 additions & 0 deletions src/aap_eda/services/activation/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ class ActivationStopError(Exception):
pass


class ActivationMonitorError(Exception):
pass


class ActivationInstanceNotFound(ActivationException):
pass

Expand Down
69 changes: 52 additions & 17 deletions src/aap_eda/services/activation/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,32 +354,67 @@ def restart(self):
self.start()

def monitor(self):
# TODO: we should check if the db_instance is good
LOGGER.info(f"Monitoring activation id: {self.db_instance.id}")
try:
self._set_activation_instance()
status = self.container_engine.get_status(
self._check_latest_instance()
except (
exceptions.ActivationInstanceNotFound,
exceptions.ActivationInstancePodIdNotFound,
) as e:
LOGGER.error(f"Monitor operation Failed: {e}")
raise exceptions.ActivationMonitorError(f"{e}")

log_handler = DBLogger(self.latest_instance.id)
# TODO: long try block, we should be more specific
try:
container_status = self.container_engine.get_status(
self.latest_instance.activation_pod_id
)
LOGGER.info(f"Current status is {status}")
if status in [ActivationStatus.COMPLETED, ActivationStatus.FAILED]:
self.update_logs()
log_handler = DBLogger(self.latest_instance.id)
self.container_engine.cleanup(
self.latest_instance.activation_pod_id, log_handler
LOGGER.info(
f"Current status of instance {self.latest_instance.id} "
f"is {container_status}",
)
# TODO: implement restart policy logic
if container_status in [
ActivationStatus.COMPLETED,
ActivationStatus.FAILED,
]:
# TODO: it should be the cleanup method
# stop is implicit in the cleanup method
# stop is not clear that it performs a cleanup
# but there is not any stop without cleanup
self.container_engine.stop(
self.latest_instance.activation_pod_id,
log_handler,
)
self._set_activation_status(container_status)
self._set_activation_instance_status(container_status)
self._set_activation_pod_id(pod_id=None)
elif container_status == ActivationStatus.RUNNING:
LOGGER.info(
"Updating logs of activation instance "
f"{self.latest_instance.id}",
)
# TODO: catch exceptions
self.container_engine.update_logs(
self.latest_instance.activation_pod_id,
log_handler,
)
self._set_status(status, None)
elif status == ActivationStatus.RUNNING:
LOGGER.info("Updating logs")
self.update_logs()
except exceptions.ActivationException as e:
except engine_exceptions.ContainerEngineError as e:
# TODO: ensure we handle all the exceptions
# and we set the status correctly
self._set_status(ActivationStatus.FAILED, None, "f{e}")
LOGGER.error(f"Monitor Failed {e}")
LOGGER.error(f"Monitor operation Failed {e}")

def update_logs(self):
# TODO: Get the Activation Instance from Activation
self._set_activation_instance()
"""Update the logs of the latest instance of the activation."""
log_handler = DBLogger(self.latest_instance.id)
# TODO: check latest instance
# TODO: catch exceptions from the engine
self.container_engine.update_logs(
self.latest_instance.activation_pod_id, log_handler
container_id=self.latest_instance.activation_pod_id,
log_handler=log_handler,
)

def _create_activation_instance(self):
Expand Down

0 comments on commit 1fe4255

Please sign in to comment.