Skip to content

Commit

Permalink
Add initial attempts to automatically clean up dangling hosts
Browse files Browse the repository at this point in the history
Now, when workflows fail, Broker will attempt to find a handgling host
and check it in if found.
  • Loading branch information
JacobCallahan committed Nov 20, 2024
1 parent f5b68dc commit ed68222
Showing 1 changed file with 30 additions and 2 deletions.
32 changes: 30 additions & 2 deletions broker/providers/ansible_tower.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def __init__(self, **kwargs):
# Init the class itself
config = kwargs.get("config")
root = kwargs.get("root")
self._v2, self.username = get_awxkit_and_uname(
self._v2, self.uname = get_awxkit_and_uname(
config=config,
root=root,
url=self.url,
Expand Down Expand Up @@ -374,6 +374,33 @@ def _get_failure_messages(self, workflow):
else:
return failure_messages

def _try_get_dangling_hosts(self, failed_workflow):
"""Get one or more hosts that may have been left behind by a failed workflow."""
hosts = []
for node in failed_workflow.get_related("workflow_nodes").results:
if not (job_fields := node.summary_fields.get("job", {})) or job_fields.get(
"failed"
): # skip jobs with no summary fields and failed jobs
continue
if jobs := self._v2.jobs.get(id=job_fields["id"]).results:
if vm_name := jobs[0].artifacts.get("vm_name"):
hosts.append(vm_name)
return list(set(hosts))

def _try_checkin_dangling_host(self, job):
"""Attempt to check in dangling hosts associated with the given job."""
dangling_hosts = self._try_get_dangling_hosts(job)
if not dangling_hosts:
logger.debug("No dangling hosts found for the failed job.")
return
for dangling_host in dangling_hosts:
logger.info(f"Found dangling host: {dangling_host}. Attempting to check in.")
try:
self.release(dangling_host)
logger.debug(f"Successfully checked in dangling host: {dangling_host}")
except exceptions.BrokerError:
logger.warning(f"Failed to check in dangling host: {dangling_host}")

def _compile_host_info(self, host):
try:
host_facts = host.related.ansible_facts.get()
Expand Down Expand Up @@ -607,14 +634,15 @@ def execute(self, **kwargs): # noqa: PLR0912,PLR0915 - Possible TODO refactor
"URL": job_ui_url,
}
helpers.emit(message_data)
self._try_checkin_dangling_host(job)
raise JobExecutionError(message_data=message_data["Reason(s)"])
if strategy := kwargs.pop("artifacts", None):
return self._merge_artifacts(job, strategy=strategy)
return job

def get_inventory(self, user=None):
"""Compile a list of hosts based on any inventory a user's name is mentioned."""
user = user or self.username
user = user or self.uname
invs = [
inv
for inv in self._v2.inventory.get(page_size=200).results
Expand Down

0 comments on commit ed68222

Please sign in to comment.