diff --git a/canine/backends/base.py b/canine/backends/base.py index 1da92a28..a67f2c47 100644 --- a/canine/backends/base.py +++ b/canine/backends/base.py @@ -211,6 +211,50 @@ def walk(self, path: str) -> typing.Generator[typing.Tuple[str, typing.List[str] for dirname in dirnames: yield from self.walk(os.path.join(path, dirname)) + def rmtree(self, path: str, max_retries: int = 5, timeout: int = 5): + """ + Recursively remove the directory tree rooted at the given path. + Automatically retries failures after a brief timeout + """ + pathstat = self.stat(path) + if not stat.S_ISDIR(pathstat.st_mode): + raise NotADirectoryError(path) + for attempt in range(max_retries): + try: + return self._rmtree(path, pathstat) + except (OSError, FileNotFoundError, IOError, NotADirectoryError): + # re-raise on the final attempt so the original traceback is preserved + if attempt >= (max_retries - 1): + raise + time.sleep(timeout) + # Should not be possible to reach here + raise RuntimeError("AbstractTransport.rmtree exceeded retries without exception") + + def _rmtree(self, path: str, pathstat: os.stat_result): + """ + (Internal) + Recursively remove the directory tree rooted at the given path. + Retry logic is handled by the public rmtree() wrapper + """ + if not stat.S_ISDIR(pathstat.st_mode): + raise NotADirectoryError(path) + for fname in self.listdir(path): + fname = os.path.join(path, fname) + try: + fstat = self.stat(fname) + except FileNotFoundError: + # stat() failed, so this is likely a broken symlink; remove it directly + self.remove(fname) + else: + if stat.S_ISDIR(fstat.st_mode): + self._rmtree( + fname, + fstat + ) + else: + self.remove(fname) + self.rmdir(path) + def sendtree(self, src: str, dest: str): """ Copy the full local file tree src to the remote path dest diff --git a/canine/backends/dockerTransient.py b/canine/backends/dockerTransient.py index e6b46735..a754c1f5 100644 --- a/canine/backends/dockerTransient.py +++ b/canine/backends/dockerTransient.py @@ -11,6 +11,8 @@ import io import pickle import math +import threading +import time from .imageTransient import TransientImageSlurmBackend, list_instances, gce from ..utils import get_default_gcp_project, gcp_hourly_cost @@ -19,15 +21,13 @@ class DockerTransientImageSlurmBackend(TransientImageSlurmBackend): # {{{ def __init__( - self, nfs_compute_script = "/usr/local/share/cga_pipeline/src/provision_storage_container_host.sh", - compute_script = "/usr/local/share/cga_pipeline/src/provision_worker_container_host.sh", + self, cluster_name, *, + nfs_compute_script = "/usr/local/share/slurm_gcp_docker/src/provision_storage_container_host.sh", + compute_script = "/usr/local/share/slurm_gcp_docker/src/provision_worker_container_host.sh", nfs_disk_size = 2000, nfs_disk_type = "pd-standard", nfs_action_on_stop = "stop", nfs_image = "", - action_on_stop = "delete", image_family = "pydpiper", image = None, - cluster_name = None, clust_frac = 0.01, user = os.environ["USER"], **kwargs + action_on_stop = "delete", image_family = "slurm-gcp-docker", image = None, + clust_frac = 0.01, user = os.environ["USER"], **kwargs ): - if cluster_name is None: - raise ValueError("You must specify a name for this Slurm cluster!") - if "image" not in kwargs: kwargs["image"] = image @@ -70,6 +70,8 @@ def __init__( self.NFS_server_ready = False self.NFS_ready = False + self.NFS_monitor_thread = None + self.NFS_monitor_lock = None def init_slurm(self): self.dkr = docker.from_env() # # check if image exists try: - image = 
self.dkr.images.get('broadinstitute/pydpiper:latest') + image = self.dkr.images.get('broadinstitute/slurm_gcp_docker:latest') except docker.errors.ImageNotFound: raise Exception("You have not yet built or pulled the Slurm Docker image!") @@ -100,10 +102,14 @@ def init_slurm(self): # # create the Slurm container if it's not already present if self.config["cluster_name"] not in [x.name for x in self.dkr.containers.list()]: - #if image not in [x.image for x in self.dkr.containers.list()]: + # FIXME: gcloud is cloud-provider specific. how can we make this more generic? + gcloud_conf_dir = subprocess.check_output("echo -n ~/.config/gcloud", shell = True).decode() self.dkr.containers.run( image = image.tags[0], detach = True, network_mode = "host", - volumes = { "/mnt/nfs" : { "bind" : "/mnt/nfs", "mode" : "rw" } }, + volumes = { + "/mnt/nfs" : { "bind" : "/mnt/nfs", "mode" : "rw" }, + gcloud_conf_dir : { "bind" : "/etc/gcloud", "mode" : "rw" } + }, name = self.config["cluster_name"], command = "/bin/bash", user = self.config["user"], stdin_open = True, remove = True ) @@ -131,7 +137,7 @@ def init_nodes(self): if not self.NFS_ready: raise Exception("NFS must be mounted before starting nodes!") - self.wait_for_cluster_ready() + self.wait_for_cluster_ready(elastic = True) # list all the nodes that Slurm is aware of @@ -167,33 +173,59 @@ def init_nodes(self): ]) def stop(self): - # stop the Docker - if self.container is not None: - self.container().stop() - # delete node configuration file - subprocess.check_call("rm -f /mnt/nfs/clust_conf/canine/backend_conf.pickle", shell = True) + try: + subprocess.check_call("rm -f /mnt/nfs/clust_conf/canine/backend_conf.pickle", shell = True) + except subprocess.CalledProcessError as e: + print("Couldn't delete node configuration file:", file = sys.stderr) + print(e) - # get list of nodes that still exist + # + # shutdown nodes that are still running (except NFS) allnodes = self.nodes - extant_nodes = self.list_instances_all_zones() - self.nodes = allnodes.loc[allnodes.index.isin(extant_nodes["name"]) & - (allnodes["machine_type"] != "nfs")] + + # sometimes the Google API will spectacularly fail; in that case, we + # just try to shutdown everything in the node list, regardless of whether + # it exists. 
+ try: + extant_nodes = self.list_instances_all_zones() + self.nodes = allnodes.loc[allnodes.index.isin(extant_nodes["name"]) & + (allnodes["machine_type"] != "nfs")] + except Exception: + self.nodes = allnodes.loc[allnodes["machine_type"] != "nfs"] # superclass method will stop/delete/leave these running, depending on how # self.config["action_on_stop"] is set super().stop() - # we handle the NFS separately - self.nodes = allnodes.loc[allnodes["machine_type"] == "nfs"] - super().stop(action_on_stop = self.config["nfs_action_on_stop"]) + # + # stop the Docker container + + # this needs to happen after super().stop() is invoked, since that + # calls scancel, which in turn requires a running Slurm controller container + if self.container is not None: + self.container().stop() + + # + # unmount the NFS + # this needs to be the last step, since Docker will hang if NFS is pulled + # out from under it if self.config["nfs_action_on_stop"] != "run": try: subprocess.check_call("sudo umount -f /mnt/nfs", shell = True) except subprocess.CalledProcessError: print("Could not unmount NFS (do you have open files on it?)\nPlease run `lsof | grep /mnt/nfs`, close any open files, and run `sudo umount -f /mnt/nfs` before attempting to run another pipeline.") + # superclass method will stop/delete/leave the NFS running, depending on + # how self.config["nfs_action_on_stop"] is set. + + # signal the NFS auto-restart monitor thread to exit + if self.NFS_monitor_lock is not None: + self.NFS_monitor_lock.set() + + self.nodes = allnodes.loc[allnodes["machine_type"] == "nfs"] + super().stop(action_on_stop = self.config["nfs_action_on_stop"], kill_straggling_jobs = False) + def _get_container(self, container_name): def closure(): return self.dkr.containers.get(container_name) @@ -246,6 +278,14 @@ def start_NFS(self): ) print("done", flush = True) + # start NFS monitoring thread + self.NFS_monitor_lock = threading.Event() + self.NFS_monitor_thread = threading.Thread( + target = self.autorestart_preempted_node, + args = (nfs_nodename,) + ) + self.NFS_monitor_thread.start() + self.NFS_server_ready = True def mount_NFS(self): @@ -270,10 +310,24 @@ def get_latest_image(self, image_family = None): return gce.images().getFromFamily(family = image_family, project = self.config["project"]).execute() def invoke(self, command, interactive = False): - return_code, (stdout, stderr) = self.container().exec_run( - command, demux = True, tty = interactive, stdin = interactive - ) - return (return_code, io.BytesIO(stdout), io.BytesIO(stderr)) + if self.container is not None and self.container().status == "running": + return_code, (stdout, stderr) = self.container().exec_run( + command, demux = True, tty = interactive, stdin = interactive + ) + return (return_code, io.BytesIO(stdout), io.BytesIO(stderr)) + else: + return (1, io.BytesIO(), io.BytesIO(b"Container is not running!")) + + def autorestart_preempted_node(self, nodename): + while not self.NFS_monitor_lock.is_set(): + try: + inst_details = self._pzw(gce.instances().get)(instance = nodename).execute() + if inst_details["status"] != "RUNNING": + self._pzw(gce.instances().start)(instance = nodename).execute() + except Exception: + print("Error querying NFS server status; retrying in 60s ...", file = sys.stderr) + + time.sleep(60) # }}} diff --git a/canine/backends/imageTransient.py b/canine/backends/imageTransient.py index 465ff48c..665baf5e 100644 --- a/canine/backends/imageTransient.py +++ b/canine/backends/imageTransient.py @@ -1,5 +1,6 @@ # vim: set expandtab: +import time import typing import subprocess import os @@ -9,6 +10,7 @@ from ..utils import 
get_default_gcp_project, gcp_hourly_cost import googleapiclient.discovery as gd +import googleapiclient.errors import pandas as pd gce = gd.build('compute', 'v1') @@ -257,7 +259,7 @@ def init_nodes(self): print("WARNING: couldn't shutdown instance {}".format(node), file = sys.stderr) print(e) - def stop(self, action_on_stop = None): + def stop(self, action_on_stop = None, kill_straggling_jobs = True): """ Delete or stop (default) compute instances """ @@ -265,8 +267,26 @@ def stop(self, action_on_stop = None): action_on_stop = self.config["action_on_stop"] # - # stop, delete, or leave running compute nodes + # kill any still-running jobs + if kill_straggling_jobs: + try: + self.scancel(jobID = "", user = self.config["user"]) + + # wait for jobs to finish + print("Terminating all jobs ... ", end = "", flush = True) + tot_time = 0 + while True: + if self.squeue().empty or tot_time > 60: + break + tot_time += 1 + time.sleep(1) + print("done") + except Exception as e: + print("Error terminating all jobs!", file = sys.stderr) + print(e, file = sys.stderr) + # + # stop, delete, or leave running compute nodes for node in self.nodes.index: try: if action_on_stop == "delete": @@ -277,6 +297,10 @@ else: # default behavior is to shut down self._pzw(gce.instances().stop)(instance = node).execute() + except googleapiclient.errors.HttpError as e: + if e.resp.status != 404: + print("WARNING: couldn't shutdown instance {}".format(node), file = sys.stderr) + print(e) except Exception as e: print("WARNING: couldn't shutdown instance {}".format(node), file = sys.stderr) print(e) @@ -292,11 +316,11 @@ def list_instances_all_zones(self): for x in zone_dict["items"] ], axis = 0).reset_index(drop = True) - def wait_for_cluster_ready(self): + def wait_for_cluster_ready(self, elastic = False): """ Blocks until the main partition is marked as up """ - super().wait_for_cluster_ready(elastic = False) + super().wait_for_cluster_ready(elastic = elastic) # a handy wrapper to automatically add this instance's project and zone to # GCP API calls diff --git a/canine/backends/local.py b/canine/backends/local.py index 7c09de9a..fecd9e93 100644 --- a/canine/backends/local.py +++ b/canine/backends/local.py @@ -3,6 +3,7 @@ import io import sys import subprocess +import shutil from .base import AbstractSlurmBackend, AbstractTransport from ..utils import ArgumentHelper, check_call from agutil import StdOutAdapter @@ -98,6 +99,14 @@ def walk(self, path: str) -> typing.Generator[typing.Tuple[str, typing.List[str] """ yield from os.walk(path) + def _rmtree(self, path: str, pathstat: os.stat_result): + """ + (Internal) + Recursively remove the directory tree rooted at the given path. 
+ Retry logic is handled by the public rmtree() wrapper + """ + shutil.rmtree(path) + class LocalSlurmBackend(AbstractSlurmBackend): """ SLURM backend for interacting with a local slurm node """ diff --git a/canine/localization/base.py b/canine/localization/base.py index 502de07a..3d3d73ec 100644 --- a/canine/localization/base.py +++ b/canine/localization/base.py @@ -437,7 +437,7 @@ def prepare_job_inputs(self, jobId: str, job_inputs: typing.Dict[str, str], comm 'stream', value ) - elif mode == 'localize': + elif mode in ['localize', 'symlink']: self.inputs[jobId][arg] = Localization( None, self.reserve_path('jobs', jobId, 'inputs', os.path.basename(os.path.abspath(value))) ) diff --git a/canine/localization/local.py b/canine/localization/local.py index 3f986151..2b6de42b 100644 --- a/canine/localization/local.py +++ b/canine/localization/local.py @@ -40,6 +40,7 @@ def __init__( super().__init__(backend, transfer_bucket, common, staging_dir, mount_path, project) self.queued_gs = [] # Queued gs:// -> remote staging transfers self.queued_batch = [] # Queued local -> remote directory transfers + self._has_localized = False def localize_file(self, src: str, dest: PathType, transport: typing.Optional[AbstractTransport] = None): """ @@ -47,20 +48,36 @@ def localize_file(self, src: str, dest: PathType, transport: typing.Optional[Abs gs:// files are queued for later transfer local files are symlinked to the staging directory """ - if src.startswith('gs://'): - self.queued_gs.append(( - src, - dest.controllerpath, - 'remote' - )) - elif os.path.exists(src): - src = os.path.abspath(src) - if not os.path.isdir(os.path.dirname(dest.localpath)): - os.makedirs(os.path.dirname(dest.localpath)) - if os.path.isfile(src): - os.symlink(src, dest.localpath) - else: - self.queued_batch.append((src, os.path.join(dest.controllerpath, os.path.basename(src)))) + if not self._has_localized: + if src.startswith('gs://'): + self.queued_gs.append(( + src, + dest.controllerpath, + 'remote' + )) + elif os.path.exists(src): + src = os.path.abspath(src) + if not os.path.isdir(os.path.dirname(dest.localpath)): + os.makedirs(os.path.dirname(dest.localpath)) + if os.path.isfile(src): + os.symlink(src, dest.localpath) + else: + self.queued_batch.append((src, os.path.join(dest.controllerpath, os.path.basename(src)))) + else: + warnings.warn("BatchedLocalizer.localize_file called after main localization. 
Ignoring normal handling and sending over transport") + with self.transport_context(transport) as transport: + if not transport.isdir(os.path.dirname(dest.controllerpath)): + transport.makedirs(os.path.dirname(dest.controllerpath)) + if src.startswith('gs://'): + self.gs_copy( + src, + dest.controllerpath, + 'remote' + ) + elif os.path.isfile(src): + transport.send(src, dest.controllerpath) + else: + transport.sendtree(src, dest.controllerpath) def __enter__(self): """ @@ -118,13 +135,14 @@ def localize(self, inputs: typing.Dict[str, typing.Dict[str, str]], patterns: ty self.sendtree( self.local_dir, self.staging_dir, - transport + transport, exist_okay=True ) staging_dir = self.finalize_staging_dir(inputs.keys(), transport=transport) for src, dest, context in self.queued_gs: self.gs_copy(src, dest, context) for src, dest in self.queued_batch: self.sendtree(src, os.path.dirname(dest)) + self._has_localized = True return staging_dir class LocalLocalizer(BatchedLocalizer): """ @@ -147,17 +165,33 @@ def localize_file(self, src: str, dest: PathType, transport: typing.Optional[Abs gs:// files are queued for later transfer local files are symlinked to the staging directory """ - if src.startswith('gs://'): - self.gs_copy( - src, - dest.localpath, - 'local' - ) - elif os.path.exists(src): - src = os.path.abspath(src) - if not os.path.isdir(os.path.dirname(dest.localpath)): - os.makedirs(os.path.dirname(dest.localpath)) - if os.path.isfile(src): - os.symlink(src, dest.localpath) - else: - self.queued_batch.append((src, os.path.join(dest.controllerpath, os.path.basename(src)))) + if not self._has_localized: + if src.startswith('gs://'): + self.gs_copy( + src, + dest.localpath, + 'local' + ) + elif os.path.exists(src): + src = os.path.abspath(src) + if not os.path.isdir(os.path.dirname(dest.localpath)): + os.makedirs(os.path.dirname(dest.localpath)) + if os.path.isfile(src): + os.symlink(src, dest.localpath) + else: + self.queued_batch.append((src, os.path.join(dest.controllerpath, os.path.basename(src)))) + else: + warnings.warn("LocalLocalizer.localize_file called after main localization. 
Ignoring normal handling and sending over transport") + with self.transport_context(transport) as transport: + if not transport.isdir(os.path.dirname(dest.controllerpath)): + transport.makedirs(os.path.dirname(dest.controllerpath)) + if src.startswith('gs://'): + self.gs_copy( + src, + dest.controllerpath, + 'remote' + ) + elif os.path.isfile(src): + transport.send(src, dest.controllerpath) + else: + transport.sendtree(src, dest.controllerpath) diff --git a/canine/localization/nfs.py b/canine/localization/nfs.py index 4cf295ac..967948c8 100644 --- a/canine/localization/nfs.py +++ b/canine/localization/nfs.py @@ -86,15 +86,7 @@ def localize_file(self, src: str, dest: PathType, transport: typing.Optional[Abs # # check if self.mount_path, self.local_dir, and src all exist on the same NFS share # symlink if yes, copy if no - vols = subprocess.check_output( - "df {} {} {} | awk 'NR > 1 {{ print $1 }}'".format( - self.mount_path, - self.local_dir, - src - ), - shell = True - ) - if len(set(vols.decode("utf-8").rstrip().split("\n"))) == 1: + if self.same_volume(src): os.symlink(src, dest.localpath) else: if os.path.isfile(src): @@ -116,6 +108,20 @@ def localize(self, inputs: typing.Dict[str, typing.Dict[str, str]], patterns: ty """ if overrides is None: overrides = {} + + # automatically override inputs that are absolute paths residing on the same + # NFS share and are not Canine outputs + + # XXX: this can be potentially slow, since it has to iterate over every + # single input. It would make more sense to do this before the adapter + # converts raw inputs. + for input_dict in inputs.values(): + for k, v in input_dict.items(): + if k not in overrides: + if re.match(r"^/", v) is not None and self.same_volume(v) and \ + re.match(r".*/outputs/\d+/.*", v) is None: + overrides[k] = None + overrides = {k:v.lower() if isinstance(v, str) else None for k,v in overrides.items()} with self.backend.transport() as transport: if self.common: @@ -269,3 +275,15 @@ def finalize_staging_dir(self, jobs: typing.Iterable[str], transport: typing.Opt if len(jobs) and not os.path.isdir(controller_env['CANINE_OUTPUT']): os.mkdir(controller_env['CANINE_OUTPUT']) return self.staging_dir + + def same_volume(self, *args): + """ + Check if *args are stored on the same NFS mount as the output directory. + """ + vols = subprocess.check_output( + "df {} | awk 'NR > 1 {{ print $1 }}'".format( + " ".join([shlex.quote(x) for x in [self.mount_path, self.local_dir, *args]]) + ), + shell = True + ) + return len(set(vols.decode("utf-8").rstrip().split("\n"))) == 1 diff --git a/canine/orchestrator.py b/canine/orchestrator.py index e3d84fd4..77bae371 100644 --- a/canine/orchestrator.py +++ b/canine/orchestrator.py @@ -10,6 +10,7 @@ from .localization import AbstractLocalizer, BatchedLocalizer, LocalLocalizer, RemoteLocalizer, NFSLocalizer from .utils import check_call import yaml +import numpy as np import pandas as pd from agutil import status_bar version = '0.7.1' @@ -70,6 +71,14 @@ def stringify(obj: typing.Any) -> typing.Any: key:Orchestrator.stringify(val) for key, val in obj.items() } + elif isinstance(obj, pd.core.series.Series): + return [ + Orchestrator.stringify(elem) + for elem in obj.tolist() + ] + elif isinstance(obj, pd.core.frame.DataFrame): + return Orchestrator.stringify(obj.to_dict(orient = "list")) + return str(obj) @staticmethod @@ -101,7 +110,12 @@ def fill_config(cfg: typing.Union[str, typing.Dict[str, typing.Any]]) -> typing. 
return cfg - def __init__(self, config: typing.Union[str, typing.Dict[str, typing.Any]]): + def __init__(self, config: typing.Union[ + str, + typing.Dict[str, typing.Any], + pd.core.frame.DataFrame, + pd.core.series.Series + ]): """ Initializes the Orchestrator from a given config """ @@ -173,6 +187,10 @@ def __init__(self, config: typing.Union[str, typing.Dict[str, typing.Any]]): if 'stderr' not in self.raw_outputs: self.raw_outputs['stderr'] = '../stderr' + # placeholder for dataframe containing previous results that were + # job avoided + self.df_avoided = None + def run_pipeline(self, output_dir: str = 'canine_output', dry_run: bool = False) -> pd.DataFrame: """ Runs the configured pipeline @@ -257,7 +275,8 @@ def run_pipeline(self, output_dir: str = 'canine_output', dry_run: bool = False) localizer.clean_on_exit = False raise finally: - if len(completed_jobs): + # Check if fully job-avoided so we still delocalize + if batch_id == -2 or len(completed_jobs): print("Delocalizing outputs") outputs = localizer.delocalize(self.raw_outputs, output_dir) print("Parsing output data") @@ -271,7 +290,7 @@ def run_pipeline(self, output_dir: str = 'canine_output', dry_run: bool = False) runtime/3600, node_uptime=sum(uptime.values())/120 )[0]) - job_cost = self.backend.estimate_cost(job_cpu_time=df[('job', 'cpu_hours')].to_dict())[1] + job_cost = self.backend.estimate_cost(job_cpu_time=df[('job', 'cpu_seconds')].to_dict())[1] df['est_cost'] = [job_cost[job_id] for job_id in df.index] if job_cost is not None else [0] * len(df) except: traceback.print_exc() @@ -356,49 +375,62 @@ def wait_for_jobs_to_finish(self, batch_id): return completed_jobs, cpu_time, uptime, prev_acct def make_output_DF(self, batch_id, outputs, cpu_time, prev_acct, localizer = None) -> pd.DataFrame: - try: - acct = self.backend.sacct(job=batch_id) - - df = pd.DataFrame.from_dict( - data={ - job_id: { - ('job', 'slurm_state'): acct['State'][batch_id+'_'+job_id], - ('job', 'exit_code'): acct['ExitCode'][batch_id+'_'+job_id], - ('job', 'cpu_hours'): (prev_acct['CPUTimeRAW'][batch_id+'_'+job_id] + ( - cpu_time[batch_id+'_'+job_id] if batch_id+'_'+job_id in cpu_time else 0 - ))/3600 if prev_acct is not None else -1, - **{ ('inputs', key) : val for key, val in self.job_spec[job_id].items() }, - **{ - ('outputs', key) : val[0] if isinstance(val, list) and len(val) == 1 else val - for key, val in outputs[job_id].items() + df = pd.DataFrame() + if batch_id != -2: + try: + acct = self.backend.sacct(job=batch_id) + + df = pd.DataFrame.from_dict( + data={ + job_id: { + ('job', 'slurm_state'): acct['State'][batch_id+'_'+str(array_id)], + ('job', 'exit_code'): acct['ExitCode'][batch_id+'_'+str(array_id)], + ('job', 'cpu_seconds'): (prev_acct['CPUTimeRAW'][batch_id+'_'+str(array_id)] + ( + cpu_time[batch_id+'_'+str(array_id)] if batch_id+'_'+str(array_id) in cpu_time else 0 + )) if prev_acct is not None else -1, + **{ ('inputs', key) : val for key, val in self.job_spec[job_id].items() }, + **{ + ('outputs', key) : val[0] if isinstance(val, list) and len(val) == 1 else val + for key, val in outputs[job_id].items() + } } - } - for job_id in self.job_spec - }, - orient = "index" - ).rename_axis(index = "_job_id").astype({('job', 'cpu_hours'): int}) - - # - # apply functions to output columns (if any) - if len(self.output_map) > 0: - # columns that receive no (i.e., identity) transformation - identity_map = { x : lambda y : y for x in set(df.columns.get_loc_level("outputs")[1]) - self.output_map.keys() } - - # we get back all columns from the 
dataframe by aggregating columns - # that don't receive any transformation with transformed columns - df["outputs"] = df["outputs"].agg({ **self.output_map, **identity_map }) - except: - df = pd.DataFrame() + for array_id, job_id in enumerate(self.job_spec) + }, + orient = "index" + ).rename_axis(index = "_job_id").astype({('job', 'cpu_seconds'): int}) - if isinstance(localizer, AbstractLocalizer): - fname = "results.k9df.pickle" - df.to_pickle(fname) - localizer.localize_file(fname, localizer.reserve_path(localizer.staging_dir, "results.k9df.pickle")) - os.remove(fname) + # + # apply functions to output columns (if any) + if len(self.output_map) > 0: + # columns that receive no (i.e., identity) transformation + identity_map = { x : lambda y : y for x in set(df.columns.get_loc_level("outputs")[1]) - self.output_map.keys() } + + # we get back all columns from the dataframe by aggregating columns + # that don't receive any transformation with transformed columns + df["outputs"] = df["outputs"].agg({ **self.output_map, **identity_map }) + except Exception: + traceback.print_exc() + + # concatenate with any previously existing job-avoided results + if self.df_avoided is not None: + df = pd.concat([df, self.df_avoided]).sort_index() + + # save DF to disk + if isinstance(localizer, AbstractLocalizer): + with localizer.transport_context() as transport: + dest = localizer.reserve_path("results.k9df.pickle").controllerpath + if not transport.isdir(os.path.dirname(dest)): + transport.makedirs(os.path.dirname(dest)) + with transport.open(dest, 'wb') as w: + df.to_pickle(w, compression=None) return df - def submit_batch_job(self, entrypoint_path, compute_env, extra_sbatch_args = {}): + def submit_batch_job(self, entrypoint_path, compute_env, extra_sbatch_args = {}) -> int: + # every job was avoided; there is nothing to submit + if len(self.job_spec) == 0: + return -2 + batch_id = self.backend.sbatch( entrypoint_path, **{ @@ -414,33 +446,99 @@ def submit_batch_job(self, entrypoint_path, compute_env, extra_sbatch_args = {}) return batch_id - def job_avoid(self, localizer, overwrite = False): #TODO: add params for type of avoidance (force, only if failed, etc.) - # is there preexisting output? 
- df_path = localizer.reserve_path(localizer.staging_dir, "results.k9df.pickle") - if os.path.exists(df_path.localpath): - # load in results and job spec dataframes - r_df = pd.read_pickle(df_path.localpath) - js_df = pd.DataFrame.from_dict(self.job_spec, orient = "index").rename_axis(index = "_job_id") - js_df.columns = pd.MultiIndex.from_product([["inputs"], js_df.columns]) - - r_df = r_df.reset_index(col_level = 0, col_fill = "_job_id") - js_df = js_df.reset_index(col_level = 0, col_fill = "_job_id") - - # check if jobs are compatible: they must have identical inputs and index, - # and output columns must be matching - if not r_df["inputs"].columns.equals(js_df["inputs"].columns): - raise ValueError("Cannot job avoid; set of input parameters do not match") - - r_df_inputs = r_df[["inputs", "_job_id"]].droplevel(0, axis = 1) - js_df_inputs = js_df[["inputs", "_job_id"]].droplevel(0, axis = 1) - - merge_df = r_df_inputs.join(js_df_inputs, how = "outer", lsuffix = "__r", rsuffix = "__js") - merge_df.columns = pd.MultiIndex.from_tuples([x.split("__")[::-1] for x in merge_df.columns]) - - if not merge_df["r"].equals(merge_df["js"]): - raise ValueError("Cannot job avoid; values of input parameters do not match") - - # if jobs are indeed compatible, we can figure out which need to be re-run - r_df.loc[r_df[("job", "slurm_state")] == "FAILED"] - - # destroy their output directories + def job_avoid(self, localizer: AbstractLocalizer, overwrite: bool = False) -> int: #TODO: add params for type of avoidance (force, only if failed, etc.) + """ + Detects jobs which have previously been run in this staging directory. + Succeeded jobs are skipped. Failed jobs are reset and rerun + """ + with localizer.transport_context() as transport: + df_path = localizer.reserve_path("results.k9df.pickle").controllerpath + + #remove all output if specified + if overwrite: + if transport.isdir(localizer.staging_dir): + transport.rmtree(localizer.staging_dir) + transport.makedirs(localizer.staging_dir) + return 0 + + # check for preexisting jobs' output + if transport.exists(df_path): + try: + # load in results and job spec dataframes + with transport.open(df_path) as r: + r_df = pd.read_pickle(r, compression=None) + js_df = pd.DataFrame.from_dict(self.job_spec, orient = "index").rename_axis(index = "_job_id") + + if r_df.empty or \ + "inputs" not in r_df or \ + ("outputs" not in r_df and len(self.raw_outputs) > 0): + raise ValueError("Could not recover previous job results!") + + # check if jobs are compatible: they must have identical inputs and index, + # and output columns must be matching + if not (r_df["inputs"].columns.isin(js_df.columns).all() and \ + js_df.columns.isin(r_df["inputs"].columns).all()): + r_df_set = set(r_df["inputs"].columns) + js_df_set = set(js_df.columns) + raise ValueError( + "Cannot job avoid; sets of input parameters do not match! Parameters unique to:\n" + \ + "\u21B3saved: " + ", ".join(r_df_set - js_df_set) + "\n" + \ + "\u21B3job: " + ", ".join(js_df_set - r_df_set) + ) + + output_temp = pd.Series(index = self.raw_outputs.keys()) + if not (r_df["outputs"].columns.isin(output_temp.index).all() and \ + output_temp.index.isin(r_df["outputs"].columns).all()): + r_df_set = set(r_df["outputs"].columns) + o_df_set = set(output_temp.index) + raise ValueError( + "Cannot job avoid; sets of output parameters do not match! 
Parameters unique to:\n" + \ + "\u21B3saved: " + ", ".join(r_df_set - o_df_set) + "\n" + \ + "\u21B3job: " + ", ".join(o_df_set - r_df_set) + ) + + # check that values of inputs are the same + # we have to sort because the order of jobs might differ for the same + # inputs + sort_cols = r_df.columns.to_series()["inputs"] + r_df = r_df.sort_values(sort_cols.tolist()) + js_df = js_df.sort_values(sort_cols.index.tolist()) + + if not r_df["inputs"].equals(js_df): + raise ValueError("Cannot job avoid; values of input parameters do not match!") + + # if all is well, figure out which jobs need to be re-run + fail_idx = r_df[("job", "slurm_state")] == "FAILED" + self.df_avoided = r_df.loc[~fail_idx] + + # remove jobs that don't need to be re-run from job_spec + for k in r_df.index[~fail_idx]: + self.job_spec.pop(k, None) + + # remove output directories of failed jobs + for k in self.job_spec: + transport.rmtree( + localizer.reserve_path('jobs', k).controllerpath + ) + + # we also have to remove the common inputs directory, so that + # the localizer can regenerate it + if len(self.job_spec) > 0: + transport.rmtree( + localizer.reserve_path('common').controllerpath + ) + + return np.count_nonzero(~fail_idx) + except (ValueError, OSError) as e: + print(e) + print("Overwriting output and aborting job avoidance.") + transport.rmtree(localizer.staging_dir) + transport.makedirs(localizer.staging_dir) + return 0 + + # if the output directory exists but there's no output dataframe, assume + # it's corrupted and remove it + elif transport.isdir(localizer.staging_dir): + transport.rmtree(localizer.staging_dir) + transport.makedirs(localizer.staging_dir) + return 0 diff --git a/canine/utils.py b/canine/utils.py index 3c27352a..00d627c6 100644 --- a/canine/utils.py +++ b/canine/utils.py @@ -10,6 +10,8 @@ from subprocess import CalledProcessError import google.auth import paramiko +import shutil +import time class ArgumentHelper(dict): """ @@ -264,3 +266,5 @@ def gcp_hourly_cost(mtype: str, preemptible: bool = False, ssd_size: int = 0, hd else (gpu_pricing[gpu_type][1 if preemptible else 0] * gpu_count) ) ) + +# rmtree_retry removed in favor of AbstractTransport.rmtree diff --git a/docs/canine/backends.html b/docs/canine/backends.html index dbc1c761..7283a718 100644 --- a/docs/canine/backends.html +++ b/docs/canine/backends.html @@ -239,6 +239,13 @@

canine.backends +
+rmtree(path: str, max_retries: int = 5, timeout: int = 5)
+

Recursively remove the directory tree rooted at the given path. +Automatically retries failures after a brief timeout

+
+
send(localfile: Union[str, IO], remotefile: Union[str, IO])
@@ -524,6 +531,13 @@

canine.backends +
+rmtree(path: str, max_retries: int = 5, timeout: int = 5)
+

Recursively remove the directory tree rooted at the given path. +Automatically retries failures after a brief timeout
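The retry discipline behind rmtree — a bounded number of attempts, a fixed sleep between them, and a re-raise on the final try so the traceback survives — is worth seeing in isolation. A minimal sketch (with_retries is an illustrative name, not part of the patch):

```python
import time

def with_retries(fn, max_retries: int = 5, timeout: int = 5):
    # Mirror AbstractTransport.rmtree's loop: retry transient OSErrors,
    # sleeping `timeout` seconds between attempts, and re-raise on the
    # final attempt so the original traceback is preserved.
    for attempt in range(max_retries):
        try:
            return fn()
        except OSError:
            if attempt >= max_retries - 1:
                raise
            time.sleep(timeout)

print(with_retries(lambda: "ok"))
```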

+

+
send(localfile: Union[str, IO], remotefile: Union[str, IO])
@@ -825,7 +839,7 @@

canine.backends
-stop(action_on_stop=None)
+stop(action_on_stop=None, kill_straggling_jobs=True)

Delete or stop (default) compute instances
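The new kill_straggling_jobs flag guards a bounded drain loop that runs before any nodes are touched. A self-contained sketch of the same pattern (drain_jobs is an illustrative helper; backend stands for any AbstractSlurmBackend):

```python
import time

def drain_jobs(backend, user: str, max_wait: int = 60) -> None:
    # Cancel all of `user`'s jobs, then poll squeue until the queue is
    # empty or max_wait seconds have elapsed -- the same bounded wait
    # stop() now performs before shutting down compute nodes.
    backend.scancel(jobID="", user=user)
    waited = 0
    while not backend.squeue().empty and waited < max_wait:
        waited += 1
        time.sleep(1)
```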

@@ -838,7 +852,7 @@

canine.backends
-wait_for_cluster_ready()
+wait_for_cluster_ready(elastic=False)

Blocks until the main partition is marked as up

@@ -846,7 +860,7 @@

canine.backends
-class canine.backends.DockerTransientImageSlurmBackend(nfs_compute_script='/usr/local/share/cga_pipeline/src/provision_storage_container_host.sh', compute_script='/usr/local/share/cga_pipeline/src/provision_worker_container_host.sh', nfs_disk_size=2000, nfs_disk_type='pd-standard', nfs_action_on_stop='stop', nfs_image='', action_on_stop='delete', image_family='pydpiper', image=None, cluster_name=None, clust_frac=0.01, user='aarong', **kwargs)
+class canine.backends.DockerTransientImageSlurmBackend(cluster_name, *, nfs_compute_script='/usr/local/share/slurm_gcp_docker/src/provision_storage_container_host.sh', compute_script='/usr/local/share/slurm_gcp_docker/src/provision_worker_container_host.sh', nfs_disk_size=2000, nfs_disk_type='pd-standard', nfs_action_on_stop='stop', nfs_image='', action_on_stop='delete', image_family='slurm-gcp-docker', image=None, clust_frac=0.01, user='aarong', **kwargs)
estimate_cost(clock_uptime: Optional[float] = None, node_uptime: Optional[float] = None, job_cpu_time: Optional[Dict[str, float]] = None) → Tuple[float, Optional[Dict[str, float]]]
@@ -946,7 +960,7 @@

canine.backends
-wait_for_cluster_ready()
+wait_for_cluster_ready(elastic=False)

Blocks until the main partition is marked as up
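A quick usage sketch of the new transport-level rmtree, assuming a LocalTransport used as a context manager (the scratch tree here is purely illustrative):

```python
import os
import tempfile

from canine.backends import LocalTransport

with LocalTransport() as transport:
    root = tempfile.mkdtemp()
    os.makedirs(os.path.join(root, "a", "b"))
    with open(os.path.join(root, "a", "b", "scratch.txt"), "w") as w:
        w.write("scratch")
    # Retries transient failures up to max_retries times, sleeping
    # `timeout` seconds between attempts, before re-raising.
    transport.rmtree(root, max_retries=5, timeout=5)
    assert not transport.exists(root)
```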

diff --git a/docs/canine/index.html b/docs/canine/index.html index a4d19ca3..ba415f1d 100644 --- a/docs/canine/index.html +++ b/docs/canine/index.html @@ -37,7 +37,7 @@

canine
-class canine.Orchestrator(config: Union[str, Dict[str, Any]])
+class canine.Orchestrator(config: Union[str, Dict[str, Any], pandas.core.frame.DataFrame, pandas.core.series.Series])

Main class Parses a configuration object, initializes, runs, and cleans up a Canine Pipeline

@@ -47,6 +47,13 @@

canine +
+job_avoid(localizer: canine.localization.base.AbstractLocalizer, overwrite: bool = False) → int
+

Detects jobs which have previously been run in this staging directory. +Succeeded jobs are skipped. Failed jobs are reset and rerun

+

+
run_pipeline(output_dir: str = 'canine_output', dry_run: bool = False) → pandas.core.frame.DataFrame
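Since Orchestrator now accepts pandas objects, stringify() gained Series and DataFrame branches; a minimal illustration of what they produce:

```python
import pandas as pd

from canine import Orchestrator

# A Series becomes a list of stringified elements; a DataFrame becomes a
# dict of such lists (orient="list"); scalars are str()'d as before.
cfg_inputs = {"sample": pd.Series(["s1", "s2"]), "threads": 4}
print(Orchestrator.stringify(cfg_inputs))
# {'sample': ['s1', 's2'], 'threads': '4'}
```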
diff --git a/docs/canine/localization.html b/docs/canine/localization.html index bbe5c6c5..f87f16ce 100644 --- a/docs/canine/localization.html +++ b/docs/canine/localization.html @@ -545,6 +545,12 @@

canine.localization +
+same_volume(*args)
+

Check if *args are stored on the same NFS mount as the output directory.

+

+
sendtree(src: str, dest: str, transport: Optional[canine.backends.base.AbstractTransport] = None, exist_okay=False)
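same_volume reduces to comparing df's filesystem column across paths; the same check in standalone form (on_same_volume is an illustrative name):

```python
import shlex
import subprocess

def on_same_volume(*paths: str) -> bool:
    # Ask df for each path's filesystem source; identical sources mean
    # all paths live on the same mount, which is exactly the test
    # NFSLocalizer.same_volume performs.
    out = subprocess.check_output(
        "df {} | awk 'NR > 1 {{ print $1 }}'".format(
            " ".join(shlex.quote(p) for p in paths)
        ),
        shell=True,
    )
    return len(set(out.decode().rstrip().split("\n"))) == 1

print(on_same_volume("/tmp", "/tmp"))
```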
diff --git a/docs/canine/xargs.html b/docs/canine/xargs.html index d441645c..c073709b 100644 --- a/docs/canine/xargs.html +++ b/docs/canine/xargs.html @@ -49,6 +49,13 @@

canine.xargs +
+job_avoid(localizer: canine.localization.base.AbstractLocalizer, overwrite: bool = False) → int
+

Detects jobs which have previously been run in this staging directory. +Succeeded jobs are skipped. Failed jobs are reset and rerun

+

+
run_pipeline(dry_run: bool = False) → pandas.core.frame.DataFrame
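The NFS auto-restart monitor added in dockerTransient.py is an Event-gated polling loop; the same pattern in miniature (check_and_restart stands in for the GCE status check):

```python
import threading
import time

stop_event = threading.Event()

def monitor(check_and_restart, poll_interval: float) -> None:
    # Poll until stop_event is set; autorestart_preempted_node loops the
    # same way, restarting the NFS server whenever it is not RUNNING.
    while not stop_event.is_set():
        try:
            check_and_restart()
        except Exception:
            pass  # the patch logs the error and retries on the next tick
        time.sleep(poll_interval)

t = threading.Thread(target=monitor, args=(lambda: None, 0.1))
t.start()
stop_event.set()  # stop() sets the event, and the thread exits
t.join()
```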
diff --git a/docs/genindex.html b/docs/genindex.html index 354d0dfa..cba6fa6a 100644 --- a/docs/genindex.html +++ b/docs/genindex.html @@ -312,6 +312,12 @@
[regenerated Sphinx general index: the hunk adds a "J" section with entries for the new job_avoid() method; surrounding markup omitted]
diff --git a/docs/objects.inv b/docs/objects.inv index d7fcf8aa..cba218be 100644 Binary files a/docs/objects.inv and b/docs/objects.inv differ
diff --git a/docs/searchindex.js b/docs/searchindex.js index 48b02bd2..4cf8e4d6 100644 --- a/docs/searchindex.js +++ b/docs/searchindex.js @@ -1 +1 @@
[regenerated Sphinx search index: a single-line, machine-generated JSON blob, truncated in the source; the rebuilt index additionally registers the new rmtree, job_avoid, and same_volume methods]
:4,instal:1,instanc:1,interact:[1,4],interactive_login:1,interpret:4,invok:1,isdir:1,isfil:1,islink:1,item:4,iter:[1,3,4],its:[3,4],job:[0,1,2,3,5],job_avoid:[2,5],job_cpu_tim:1,job_input:3,job_setup_teardown:3,jobid:[0,1,3],kei:[1,4],keyerror:4,keyword:[1,4],kill:1,kill_straggling_job:1,kwarg:[1,3],lack:4,later:[3,4],less:3,like:[1,4],line:[1,4],list:[0,1],list_instances_all_zon:1,listdir:1,live:1,load:[2,5],load_config_arg:1,local:[0,1,2,5,6],localfil:1,localize_fil:3,localloc:3,localpath:1,localslurmbackend:1,localtransport:1,locat:[1,3],login_count:1,login_typ:1,machin:4,mai:[1,3,4],main:[1,2],make_interact:4,makedir:1,manag:[2,4],manual:0,manualadapt:0,mark:1,match:1,max_node_count:1,max_retri:1,mayb:0,meet:1,mem_cost:4,method:4,miss:3,mkdir:1,mklink:1,mode:1,modul:6,mojav:1,more:3,most:0,mount:3,mount_path:3,move:1,mtype:4,much:0,mung:1,must:[1,3],name:[1,5],need:1,nfs_action_on_stop:1,nfs_compute_script:1,nfs_disk_siz:1,nfs_disk_typ:1,nfs_imag:1,nfslocal:3,node:[1,3],node_uptim:1,none:[0,1,3,4,5],normal:1,normpath:1,note:1,noth:0,now:3,number:[3,4],obj:[2,5],object:[1,2,3,4,5],occurr:4,one:3,onli:[3,4,5],open:[1,3],option:[0,1,3,4,5],orchestr:[2,5],otherwis:[0,4],out:[0,1],output:[0,2,3,5],output_dir:[2,3],outputnam:0,over:1,overhead:1,overrid:3,overwrit:[1,2,5],pack_batch_script:1,page:6,pai:3,pair:4,panda:[1,2,5],param:4,paramiko:[1,4],parent:3,pars:[0,1,2],parse_input:0,parse_output:0,partit:1,pass:[1,3],path:[0,1,3],pathtyp:3,pattern:3,perform:5,permiss:1,phase:3,pick_common_input:3,pipelin:[2,5],plugin:1,pop:4,popitem:4,posit:4,pre:3,preconfigur:1,predetermin:3,preempt_cpu_cost:4,preempt_ext_cost:4,preempt_mem_cost:4,preemptibl:[1,4],prefix:1,prepar:3,prepare_job_input:3,present:4,pretti:0,previous:[2,5],price:4,prior:3,probabl:1,product:0,project:[1,3,4],proper:[1,3],properli:1,properti:[0,4],provid:[1,3,4],provision_storage_container_host:1,provision_worker_container_host:1,python:[1,4],queu:3,queue:1,rais:[0,4],raw:0,read:[1,2,4,5],readi:[1,3],receiv:1,receivetre:[1,3],recent:0,recommend:1,recurs:[1,2,5],region:4,regular:1,rekei:1,rel:[1,3],remot:[1,3],remotefil:1,remoteloc:3,remotepath:1,remoteslurmbackend:1,remotetransport:1,remov:[1,4,5],renam:1,request:[1,3],requir:1,rerun:[2,5],reserve_path:3,reset:[2,5],resid:3,resourc:5,respons:4,result:[0,3],retri:1,rmdir:1,rmtree:1,root:1,run:[1,2,3,4,5],run_pipelin:[2,5],runtim:[2,5],sacct:1,same:[0,1,3],same_volum:3,sbatch:1,sbcast:1,scan:3,scancel:1,schedul:2,script:[1,3],script_path:1,search:6,secondary_disk_s:1,see:1,self:[0,1,3],send:1,sendtre:[1,3],seri:2,session:1,set:[0,1,3,4],setdefault:4,setup:3,shallow:4,share:[1,4],should:[1,3,4],show:1,similar:3,simplifi:5,sinfo:1,singl:0,size:[3,4],skip:[2,5],slurm:[1,2,3,4,5],slurm_conf_path:1,slurm_gcp_dock:1,slurmctld:1,slurmdbd:1,slurmopt:1,slurmparam:1,some:4,sourc:3,spec:0,specif:[0,1,3,5],specifi:[1,3,4],spun:1,squeue:1,src:[1,3],srun:1,ssd_size:4,ssh2:1,ssh:1,ssh_agent:1,ssh_auth_sock:1,sshclient:1,stage:[2,3,5],staging_dir:3,standard:1,start:1,startup:3,stat:1,statu:[1,4],stderr:[1,4],stdin:[1,4],stdout:[1,4],step:3,stop:1,storag:3,store:3,str:[0,1,2,3,4,5],strategi:3,stream:[1,4],string:[2,4,5],stringifi:[2,5],structur:1,stty:1,stupid:1,subdirectori:1,succeed:[2,5],suitabl:1,sum:1,support:1,sustain:4,symlink:[1,3],take:[0,3,4],task:[2,3],teardown:3,temporari:5,termin:1,thei:1,thi:[1,2,3,4,5],those:3,through:[1,2,3,5],timeout:1,too:1,tot_node_count:1,total:1,transfer:3,transfer_bucket:3,transientgcpslurmbackend:1,transientimageslurmbackend:1,translat:4,transport:[1,3],transpor
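For reviewers who want to reproduce the blob above rather than patch it by hand: searchindex.js is emitted by the Sphinx HTML builder as a side effect of building the pages. The snippet below is a minimal sketch only; the docs source directory ("docs") and output directory ("docs/_build/html") are assumptions about this repo's layout, not taken from the diff.

    import subprocess

    # Rebuild the HTML docs; the html builder rewrites searchindex.js
    # alongside the generated pages. Paths are assumed, not repo-verified.
    subprocess.check_call(["sphinx-build", "-b", "html", "docs", "docs/_build/html"])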
diff --git a/setup.py b/setup.py
index 29b9241a..866eb043 100644
--- a/setup.py
+++ b/setup.py
@@ -4,8 +4,8 @@ import sys
 
 ver_info = sys.version_info
-if ver_info < (3,5,4):
-    raise RuntimeError("canine requires at least python 3.5.4")
+if ver_info < (3,7,0):
+    raise RuntimeError("canine requires at least python 3.7")
 
 with open(os.path.join(os.path.dirname(__file__), 'canine', 'orchestrator.py')) as r:
     version = re.search(r'version = \'(\d+\.\d+\.\d+[-_a-zA-Z0-9]*)\'', r.read()).group(1)
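The bumped gate works because sys.version_info compares against a plain tuple lexicographically, element by element: every 3.6.x interpreter sorts below (3,7,0), while 3.7.0 itself (whose version_info has equal leading components plus trailing fields) passes. A self-contained illustration in standard Python, nothing repo-specific:

    import sys

    # Tuple comparison is element-wise and lexicographic:
    assert (3, 6, 9) < (3, 7, 0)        # any 3.6.x is rejected
    assert not ((3, 7, 0) < (3, 7, 0))  # 3.7.0 itself is accepted

    if sys.version_info < (3, 7, 0):
        raise RuntimeError("canine requires at least python 3.7")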