From dc7911fd58f1f8d0e2c56dc42103adad22c6e97d Mon Sep 17 00:00:00 2001 From: Colin Thomas <33940547+colinthomas-z80@users.noreply.github.com> Date: Fri, 6 Sep 2024 09:13:00 -0400 Subject: [PATCH] TaskVineExecutor: write function data to temp directory (#3592) This moves the working directory for TaskVine executor function data to a directory in /tmp. These files were previously written adjacent to the logging in the working directory. They may be written in excess when running a large number of tasks. --- parsl/executors/taskvine/executor.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/parsl/executors/taskvine/executor.py b/parsl/executors/taskvine/executor.py index 2e1efb211f..a15a444d2c 100644 --- a/parsl/executors/taskvine/executor.py +++ b/parsl/executors/taskvine/executor.py @@ -3,6 +3,7 @@ high-throughput system for delegating Parsl tasks to thousands of remote machines """ +import getpass import hashlib import inspect import itertools @@ -18,6 +19,7 @@ import threading import uuid from concurrent.futures import Future +from datetime import datetime from typing import List, Literal, Optional, Union # Import other libraries @@ -215,9 +217,9 @@ def __create_data_and_logging_dirs(self): # Create directories for data and results log_dir = os.path.join(run_dir, self.label) - self._function_data_dir = os.path.join(run_dir, self.label, "function_data") os.makedirs(log_dir) - os.makedirs(self._function_data_dir) + tmp_prefix = f'{self.label}-{getpass.getuser()}-{datetime.now().strftime("%Y%m%d%H%M%S%f")}-' + self._function_data_dir = tempfile.TemporaryDirectory(prefix=tmp_prefix) # put TaskVine logs outside of a Parsl run as TaskVine caches between runs while # Parsl does not. 
@@ -227,7 +229,7 @@ def __create_data_and_logging_dirs(self): # factory logs go with manager logs regardless self.factory_config.scratch_dir = self.manager_config.vine_log_dir - logger.debug(f"Function data directory: {self._function_data_dir}, log directory: {log_dir}") + logger.debug(f"Function data directory: {self._function_data_dir.name}, log directory: {log_dir}") logger.debug( f"TaskVine manager log directory: {self.manager_config.vine_log_dir}, " f"factory log directory: {self.factory_config.scratch_dir}") @@ -293,7 +295,7 @@ def _path_in_task(self, executor_task_id, *path_components): 'map': Pickled file with a dict between local parsl names, and remote taskvine names. """ task_dir = "{:04d}".format(executor_task_id) - return os.path.join(self._function_data_dir, task_dir, *path_components) + return os.path.join(self._function_data_dir.name, task_dir, *path_components) def submit(self, func, resource_specification, *args, **kwargs): """Processes the Parsl app by its arguments and submits the function