Skip to content

Commit

Permalink
Vine: Add symlink mount option. (#3527)
Browse files Browse the repository at this point in the history
* Add VINE_MOUNT_SYMLINK option that permits the worker to symlink objects into the sandbox instead of hard-linking them.
This is provided since recursively hard-linking an object could be quite expensive.

* Remove disable-symlinks option.

* Add mount_symlink option for files.

* Use mount_symlink option when creating serverless packages.

* Add mount flag options to add_input

* Reverse sense of symlink return code.

* Add test for symlink option.

* format

* format

* build-conda depends upon lint

* Bitwise or

* Remove mistaken mount_symlink in file flags.
  • Loading branch information
dthain authored Oct 3, 2023
1 parent 84c03b0 commit 2f558c8
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 14 deletions.
1 change: 1 addition & 0 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ jobs:
tag: ${{ github.event.inputs.tag }}

build-conda:
needs: lint
runs-on: ubuntu-latest
timeout-minutes: 60
env:
Expand Down
8 changes: 5 additions & 3 deletions taskvine/src/bindings/python3/ndcctools/taskvine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _free(self):
self._task = None

@staticmethod
def _determine_mount_flags(watch=False, failure_only=False, success_only=False, strict_input=False):
def _determine_mount_flags(watch=False, failure_only=False, success_only=False, strict_input=False, mount_symlink=False):
flags = cvine.VINE_TRANSFER_ALWAYS
if watch:
flags |= cvine.VINE_WATCH
Expand All @@ -134,6 +134,8 @@ def _determine_mount_flags(watch=False, failure_only=False, success_only=False,
flags |= cvine.VINE_SUCCESS_ONLY
if strict_input:
flags |= cvine.VINE_FIXED_LOCATION
if mount_symlink:
flags |= cvine.VINE_MOUNT_SYMLINK
return flags

@staticmethod
Expand Down Expand Up @@ -256,12 +258,12 @@ def add_feature(self, name):
# >>> f = m.declare_untar(url)
# >>> task.add_input(f,"data")
# @endcode
def add_input(self, file, remote_name, strict_input=False):
def add_input(self, file, remote_name, strict_input=False, mount_symlink=False):
# SWIG expects strings
if not isinstance(remote_name, str):
raise TypeError(f"remote_name {remote_name} is not a str")

flags = Task._determine_mount_flags(strict_input=strict_input)
flags = Task._determine_mount_flags(strict_input=strict_input, mount_symlink=mount_symlink)

if cvine.vine_task_add_input(self._task, file._file, remote_name, flags)==0:
raise ValueError("invalid file description")
Expand Down
3 changes: 2 additions & 1 deletion taskvine/src/manager/taskvine.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ typedef enum {
VINE_WATCH = 2, /**< Watch the output file and send back changes as the task runs. */
VINE_FAILURE_ONLY = 4, /**< Only return this output file if the task failed. (Useful for returning large log files.) */
VINE_SUCCESS_ONLY = 8, /**< Only return this output file if the task succeeded. */
VINE_RETRACT_ON_RESET = 16 /**< Remove this file from the mount lists if the task is reset. (TaskVine internal use only.) */
VINE_RETRACT_ON_RESET = 16, /**< Remove this file from the mount lists if the task is reset. (TaskVine internal use only.) */
VINE_MOUNT_SYMLINK = 32 /**< Permit this directory to be mounted via symlink instead of hardlink. */
} vine_mount_flags_t;

/** Control caching and sharing behavior of file objects.
Expand Down
11 changes: 10 additions & 1 deletion taskvine/src/worker/vine_sandbox.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,16 @@ static int stage_input_file(struct vine_process *p, struct vine_mount *m, struct
if (status == VINE_CACHE_STATUS_READY) {
create_dir_parents(sandbox_path, 0777);
debug(D_VINE, "input: link %s -> %s", cache_path, sandbox_path);
result = file_link_recursive(cache_path, sandbox_path, vine_worker_symlinks_enabled);
if (m->flags & VINE_MOUNT_SYMLINK) {
/* If the user has requested a symlink, just do that because it is faster for large directories. */
result = symlink(cache_path, sandbox_path);
/* Change sense of Unix result to true/false. */
result = !result;
} else {
/* Otherwise recursively hard-link the object into the sandbox. */
result = file_link_recursive(cache_path, sandbox_path, 1);
}

if (!result)
debug(D_VINE,
"couldn't link %s into sandbox as %s: %s",
Expand Down
8 changes: 0 additions & 8 deletions taskvine/src/worker/vine_worker.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,6 @@ static int sigchld_received_flag = 0;
// Password shared between manager and worker.
char *vine_worker_password = 0;

// Allow worker to use symlinks when link() fails. Enabled by default.
int vine_worker_symlinks_enabled = 1;

int mini_task_id = 0;

// Worker id. A unique id for this worker instance.
Expand Down Expand Up @@ -2123,7 +2120,6 @@ enum {
LONG_OPT_DISK,
LONG_OPT_DISK_PERCENT,
LONG_OPT_GPUS,
LONG_OPT_DISABLE_SYMLINKS,
LONG_OPT_IDLE_TIMEOUT,
LONG_OPT_CONNECT_TIMEOUT,
LONG_OPT_SINGLE_SHOT,
Expand Down Expand Up @@ -2153,7 +2149,6 @@ static const struct option long_options[] = {{"advertise", no_argument, 0, 'a'},
{"min-backoff", required_argument, 0, 'i'},
{"max-backoff", required_argument, 0, 'b'},
{"single-shot", no_argument, 0, LONG_OPT_SINGLE_SHOT},
{"disable-symlinks", no_argument, 0, LONG_OPT_DISABLE_SYMLINKS},
{"disk-threshold", required_argument, 0, 'z'},
{"memory-threshold", required_argument, 0, LONG_OPT_MEMORY_THRESHOLD},
{"arch", required_argument, 0, 'A'},
Expand Down Expand Up @@ -2332,9 +2327,6 @@ int main(int argc, char *argv[])
warn(D_NOTICE, "Ignoring --wall-time, a positive integer is expected.");
}
break;
case LONG_OPT_DISABLE_SYMLINKS:
vine_worker_symlinks_enabled = 0;
break;
case LONG_OPT_SINGLE_SHOT:
single_shot_mode = 1;
break;
Expand Down
1 change: 0 additions & 1 deletion taskvine/src/worker/vine_worker.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
void vine_worker_send_cache_update( struct link *manager, const char *cachename, int64_t size, timestamp_t transfer_time, timestamp_t transfer_start );
void vine_worker_send_cache_invalid( struct link *manager, const char *cachename, const char *message );

extern int vine_worker_symlinks_enabled;
extern char *vine_worker_password;

#endif
13 changes: 13 additions & 0 deletions taskvine/test/vine_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,19 @@ def next_output_name():
t = q.wait(wait_time)
report_task(t, "success", 0, [path.join(test_dir, output_name)])

# same thing, but this time symlink the input directory.
output_name = next_output_name()
t = vine.Task(f"cd my_dir && ./{exec_name} {input_name} 2>&1 > {output_name}")
in_dir = q.declare_file(test_dir, cache=True)
t.add_input(exec_file, exec_name)
t.add_input(in_dir, "my_dir", mount_symlink=True)
output_file = q.declare_file(path.join(test_dir, output_name), cache=False)
t.add_output(output_file, path.join("my_dir", output_name))

q.submit(t)
t = q.wait(wait_time)
report_task(t, "success", 0, [path.join(test_dir, output_name)])

# we bring back the outputs from a directory:
output_name = next_output_name()
t = vine.Task(f"mkdir outs && ./{exec_name} {input_name} 2>&1 > outs/{output_name}")
Expand Down

0 comments on commit 2f558c8

Please sign in to comment.