From e38efcad86e160e629c46f0fac18d4f74c2a2fa9 Mon Sep 17 00:00:00 2001 From: Thanh Son Phung Date: Thu, 21 Sep 2023 12:58:41 -0400 Subject: [PATCH] Vine: Make conservative disk estimates when the amount of disk is not given to worker (#3501) * skip disk * check lib exist * add lib ready check * set default disk value * format * guard and clarify * Revert "add lib ready check" This reverts commit e7fa1fb960163b9ba2abf63eb5318777e4f2161f. --- taskvine/src/worker/vine_worker.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/taskvine/src/worker/vine_worker.c b/taskvine/src/worker/vine_worker.c index fe5f69a847..b30b0b43bb 100644 --- a/taskvine/src/worker/vine_worker.c +++ b/taskvine/src/worker/vine_worker.c @@ -157,6 +157,12 @@ static struct vine_watcher *watcher = 0; /* The resources measured and available at this worker. */ static struct vine_resources *total_resources = 0; +/* When the amount of disk is not specified, manually set the reporting disk to + * this percentage of the measured disk. This safeguards the fact that disk measurements + * are estimates and thus may unncessarily forsaken tasks with unspecified resources. + * Defaults to 90%. */ +static int disk_percent = 90; + static int64_t last_task_received = 0; /* 0 means not given as a command line option. */ @@ -320,6 +326,10 @@ static void measure_worker_resources() if (manual_disk_option > 0) { r->disk.total = MIN(r->disk.total, manual_disk_option); + } else { + /* Set the reporting disk to a fraction of the measured disk to avoid + * unnecessarily forsaking tasks with unspecified resources. */ + r->disk.total = ceil(r->disk.total * disk_percent / 100); } r->disk.inuse = measure_worker_disk(); @@ -1133,11 +1143,9 @@ Return true if this task can run with the resources currently available. static int task_resources_fit_now(struct vine_task *t) { - /* XXX removed disk space check due to problems running workers locally or multiple workers on a single node - * since default tasks request the entire reported disk space. questionable if this check useful in practice.*/ return (cores_allocated + t->resources_requested->cores <= total_resources->cores.total) && (memory_allocated + t->resources_requested->memory <= total_resources->memory.total) && - (1) && // disk_allocated + t->resources_requested->disk <= total_resources->disk.total) && + (disk_allocated + t->resources_requested->disk <= total_resources->disk.total) && (gpus_allocated + t->resources_requested->gpus <= total_resources->gpus.total); } @@ -2088,6 +2096,10 @@ static void show_help(const char *cmd) printf(" %-30s Manually set the amount of disk (in MB) reported by this worker.\n", "--disk="); printf(" %-30s If not given, or less than 1, then try to detect disk space available.\n", ""); + printf(" %-30s Set the conservative disk reporting percent when --disk is unspecified.\n", + "--disk-percent="); + printf(" %-30s Defaults to %d.\n", "", disk_percent); + printf(" %-30s Use loop devices for task sandboxes (default=disabled, requires root access).\n", "--disk-allocation"); printf(" %-30s Specifies a user-defined feature the worker provides. May be specified several times.\n", @@ -2109,6 +2121,7 @@ enum { LONG_OPT_CORES, LONG_OPT_MEMORY, LONG_OPT_DISK, + LONG_OPT_DISK_PERCENT, LONG_OPT_GPUS, LONG_OPT_DISABLE_SYMLINKS, LONG_OPT_IDLE_TIMEOUT, @@ -2150,6 +2163,7 @@ static const struct option long_options[] = {{"advertise", no_argument, 0, 'a'}, {"cores", required_argument, 0, LONG_OPT_CORES}, {"memory", required_argument, 0, LONG_OPT_MEMORY}, {"disk", required_argument, 0, LONG_OPT_DISK}, + {"disk-percent", required_argument, 0, LONG_OPT_DISK_PERCENT}, {"gpus", required_argument, 0, LONG_OPT_GPUS}, {"wall-time", required_argument, 0, LONG_OPT_WALL_TIME}, {"help", no_argument, 0, 'h'}, @@ -2296,6 +2310,14 @@ int main(int argc, char *argv[]) manual_disk_option = atoll(optarg); } break; + case LONG_OPT_DISK_PERCENT: + if (!strncmp(optarg, "all", 3)) { + disk_percent = 100; + } else { + /* guard the disk percent value to [0, 100]. */ + disk_percent = MIN(100, MAX(atoi(optarg), 0)); + } + break; case LONG_OPT_GPUS: if (!strncmp(optarg, "all", 3)) { manual_gpus_option = -1;