From 53b7d86d169ed0ba946b1deeee71bc6c8a24cf16 Mon Sep 17 00:00:00 2001 From: Sergey Smolnikov Date: Thu, 4 Jul 2024 13:44:10 +0200 Subject: [PATCH 1/3] Info about job-scheduler batch statuses. Updated refs --- .../docs/guides/jobs/configure-jobs.md | 7 +++ public-site/docs/radix-config/index.md | 61 +++++++++++++++++++ public-site/package-lock.json | 21 ++++--- public-site/package.json | 15 ++--- 4 files changed, 87 insertions(+), 17 deletions(-) diff --git a/public-site/docs/guides/jobs/configure-jobs.md b/public-site/docs/guides/jobs/configure-jobs.md index d3ccbadc..bafb0671 100644 --- a/public-site/docs/guides/jobs/configure-jobs.md +++ b/public-site/docs/guides/jobs/configure-jobs.md @@ -41,6 +41,12 @@ spec: node: gpu: nvidia-k80 gpuCount: 2 + batchStatusRules: + - condition: Any + operator: In + jobStatuses: + - Failed + batchStatus: Failed ``` ## Options @@ -61,6 +67,7 @@ Jobs have three extra configuration options; `schedulerPort`, `payload` and `tim - `timeLimitSeconds` (optional) defines maximum running time for a job. - `backoffLimit` (optional) defines the number of times a job will be restarted if its container exits in error. - `notifications.webhook` (optional) the Radix application component or job component endpoint, where Radix batch events will be posted when any of its job-component's running jobs or batches changes states. +- `batchStatusRules` - (optional) rules to define batch statuses by their jobs statuses. See [batchStatusRules](/radix-config/index.md#batchstatusrules) for a job for more information. ### schedulerPort diff --git a/public-site/docs/radix-config/index.md b/public-site/docs/radix-config/index.md index 8065370f..516db467 100644 --- a/public-site/docs/radix-config/index.md +++ b/public-site/docs/radix-config/index.md @@ -1197,6 +1197,41 @@ spec: `webhook` is an optional URL to the Radix application component or job component which will be called when any of the job-component's running jobs or batches changes states. 
Only changes are sent by POST method with a `application/json` `ContentType` in a [batch event format](/guides/jobs/notifications.md#radix-batch-event). Read [more](/guides/jobs/notifications) +### `batchStatusRules` + +```yaml +spec: + jobs: + - name: compute + batchStatusRules: + - condition: Any + operator: In + jobStatuses: + - Failed + batchStatus: Failed + - condition: All + operator: NotIn + jobStatuses: + - Waiting + - Active + - Running + batchStatus: Completed +``` +`batchStatusRules` - Optional rules to define batch statuses by their jobs statuses. +- `condition` - `Any`, `All` +- `operator` - `In`, `NotIn` +- `jobStatuses` - `Waiting`, `Active`, `Running`, `Succeeded`, `Failed`, `Stopped` +- `batchStatus` - `Running`, `Succeeded`, `Failed`, `Waiting`, `Stopping`, `Stopped`, `DeadlineExceeded`, `Active`, `Completed` + +Rules are applied in the order from top to bottom in the rules list. When any rule matches, rules following it are ignored. + +If `batchStatusRules` are not defined or no rules match - following rules are applied: +* No jobs are started - the batch status is `Waiting` +* Any jobs are in `Active` or `Running` state - the batch status is `Active` +* No jobs are in `Waiting`, `Active` or `Running` states - the batch status is `Completed` + +`batchStatusRules` [can be overridden](#batchstatusrules-1) for individual environments. + ### `monitoring` ```yaml @@ -1375,6 +1410,32 @@ spec: See [notifications](#notifications) for a component for more information. +### `batchStatusRules` + +```yaml +spec: + jobs: + - name: compute + batchStatusRules: + - condition: All + operator: NotIn + jobStatuses: + - Waiting + - Active + - Running + batchStatus: Completed + environmentConfig: + - environment: prod + batchStatusRules: + - condition: All + operator: In + jobStatuses: + - Succeeded + batchStatus: Succeeded +``` +When `batchStatusRules` is defined for an environment it fully overrides the job's `batchStatusRules`. 
+See [batchStatusRules](#batchstatusrules) for a job for more information. + #### `monitoring` ```yaml diff --git a/public-site/package-lock.json b/public-site/package-lock.json index 6dbae0fa..3bd03907 100644 --- a/public-site/package-lock.json +++ b/public-site/package-lock.json @@ -8,20 +8,20 @@ "name": "public-site", "version": "0.0.0", "dependencies": { - "@docusaurus/preset-classic": "^3.2.1", + "@docusaurus/preset-classic": "^3.4.0", "@mdx-js/react": "^3.0.1", "clsx": "^2.1.1", - "docusaurus-lunr-search": "^3.3.2", + "docusaurus-lunr-search": "^3.4.0", "prism-react-renderer": "^2.3.1", "react": "^18.3.1", "react-dom": "^18.3.1", - "sass": "^1.75.0" + "sass": "^1.77.6" }, "devDependencies": { - "@docusaurus/module-type-aliases": "^3.2.1", - "@docusaurus/tsconfig": "^3.2.1", - "@docusaurus/types": "^3.2.1", - "typescript": "~5.4.5" + "@docusaurus/module-type-aliases": "^3.4.0", + "@docusaurus/tsconfig": "^3.4.0", + "@docusaurus/types": "^3.4.0", + "typescript": "~5.5.2" }, "engines": { "node": ">=18.0" @@ -14351,9 +14351,10 @@ } }, "node_modules/typescript": { - "version": "5.4.5", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.5.tgz", - "integrity": "sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ==", + "version": "5.5.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.3.tgz", + "integrity": "sha512-/hreyEujaB0w76zKo6717l3L0o/qEUtRgdvUBvlkhoWeOVMjMuHNHk0BRBzikzuGDqNmPQbg5ifMEqsHLiIUcQ==", + "license": "Apache-2.0", "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" diff --git a/public-site/package.json b/public-site/package.json index 9f6211de..444eaa14 100644 --- a/public-site/package.json +++ b/public-site/package.json @@ -15,20 +15,20 @@ "typecheck": "tsc" }, "dependencies": { - "@docusaurus/preset-classic": "^3.2.1", + "@docusaurus/preset-classic": "^3.4.0", "@mdx-js/react": "^3.0.1", "clsx": "^2.1.1", - "docusaurus-lunr-search": "^3.3.2", + 
"docusaurus-lunr-search": "^3.4.0", "prism-react-renderer": "^2.3.1", "react": "^18.3.1", "react-dom": "^18.3.1", - "sass": "^1.75.0" + "sass": "^1.77.6" }, "devDependencies": { - "@docusaurus/module-type-aliases": "^3.2.1", - "@docusaurus/tsconfig": "^3.2.1", - "@docusaurus/types": "^3.2.1", - "typescript": "~5.4.5" + "@docusaurus/module-type-aliases": "^3.4.0", + "@docusaurus/tsconfig": "^3.4.0", + "@docusaurus/types": "^3.4.0", + "typescript": "~5.5.2" }, "browserslist": { "production": [ @@ -46,3 +46,4 @@ "node": ">=18.0" } } + From 292f7df5e4f382762338e2f31a16044b09e4d2cf Mon Sep 17 00:00:00 2001 From: Sergey Smolnikov Date: Thu, 4 Jul 2024 13:56:52 +0200 Subject: [PATCH 2/3] Info about job-scheduler batch statuses. --- public-site/docs/radix-config/index.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/public-site/docs/radix-config/index.md b/public-site/docs/radix-config/index.md index 516db467..408aa637 100644 --- a/public-site/docs/radix-config/index.md +++ b/public-site/docs/radix-config/index.md @@ -1230,6 +1230,8 @@ If `batchStatusRules` are not defined or no rules match - following rules are ap * Any jobs are in `Active` or `Running` state - the batch status is `Active` * No jobs are in `Waiting`, `Active` or `Running` states - the batch status is `Completed` +Batch statuses, default or defined by rules, are the same in the Radix console, returned by [job notifications](/guides/jobs/notifications.md) and [Job Manager API](/guides/jobs/job-manager-and-job-api.md). If rules are changed, they will be applied on next deployment of an application environment, also affecting already existing batches statuses in this environment. + `batchStatusRules` [can be overridden](#batchstatusrules-1) for individual environments. ### `monitoring` From f8b343eba5b1e60b27844d401b6e1a5ec1e47085 Mon Sep 17 00:00:00 2001 From: Sergey Smolnikov Date: Thu, 4 Jul 2024 14:02:20 +0200 Subject: [PATCH 3/3] Info about job-scheduler batch statuses. 
--- public-site/docs/radix-config/index.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/public-site/docs/radix-config/index.md b/public-site/docs/radix-config/index.md index 408aa637..d21c1528 100644 --- a/public-site/docs/radix-config/index.md +++ b/public-site/docs/radix-config/index.md @@ -1221,14 +1221,14 @@ spec: - `condition` - `Any`, `All` - `operator` - `In`, `NotIn` - `jobStatuses` - `Waiting`, `Active`, `Running`, `Succeeded`, `Failed`, `Stopped` -- `batchStatus` - `Running`, `Succeeded`, `Failed`, `Waiting`, `Stopping`, `Stopped`, `DeadlineExceeded`, `Active`, `Completed` +- `batchStatus` - `Waiting`, `Active`, `Running`, `Succeeded`, `Failed`, `Stopping`, `Stopped`, `DeadlineExceeded`, `Completed` Rules are applied in the order from top to bottom in the rules list. When any rule matches, rules following it are ignored. -If `batchStatusRules` are not defined or no rules match - following rules are applied: -* No jobs are started - the batch status is `Waiting` -* Any jobs are in `Active` or `Running` state - the batch status is `Active` -* No jobs are in `Waiting`, `Active` or `Running` states - the batch status is `Completed` +If `batchStatusRules` are not defined or no rules match, a batch status is set by the following rules: +* `Waiting` - no jobs are started +* `Active` - any jobs are in `Active` or `Running` state +* `Completed` - no jobs are in `Waiting`, `Active` or `Running` states Batch statuses, default or defined by rules, are the same in the Radix console, returned by [job notifications](/guides/jobs/notifications.md) and [Job Manager API](/guides/jobs/job-manager-and-job-api.md). If rules are changed, they will be applied on next deployment of an application environment, also affecting already existing batches statuses in this environment.