From 219cdd6377b8377021b8855fd2e7878f42ea46b8 Mon Sep 17 00:00:00 2001 From: doronyaary-runai <166282536+doronyaary-runai@users.noreply.github.com> Date: Sun, 4 Aug 2024 11:30:09 +0300 Subject: [PATCH] skip reachability test in airgapped (#31) * disable some prereq tests in airgapped * disable some prereq tests in airgapped * disable some prereq tests in airgapped * disable some prereq tests in airgapped * disable some prereq tests in airgapped * PR comments --- README.md | 5 +- cmd/preinstall-diagnostics/main.go | 30 ++++---- internal/cmd/cli/cli.go | 7 +- internal/cmd/job/internal_cluster_tests.go | 88 ++++++++++++---------- internal/cmd/job/job.go | 1 - internal/env/env.go | 1 + internal/internal-cluster-tests/nodes.go | 3 + internal/resources/functions.go | 17 ++++- makefile | 2 +- 9 files changed, 92 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 8701c76..7f00044 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ chmod +x ./preinstall-diagnostics-darwin-arm64 && \ --domain ${CONTROL_PLANE_FQDN} \ --cluster-domain ${CLUSTER_FQDN} \ --image-pull-secret ${IMAGE_PULL_SECRET_NAME} \ - --image ${PRIVATE_REGISTRY_IMAGE_URL} + --image ${PRIVATE_REGISTRY_IMAGE_URL} --airgapped ``` ### Example @@ -88,6 +88,9 @@ Usage of ./preinstall-diagnostics-darwin-arm64: URL the Run:AI service to check connectivity to (default "https://app.run.ai") -version Prints the binary version + -airgapped + skip reachability checks to external servers + ``` ## Build diff --git a/cmd/preinstall-diagnostics/main.go b/cmd/preinstall-diagnostics/main.go index 79ada32..5cfe106 100644 --- a/cmd/preinstall-diagnostics/main.go +++ b/cmd/preinstall-diagnostics/main.go @@ -25,6 +25,7 @@ const ( runaiSaasArgName = "saas-address" outputArgName = "output" versionArgName = "version" + airgappedArgName = "airgapped" ) const ( @@ -33,19 +34,18 @@ const ( var ( runInternalClusterTests bool - - clean bool - backendDomainFQDN string - clusterDomainFQDN string - image string - imagePullSecretName string - dryRun bool - runaiContainerRegistry string - runaiSaas string - output string - version bool - - outputFile *os.File + clean bool + backendDomainFQDN string + clusterDomainFQDN string + image string + imagePullSecretName string + dryRun bool + runaiContainerRegistry string + runaiSaas string + output string + version bool + airgapped bool + outputFile *os.File ) func init() { @@ -62,7 +62,7 @@ func init() { flag.StringVar(&runaiSaas, runaiSaasArgName, saas.RunAISaasAddress, "URL the Run:AI service to check connectivity to") flag.StringVar(&output, outputArgName, defaultOutputFileName, "File to save the output to") flag.BoolVar(&version, versionArgName, false, "Prints the binary version") - + flag.BoolVar(&airgapped, airgappedArgName, false, "skip tests that require network access") flag.Parse() } @@ -84,6 +84,6 @@ func main() { logger := log.NewLogger(outputFile) cli.Main(clean, dryRun, backendDomainFQDN, clusterDomainFQDN, image, - imagePullSecretName, runaiContainerRegistry, runaiSaas, version, logger) + imagePullSecretName, runaiContainerRegistry, runaiSaas, version, airgapped, logger) } } diff --git a/internal/cmd/cli/cli.go b/internal/cmd/cli/cli.go index 1c05ef7..16e2086 100644 --- a/internal/cmd/cli/cli.go +++ b/internal/cmd/cli/cli.go @@ -21,14 +21,17 @@ import ( ) func Main(clean, dryRun bool, backendFQDN, clusterFQDN, image, imagePullSecretName, imageRegistry, - runaiSaas string, version bool, logger *log.Logger) { + runaiSaas string, version bool, airgapped bool, logger *log.Logger) { + if airgapped { + fmt.Println("airgapped") + } if version { fmt.Println(ver.Version) return } creationOrder, deletionOrder := resources.TemplateResources(backendFQDN, image, - imagePullSecretName, imageRegistry, runaiSaas) + imagePullSecretName, imageRegistry, runaiSaas, airgapped) if dryRun { err := resources.PrintResources(creationOrder) diff --git a/internal/cmd/job/internal_cluster_tests.go b/internal/cmd/job/internal_cluster_tests.go index 551f20f..8c8ec75 100644 --- a/internal/cmd/job/internal_cluster_tests.go +++ b/internal/cmd/job/internal_cluster_tests.go @@ -2,13 +2,14 @@ package job import ( v2 "github.com/run-ai/preinstall-diagnostics/internal" + "github.com/run-ai/preinstall-diagnostics/internal/env" internal_cluster_tests2 "github.com/run-ai/preinstall-diagnostics/internal/internal-cluster-tests" "github.com/run-ai/preinstall-diagnostics/internal/log" + "strconv" ) func runTestsAndAppendResults(logger *log.Logger) []v2.TestResult { var testResults []v2.TestResult - osInfo, err := internal_cluster_tests2.ShowOSInfo() if err != nil { testResults = append(testResults, v2.TestResult{ @@ -61,51 +62,56 @@ func runTestsAndAppendResults(logger *log.Logger) []v2.TestResult { }) } - reachable, err := internal_cluster_tests2.RunAIAuthProviderReachable() + airgapped, err := strconv.ParseBool(env.EnvOrDefault(env.AirgappedEnvVar, "false")) if err != nil { - testResults = append(testResults, v2.TestResult{ - Name: "RunAI Auth Provider Reachable", - Result: false, - Message: err.Error(), - }) - } else { - testResults = append(testResults, v2.TestResult{ - Name: "RunAI Auth Provider Reachable", - Result: reachable, - Message: "", - }) + airgapped = false } + if !airgapped { + reachable, err := internal_cluster_tests2.RunAIAuthProviderReachable() + if err != nil { + testResults = append(testResults, v2.TestResult{ + Name: "RunAI Auth Provider Reachable", + Result: false, + Message: err.Error(), + }) + } else { + testResults = append(testResults, v2.TestResult{ + Name: "RunAI Auth Provider Reachable", + Result: reachable, + Message: "", + }) + } - reachable, err = internal_cluster_tests2.RunAIPrometheusReachable() - if err != nil { - testResults = append(testResults, v2.TestResult{ - Name: "RunAI Prometheus Reachable", - Result: false, - Message: err.Error(), - }) - } else { - testResults = append(testResults, v2.TestResult{ - Name: "RunAI Prometheus Reachable", - Result: reachable, - Message: "", - }) - } + reachable, err = internal_cluster_tests2.RunAIPrometheusReachable() + if err != nil { + testResults = append(testResults, v2.TestResult{ + Name: "RunAI Prometheus Reachable", + Result: false, + Message: err.Error(), + }) + } else { + testResults = append(testResults, v2.TestResult{ + Name: "RunAI Prometheus Reachable", + Result: reachable, + Message: "", + }) + } - reachable, err = internal_cluster_tests2.RunAIBackendReachable() - if err != nil { - testResults = append(testResults, v2.TestResult{ - Name: "RunAI Backend Reachable", - Result: false, - Message: err.Error(), - }) - } else { - testResults = append(testResults, v2.TestResult{ - Name: "RunAI Backend Reachable", - Result: reachable, - Message: "", - }) + reachable, err = internal_cluster_tests2.RunAIBackendReachable() + if err != nil { + testResults = append(testResults, v2.TestResult{ + Name: "RunAI Backend Reachable", + Result: false, + Message: err.Error(), + }) + } else { + testResults = append(testResults, v2.TestResult{ + Name: "RunAI Backend Reachable", + Result: reachable, + Message: "", + }) + } } - err = internal_cluster_tests2.CheckNodeConnectivity(logger) if err != nil { testResults = append(testResults, v2.TestResult{ diff --git a/internal/cmd/job/job.go b/internal/cmd/job/job.go index 0385790..06b4b9d 100644 --- a/internal/cmd/job/job.go +++ b/internal/cmd/job/job.go @@ -3,7 +3,6 @@ package job import ( "context" "encoding/json" - v2 "github.com/run-ai/preinstall-diagnostics/internal" "github.com/run-ai/preinstall-diagnostics/internal/env" "github.com/run-ai/preinstall-diagnostics/internal/k8sclient" diff --git a/internal/env/env.go b/internal/env/env.go index 01d5fd5..ecdb853 100644 --- a/internal/env/env.go +++ b/internal/env/env.go @@ -17,6 +17,7 @@ const ( RegistryEnvVar = "REGISTRY" RunAISaasEnvVar = "RUNAI_SAAS" + AirgappedEnvVar = "AIRGAPPED" ) func EnvOrError(envVar string) (string, error) { diff --git a/internal/internal-cluster-tests/nodes.go b/internal/internal-cluster-tests/nodes.go index 28db67e..f493542 100644 --- a/internal/internal-cluster-tests/nodes.go +++ b/internal/internal-cluster-tests/nodes.go @@ -112,6 +112,9 @@ func WaitForJobPodsToBeRunning(logger *log.Logger) error { logger.LogF("waiting for pods to be available...") availablePods := 0 for _, job := range jobs.Items { + if job.Status.Ready == nil { + continue + } availablePods += int(*job.Status.Ready) } diff --git a/internal/resources/functions.go b/internal/resources/functions.go index 89a97bb..c76d0c4 100644 --- a/internal/resources/functions.go +++ b/internal/resources/functions.go @@ -93,7 +93,7 @@ func CreateResources(objs []client.Object, kubeDynamicClient dynamic.Interface) } func TemplateResources(backendFQDN, image, imagePullSecretName, - imageRegistry, runaiSaas string) (creationOrder, deletionOrder []client.Object) { + imageRegistry, runaiSaas string, airgapped bool) (creationOrder, deletionOrder []client.Object) { creationOrder = []client.Object{} k8s, err := k8sclient.ClientSet() @@ -121,6 +121,21 @@ func TemplateResources(backendFQDN, image, imagePullSecretName, }, } } + if airgapped { + job.Spec.Template.Spec.Containers[0].Env = + append(job.Spec.Template.Spec.Containers[0].Env, + v1.EnvVar{ + Name: env.AirgappedEnvVar, + Value: "true", + }) + } else { + job.Spec.Template.Spec.Containers[0].Env = + append(job.Spec.Template.Spec.Containers[0].Env, + v1.EnvVar{ + Name: env.AirgappedEnvVar, + Value: "false", + }) + } if imageRegistry != "" { job.Spec.Template.Spec.Containers[0].Env = diff --git a/makefile b/makefile index 2e0daf7..33102a7 100644 --- a/makefile +++ b/makefile @@ -2,7 +2,7 @@ OUT_DIR=_out BIN=preinstall-diagnostics REGISTRY=gcr.io/run-ai-lab -VERSION=v2.16.19 +VERSION=v2.18.14 IMAGE_DOCKER_FILE=cmd/preinstall-diagnostics/Dockerfile IMAGE=$(REGISTRY)/preinstall-diagnostics:$(VERSION)