From d56270f2d0ecec3d3b4e87914edf87a9081de908 Mon Sep 17 00:00:00 2001
From: carlopignatiello
Date: Thu, 12 Dec 2024 16:43:16 +0100
Subject: [PATCH] feat: add spark-test service

---
Reviewer notes (this area is not part of the commit message):
* This patch was rebuilt from a copy whose line breaks and leading
  whitespace had been collapsed. Hunk line counts match the @@ headers,
  but the indentation of YAML/Python lines was re-inferred; confirm the
  docker-compose.yaml context lines against the target tree before applying.
* The From: header carries no e-mail address in this copy (presumably
  stripped); restore it before running `git am`.
* NOTE(review): main.py builds app_name from SPARK_IMAGE, whose compose
  value "radicalbit-spark-py:develop" contains ':', and hard-codes
  "completion" even though app_path is derived from JOB_NAME. Confirm
  that spark-on-k8s normalises the name and that this is intended.

 docker-compose.yaml         | 10 ++++++++++
 spark-test/Dockerfile       | 11 +++++++++++
 spark-test/conf.py          | 14 ++++++++++++++
 spark-test/main.py          | 37 +++++++++++++++++++++++++++++++++++++
 spark-test/requirements.txt |  1 +
 5 files changed, 73 insertions(+)
 create mode 100644 spark-test/Dockerfile
 create mode 100644 spark-test/conf.py
 create mode 100644 spark-test/main.py
 create mode 100644 spark-test/requirements.txt

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 4227869e..42f727d4 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -203,6 +203,16 @@ services:
       start_period: 5s
       retries: 2
 
+  spark-test:
+    profiles: ["spark-test"]
+    build: ./spark-test
+    environment:
+      JOB_NAME: "completion"
+      KUBECONFIG_FILE_PATH: "/opt/kubeconfig/kubeconfig.yaml"
+      SPARK_IMAGE: "radicalbit-spark-py:develop"
+    volumes:
+      - ./docker/k3s_data/kubeconfig/kubeconfig.yaml:/opt/kubeconfig/kubeconfig.yaml
+
   dind:
     image: docker:dind
     privileged: true
diff --git a/spark-test/Dockerfile b/spark-test/Dockerfile
new file mode 100644
index 00000000..369dd8f9
--- /dev/null
+++ b/spark-test/Dockerfile
@@ -0,0 +1,11 @@
+FROM python:3.11.8-slim
+
+WORKDIR /spark-test
+
+COPY requirements.txt requirements.txt
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+CMD ["python3", "main.py"]
\ No newline at end of file
diff --git a/spark-test/conf.py b/spark-test/conf.py
new file mode 100644
index 00000000..4165da02
--- /dev/null
+++ b/spark-test/conf.py
@@ -0,0 +1,14 @@
+def create_secrets():
+    return {
+        "AWS_ACCESS_KEY_ID": "minio",
+        "AWS_SECRET_ACCESS_KEY": "minio123",
+        "AWS_REGION": "us-east-1",
+        "S3_ENDPOINT_URL": "http://minio:9000",
+        "POSTGRES_URL": "jdbc:postgresql://postgres:5432/radicalbit",
+        "POSTGRES_DB": "radicalbit",
+        "POSTGRES_HOST": "postgres",
+        "POSTGRES_PORT": "5432",
+        "POSTGRES_USER": "postgres",
+        "POSTGRES_PASSWORD": "postgres",
+        "POSTGRES_SCHEMA": "public",
+    }
diff --git a/spark-test/main.py b/spark-test/main.py
new file mode 100644
index 00000000..268e052c
--- /dev/null
+++ b/spark-test/main.py
@@ -0,0 +1,37 @@
+import os
+from conf import create_secrets
+from uuid import uuid4
+from spark_on_k8s.k8s.sync_client import KubernetesClientManager
+from spark_on_k8s.client import SparkOnK8S
+
+envs = ["KUBECONFIG_FILE_PATH", "JOB_NAME", "SPARK_IMAGE"]
+
+for var in envs:
+    if var not in os.environ:
+        raise EnvironmentError("Failed because {} is not set.".format(var))
+
+kube_conf = os.environ["KUBECONFIG_FILE_PATH"]
+job_name = os.environ["JOB_NAME"]
+spark_image = os.environ["SPARK_IMAGE"]
+
+k8s_client_manager = KubernetesClientManager(kube_conf)
+spark_k8s_client = SparkOnK8S(k8s_client_manager=k8s_client_manager)
+
+path = "s3a://test-bucket/metrics_one.json"
+
+spark_k8s_client.submit_app(
+    image=spark_image,
+    app_path=f"local:///opt/spark/custom_jobs/{job_name}_job.py",
+    app_arguments=[
+        path,
+        str(uuid4()),
+        "completion_dataset_metrics",
+        "completion_dataset"
+    ],
+    app_name=f"{spark_image}-completion-job",
+    namespace="spark",
+    service_account="spark",
+    app_waiter="no_wait",
+    image_pull_policy="IfNotPresent",
+    secret_values=create_secrets(),
+)
diff --git a/spark-test/requirements.txt b/spark-test/requirements.txt
new file mode 100644
index 00000000..24216a5a
--- /dev/null
+++ b/spark-test/requirements.txt
@@ -0,0 +1 @@
+spark-on-k8s==0.10.1
\ No newline at end of file