Skip to content

Commit

Permalink
feat: add spark-test service
Browse files Browse the repository at this point in the history
  • Loading branch information
carlopignatiello committed Dec 12, 2024
1 parent 088291b commit d56270f
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 0 deletions.
10 changes: 10 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,16 @@ services:
start_period: 5s
retries: 2

spark-test:
profiles: ["spark-test"]
build: ./spark-test
environment:
JOB_NAME: "completion"
KUBECONFIG_FILE_PATH: "/opt/kubeconfig/kubeconfig.yaml"
SPARK_IMAGE: "radicalbit-spark-py:develop"
volumes:
- ./docker/k3s_data/kubeconfig/kubeconfig.yaml:/opt/kubeconfig/kubeconfig.yaml

dind:
image: docker:dind
privileged: true
Expand Down
11 changes: 11 additions & 0 deletions spark-test/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
FROM python:3.11.8-slim

WORKDIR /spark-test

COPY requirements.txt requirements.txt

RUN pip install --no-cache-dir -r requirements.txt

COPY . .

CMD ["python3", "main.py"]
14 changes: 14 additions & 0 deletions spark-test/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
def create_secrets():
return {
"AWS_ACCESS_KEY_ID": "minio",
"AWS_SECRET_ACCESS_KEY": "minio123",
"AWS_REGION": "us-east-1",
"S3_ENDPOINT_URL": "http://minio:9000",
"POSTGRES_URL": "jdbc:postgresql://postgres:5432/radicalbit",
"POSTGRES_DB": "radicalbit",
"POSTGRES_HOST": "postgres",
"POSTGRES_PORT": "5432",
"POSTGRES_USER": "postgres",
"POSTGRES_PASSWORD": "postgres",
"POSTGRES_SCHEMA": "public",
}
37 changes: 37 additions & 0 deletions spark-test/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import os
from conf import create_secrets
from uuid import uuid4
from spark_on_k8s.k8s.sync_client import KubernetesClientManager
from spark_on_k8s.client import SparkOnK8S

envs = ["KUBECONFIG_FILE_PATH", "JOB_NAME", "SPARK_IMAGE"]

for var in envs:
if var not in os.environ:
raise EnvironmentError("Failed because {} is not set.".format(var))

kube_conf = os.environ["KUBECONFIG_FILE_PATH"]
job_name = os.environ["JOB_NAME"]
spark_image = os.environ["SPARK_IMAGE"]

k8s_client_manager = KubernetesClientManager(kube_conf)
spark_k8s_client = SparkOnK8S(k8s_client_manager=k8s_client_manager)

path = "s3a://test-bucket/metrics_one.json"

spark_k8s_client.submit_app(
image=spark_image,
app_path=f"local:///opt/spark/custom_jobs/{job_name}_job.py",
app_arguments=[
path,
str(uuid4()),
"completion_dataset_metrics",
"completion_dataset"
],
app_name=f"{spark_image}-completion-job",
namespace="spark",
service_account="spark",
app_waiter="no_wait",
image_pull_policy="IfNotPresent",
secret_values=create_secrets(),
)
1 change: 1 addition & 0 deletions spark-test/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
spark-on-k8s==0.10.1

0 comments on commit d56270f

Please sign in to comment.