Skip to content

Commit

Permalink
fix(spark): completion job entry point
Browse files Browse the repository at this point in the history
  • Loading branch information
carlopignatiello committed Dec 13, 2024
1 parent d56270f commit dcbef81
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions spark/jobs/completion_job.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import sys
import os
import uuid

import orjson
from pyspark.sql.types import StructField, StructType, StringType

from metrics.completion_metrics import LLMMetrics
from utils.models import JobStatus
from utils.db import update_job_status, write_to_db

Expand All @@ -13,7 +15,11 @@

def compute_metrics(df: DataFrame) -> dict:
complete_record = {}
# TODO: compute model quality metrics
completion_service = LLMMetrics()
model_quality = completion_service.extract_metrics(df)
complete_record["MODEL_QUALITY"] = orjson.dumps(model_quality.model_dump(serialize_as_any=True)).decode(
"utf-8"
)
return complete_record


Expand Down Expand Up @@ -43,6 +49,7 @@ def main(
spark_context._jsc.hadoopConfiguration().set(
"fs.s3a.connection.ssl.enabled", "false"
)
print(completion_dataset_path)
df = spark_session.read.option("multiline", "true").json(completion_dataset_path)
complete_record = compute_metrics(df)

Expand Down

0 comments on commit dcbef81

Please sign in to comment.