From f6002f9f0b190903518252807985f3604cd59dac Mon Sep 17 00:00:00 2001
From: Raunak Bhagat
Date: Thu, 19 Dec 2024 14:22:07 -0800
Subject: [PATCH] ci: Output results in a CSV format (#3625)

# Overview

This PR makes the visualization of `run-cluster` results more user-friendly.

## Usage

If you're ever running a script (on the GHA runner node, not on Ray), you can write whatever files you want into the directory pointed to by the `$GHA_OUTPUT_DIR` env variable, and those files (or directories) will be uploaded to the GitHub Actions Summary Page once the job fully completes.

This is what the `.github/ci-scripts/job_runner.py` file does. If you ever create a new GHA runner script, writing any files to `$GHA_OUTPUT_DIR` will work the same way (see the example sketch after the patch below).
---
 .github/ci-scripts/job_runner.py   | 18 +++++++++++++++---
 .github/workflows/run-cluster.yaml | 11 +++++++++++
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/.github/ci-scripts/job_runner.py b/.github/ci-scripts/job_runner.py
index 12c949136f..c36226c1ab 100644
--- a/.github/ci-scripts/job_runner.py
+++ b/.github/ci-scripts/job_runner.py
@@ -1,12 +1,14 @@
 # /// script
 # requires-python = ">=3.12"
-# dependencies = []
+# dependencies = ["ray[default]"]
 # ///
 
 import argparse
 import asyncio
+import csv
 import json
-from dataclasses import dataclass
+import os
+from dataclasses import asdict, dataclass
 from datetime import datetime, timedelta
 from pathlib import Path
 from typing import Optional
@@ -45,6 +47,11 @@ def submit_job(
     env_vars: str,
     enable_ray_tracing: bool,
 ):
+    if "GHA_OUTPUT_DIR" not in os.environ:
+        raise RuntimeError("Output directory environment variable not found; don't know where to store outputs")
+    output_dir = Path(os.environ["GHA_OUTPUT_DIR"])
+    output_dir.mkdir(exist_ok=True, parents=True)
+
     env_vars_dict = parse_env_var_str(env_vars)
     if enable_ray_tracing:
         env_vars_dict["DAFT_ENABLE_RAY_TRACING"] = "1"
@@ -85,7 +92,12 @@ def submit_job(
             result = Result(query=index, duration=duration, error_msg=error_msg)
             results.append(result)
 
-    print(f"{results=}")
+    output_file = output_dir / "out.csv"
+    with open(output_file, mode="w", newline="") as csv_file:
+        writer = csv.DictWriter(csv_file, fieldnames=results[0].__dataclass_fields__.keys())
+        writer.writeheader()
+        for result in results:
+            writer.writerow(asdict(result))
 
 
 if __name__ == "__main__":
diff --git a/.github/workflows/run-cluster.yaml b/.github/workflows/run-cluster.yaml
index f5c41aaf75..7bb35ac765 100644
--- a/.github/workflows/run-cluster.yaml
+++ b/.github/workflows/run-cluster.yaml
@@ -85,6 +85,10 @@ jobs:
           uv v
           source .venv/bin/activate
           uv pip install ray[default] boto3
+          GHA_OUTPUT_DIR=/tmp/outputs
+          mkdir -p $GHA_OUTPUT_DIR
+          echo "Output dir is set to $GHA_OUTPUT_DIR"
+          echo "GHA_OUTPUT_DIR=$GHA_OUTPUT_DIR" >> $GITHUB_ENV
       - name: Dynamically update ray config file
         run: |
          source .venv/bin/activate
@@ -121,6 +125,7 @@ jobs:
             echo 'Invalid command submitted; command cannot be empty'
             exit 1
           fi
+          echo "Output dir: $GHA_OUTPUT_DIR"
           python .github/ci-scripts/job_runner.py \
             --working-dir='${{ inputs.working_dir }}' \
             --entrypoint-script='${{ inputs.entrypoint_script }}' \
@@ -157,6 +162,12 @@ jobs:
         run: |
           source .venv/bin/activate
           ray down .github/assets/ray.yaml -y
+      - name: Upload output dir
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: outputs
+          path: ${{ env.GHA_OUTPUT_DIR }}
       - name: Upload log files
         if: always()
        uses: actions/upload-artifact@v4
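
For reference, here is a minimal sketch of how another GHA runner script could plug into the mechanism described in the Usage section above. The script contents and the `results.csv` file name are purely illustrative and not part of this patch; the only assumption carried over from the patch is that `$GHA_OUTPUT_DIR` points at a directory that the "Upload output dir" step uploads as the `outputs` artifact.

```python
# Hypothetical runner script (not part of this patch): anything written into
# $GHA_OUTPUT_DIR is picked up by the "Upload output dir" step added above.
import os
from pathlib import Path

# Fail fast if the workflow did not export GHA_OUTPUT_DIR (mirrors job_runner.py).
output_dir = Path(os.environ["GHA_OUTPUT_DIR"])
output_dir.mkdir(exist_ok=True, parents=True)

# Write any file you want surfaced on the GitHub Actions Summary Page.
# The file name and contents here are made up for illustration only.
(output_dir / "results.csv").write_text("query,duration,error_msg\n0,1.23,\n")
```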