Simplify hallucination scorer tests to use actual HHEM model #19618
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# We use self-hosted runners for performance: Docker layer caching on the
# runner machines gives our tests near-zero container startup time.
# Self-hosted runners have a known issue: if a workflow mixes host jobs and
# container jobs on the same runner, the host jobs fail to start
# (https://github.com/actions/checkout/issues/273).
# To avoid this, the build job runs on the host of a separate runner tagged
# "builder", and all the tests run on runners tagged "runner".
# Workflow "test": runs on every push event.
name: test

on:
  push:

# Every job of the workflow is declared under this key.
jobs:
# Calls a reusable workflow from this repository. Its outputs
# (`weave_query_tests`, `weave_js_tests`) are read by later jobs to decide
# whether their suites need to run for the current push.
check-which-tests-to-run:
  uses: ./.github/workflows/check-which-tests-to-run.yaml
# ==== Query Service Jobs ==== | |
# Builds the legacy Query Service unit-test image on a self-hosted runner
# tagged "builder". The job itself always runs; the expensive steps are gated
# on the `build_needed` value computed by the `build_check` step.
build-container-query-service:
  name: Build Legacy (Query Service) test container
  timeout-minutes: 30
  runs-on: [self-hosted, builder]
  needs: check-which-tests-to-run
  outputs:
    # Read by test-query-service to decide whether to run inside the image.
    build_needed: ${{ steps.build_check.outputs.build_needed }}
  env:
    # Not referenced by any step in this workflow file; presumably consumed by
    # weave_query/docker/docker_build.py -- TODO confirm.
    REGISTRY: us-east4-docker.pkg.dev/weave-support-367421/weave-images
  # if: github.ref == 'refs/heads/master' || needs.check-which-tests-to-run.outputs.weave_query_tests
  steps:
    - uses: actions/checkout@v3
      with:
        fetch-depth: 2
    - name: Check if build is needed
      id: build_check
      # Expression values are handed to the script through environment
      # variables instead of being pasted into the script text, so that an
      # unusual ref name cannot be interpreted as shell code.
      env:
        CURRENT_REF: ${{ github.ref }}
        RUN_QUERY_TESTS: ${{ needs.check-which-tests-to-run.outputs.weave_query_tests }}
      run: |
        # Build on master unconditionally, otherwise only when the query
        # service tests were selected for this push.
        if [[ "$CURRENT_REF" == "refs/heads/master" || "$RUN_QUERY_TESTS" == "true" ]]; then
          echo "Build is needed"
          echo "build_needed=true" >> "$GITHUB_OUTPUT"
        else
          echo "Build is not needed"
          echo "build_needed=false" >> "$GITHUB_OUTPUT"
        fi
    # Despite the step name, the login targets the us-east4-docker.pkg.dev
    # registry configured below, not Docker Hub.
    - name: Login to Docker Hub
      if: steps.build_check.outputs.build_needed == 'true'
      uses: docker/login-action@v2
      with:
        registry: us-east4-docker.pkg.dev
        username: _json_key
        password: ${{ secrets.gcp_sa_key }}
    - name: Prune docker cache
      if: steps.build_check.outputs.build_needed == 'true'
      run: docker system prune -f
    - name: Build legacy (query service) unit test image
      if: steps.build_check.outputs.build_needed == 'true'
      run: python3 weave_query/docker/docker_build.py build_deps weave-test-python-query-service builder . weave_query/Dockerfile.ci.test
# Runs the legacy Query Service Python unit tests, split across two matrix
# shards (`job_num` 0 and 1), with a wandb test server as a service container.
test-query-service:
  name: Legacy (Query Service) Python unit tests
  timeout-minutes: 15  # do not raise! running longer than this indicates an issue with the tests. fix there.
  needs:
    - check-which-tests-to-run
    - build-container-query-service
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      job_num: [0, 1]
  # Use the image tagged with this commit's SHA only when the build job
  # reported that it built one; otherwise the expression evaluates to null.
  container: ${{ needs.build-container-query-service.outputs.build_needed == 'true' && format('us-east4-docker.pkg.dev/weave-support-367421/weave-images/weave-test-python-query-service:{0}', github.sha) || null }}
  services:
    wandbservice:
      image: us-central1-docker.pkg.dev/wandb-production/images/local-testcontainer:master
      credentials:
        username: _json_key
        password: ${{ secrets.gcp_wb_sa_key }}
      env:
        CI: 1
        WANDB_ENABLE_TEST_CONTAINER: true
      ports:
        - '8080:8080'
        - '8083:8083'
        - '9015:9015'
      options: --health-cmd "curl --fail http://localhost:8080/healthz || exit 1" --health-interval=5s --health-timeout=3s
  outputs:
    tests_should_run: ${{ steps.test_check.outputs.tests_should_run }}
  steps:
    # v3 matches the checkout version used by the other jobs in this workflow.
    - uses: actions/checkout@v3
    - name: Check if tests should run
      id: test_check
      # Expression values are handed to the script through environment
      # variables instead of being pasted into the script text, so that an
      # unusual ref name cannot be interpreted as shell code.
      env:
        CURRENT_REF: ${{ github.ref }}
        RUN_QUERY_TESTS: ${{ needs.check-which-tests-to-run.outputs.weave_query_tests }}
      run: |
        # Same condition as the build job: always on master, otherwise only
        # when the query service tests were selected.
        if [[ "$CURRENT_REF" == "refs/heads/master" || "$RUN_QUERY_TESTS" == "true" ]]; then
          echo "Tests should run"
          echo "tests_should_run=true" >> "$GITHUB_OUTPUT"
        else
          echo "Tests should not run"
          echo "tests_should_run=false" >> "$GITHUB_OUTPUT"
        fi
    - name: Verify wandb server is running
      if: steps.test_check.outputs.tests_should_run == 'true'
      run: curl -s http://wandbservice:8080/healthz
    - name: Run Legacy (Query Service) Python Unit Tests
      if: steps.test_check.outputs.tests_should_run == 'true'
      env:
        DD_SERVICE: weave-python
        DD_ENV: ci
        WEAVE_SENTRY_ENV: ci
        CI: 1
        WB_SERVER_HOST: http://wandbservice
        WEAVE_SERVER_DISABLE_ECOSYSTEM: 1
      run: |
        source /root/venv/bin/activate && \
        cd weave_query && \
        pytest \
          --job-num=${{ matrix.job_num }} \
          --timeout=90 \
          --ddtrace \
          --durations=5 \
          .
# Aggregation job used only for branch protection: it does no work of its own
# and reports on the combined result of the test-query-service matrix.
test-query-service-matrix-check:
  # Run even when a needed job failed, so the check is always reported.
  if: always()
  needs: [test-query-service]
  runs-on: ubuntu-latest
  steps:
    - name: Passes if all test-query-service jobs succeeded
      uses: re-actors/alls-green@release/v1
      with:
        jobs: ${{ toJSON(needs) }}
# ==== Weave UI Jobs ==== | |
# Lints and type-checks the WeaveJS front end. The job always starts; the
# Node setup and the lint/compile commands are skipped unless the push is on
# master or the JS tests were selected for this push.
weavejs-lint-compile:
  name: WeaveJS Lint and Compile
  runs-on: ubuntu-latest
  needs:
    - check-which-tests-to-run
  steps:
    # v3 matches the checkout version used by the other jobs in this workflow.
    - uses: actions/checkout@v3
      with:
        fetch-depth: 0
        token: ${{ secrets.GITHUB_TOKEN }}
    - name: Check if lint and compile should run
      id: check_run
      # Expression values are handed to the script through environment
      # variables instead of being pasted into the script text, so that an
      # unusual ref name cannot be interpreted as shell code.
      env:
        CURRENT_REF: ${{ github.ref }}
        RUN_JS_TESTS: ${{ needs.check-which-tests-to-run.outputs.weave_js_tests }}
      run: |
        if [[ "$CURRENT_REF" == "refs/heads/master" || "$RUN_JS_TESTS" == "true" ]]; then
          echo "Lint and compile should run"
          echo "should_lint_and_compile=true" >> "$GITHUB_OUTPUT"
        else
          echo "Lint and compile should not run"
          echo "should_lint_and_compile=false" >> "$GITHUB_OUTPUT"
        fi
    # v3 matches the setup-node version used by the trace-docs job.
    - uses: actions/setup-node@v3
      if: steps.check_run.outputs.should_lint_and_compile == 'true'
      with:
        node-version: '18.x'
    - name: Run WeaveJS Lint and Compile
      if: steps.check_run.outputs.should_lint_and_compile == 'true'
      run: |
        set -e
        cd weave-js
        yarn install --frozen-lockfile
        yarn generate
        yarn eslint --max-warnings=0
        yarn tslint
        yarn prettier
        yarn run tsc
# ==== Trace Jobs ==== | |
# Runs the Python lint session through nox on a GitHub-hosted runner.
lint:
  name: Python lint
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@v3
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        # Quoted so the version stays a string: an unquoted value is parsed
        # as a float, which would silently turn e.g. 3.10 into 3.1.
        python-version: '3.9'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install nox uv
    - name: Run nox
      run: nox -e lint
# Builds the Trace documentation via the repository's make targets.
trace-docs:
  name: Trace docs
  runs-on: ubuntu-latest
  timeout-minutes: 10
  steps:
    - name: Checkout
      uses: actions/checkout@v3
    - name: Setup Node.js
      uses: actions/setup-node@v3
      with:
        node-version: '20'
    # Prepare the docs tooling, then generate the docs.
    - name: Setup docs
      run: make setup-docs-ci
    - name: Run docs
      run: make docs
# Runs the Trace test suite through nox for every combination of Python minor
# version and nox shard. Each matrix entry runs nox twice: first only the
# `weave_client` tests against a ClickHouse-backed server, then the full shard.
trace-tests:
  name: Trace nox tests
  timeout-minutes: 10
  runs-on: ubuntu-latest
  strategy:
    matrix:
      python-version-major:
        - '3'
      python-version-minor:
        - '9'
        - '10'
        - '11'
        - '12'
        - '13'
      nox-shard:
        - 'trace'
        - 'trace_server'
        - 'anthropic'
        - 'cerebras'
        - 'cohere'
        - 'dspy'
        - 'groq'
        - 'google_ai_studio'
        - 'instructor'
        - 'langchain'
        - 'litellm'
        - 'llamaindex'
        - 'mistral0'
        - 'mistral1'
        - 'notdiamond'
        - 'openai'
        - 'vertexai'
        - 'scorers_tests'
        - 'pandas-test'
    fail-fast: false
  services:
    wandbservice:
      image: us-central1-docker.pkg.dev/wandb-production/images/local-testcontainer:master
      credentials:
        username: _json_key
        password: ${{ secrets.gcp_wb_sa_key }}
      env:
        CI: 1
        WANDB_ENABLE_TEST_CONTAINER: true
      ports:
        - '8080:8080'
        - '8083:8083'
        - '9015:9015'
      options: --health-cmd "curl --fail http://localhost:8080/healthz || exit 1" --health-interval=5s --health-timeout=3s
    weave_clickhouse:
      # NOTE(review): no image tag is given, so the registry's default tag is
      # pulled -- consider pinning a version for reproducible runs.
      image: clickhouse/clickhouse-server
      ports:
        - '8123:8123'
      options: --health-cmd "wget -nv -O- 'http://localhost:8123/ping' || exit 1" --health-interval=5s --health-timeout=3s
  steps:
    - name: Checkout
      uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version-major }}.${{ matrix.python-version-minor }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version-major }}.${{ matrix.python-version-minor }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install nox uv
    # First pass: only tests marked `weave_client` (excluding those marked
    # `skip_clickhouse_client`), with the server backend set to clickhouse.
    - name: Run nox (Clickhouse Only)
      env:
        WEAVE_SENTRY_ENV: ci
        CI: 1
        WB_SERVER_HOST: http://wandbservice
        # NOTE(review): this step uses `localhost` while the next step uses
        # the service name `weave_clickhouse`; the job declares no
        # `container:` -- confirm which host the second step should use.
        WF_CLICKHOUSE_HOST: localhost
        WEAVE_SERVER_DISABLE_ECOSYSTEM: 1
      run: |
        nox -e "tests-${{ matrix.python-version-major }}.${{ matrix.python-version-minor }}(shard='${{ matrix.nox-shard }}')" -- \
          -m "weave_client and not skip_clickhouse_client" \
          --weave-server=clickhouse
    # Second pass: the whole shard with nox defaults, with provider API keys
    # supplied from repository secrets.
    - name: Run nox
      env:
        WEAVE_SENTRY_ENV: ci
        CI: 1
        WB_SERVER_HOST: http://wandbservice
        WF_CLICKHOUSE_HOST: weave_clickhouse
        WEAVE_SERVER_DISABLE_ECOSYSTEM: 1
        GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
        ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        HF_API_TOKEN: ${{ secrets.HF_API_TOKEN }}
      run: |
        nox -e "tests-${{ matrix.python-version-major }}.${{ matrix.python-version-minor }}(shard='${{ matrix.nox-shard }}')"
# Aggregation job used only for branch protection: it does no work of its own
# and reports on the combined result of the trace-tests matrix.
trace-tests-matrix-check:
  # Run even when a needed job failed, so the check is always reported.
  if: always()
  needs: [trace-tests]
  runs-on: ubuntu-latest
  steps:
    - name: Passes if all trace-tests jobs succeeded
      uses: re-actors/alls-green@release/v1
      with:
        jobs: ${{ toJSON(needs) }}