From 294572e3faf14d56dd56264f23896abc3495345e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20K=C3=B6hnecke?=
 <155443293+NiklasKoehneckeAA@users.noreply.github.com>
Date: Thu, 11 Apr 2024 09:13:23 +0200
Subject: [PATCH] feat: OS Support for CI (#720)

* refactor: extract current github jobs for reuse

* Add runner as an option to all workflows

* refactor: mark tests that require docker explicitly
---
 .github/workflows/daily.yml                   | 88 +++++++++++++++++
 .github/workflows/on-push.yml                 | 17 ++++
 .../{github-actions.yml => sdk-tests.yml}     | 96 +++++++++----------
 pyproject.toml                                |  1 +
 .../connectors/argilla/test_argilla_client.py |  9 ++
 tests/core/test_tracer.py                     |  2 +
 6 files changed, 163 insertions(+), 50 deletions(-)
 create mode 100644 .github/workflows/daily.yml
 create mode 100644 .github/workflows/on-push.yml
 rename .github/workflows/{github-actions.yml => sdk-tests.yml} (90%)

diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml
new file mode 100644
index 000000000..ce404bb05
--- /dev/null
+++ b/.github/workflows/daily.yml
@@ -0,0 +1,88 @@
+name: "os-support-tests"
+
+on:
+  workflow_dispatch:
+  # Scheduled workflows will only run on the default branch.
+  schedule:
+    - cron: '0 0 * * *' # runs once a day at midnight in the timezone of your GitHub repository
+
+defaults:
+  run:
+    shell: bash
+
+jobs:
+  test:
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [macos-latest, windows-latest]
+    runs-on: ${{matrix.os}}
+    # difference to regular test: no docker, no venv caching
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+
+      - name: Install and configure Poetry
+        uses: snok/install-poetry@v1
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+      - name: Install dependencies
+        run: |
+          poetry config installer.max-workers 10
+          poetry install --no-interaction
+
+      - name: Run pytest
+        env:
+          AA_TOKEN: ${{ secrets.AA_TOKEN }}
+          HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+          ARGILLA_API_URL: "http://localhost:6900/"
+          ARGILLA_API_KEY: "argilla.apikey"
+          CLIENT_URL: "https://api.aleph-alpha.com"
+        run: |
+          poetry run python -c "import nltk; nltk.download('punkt')"
+          poetry run pytest -n 10 -m "not docker"
+  run-notebooks:
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [macos-latest, windows-latest]
+    runs-on: ${{matrix.os}}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Install and configure Poetry
+        uses: snok/install-poetry@v1
+        with:
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+      - name: Install dependencies
+        run: |
+          poetry config installer.max-workers 10
+          poetry install --no-interaction
+      - name: Configure Poetry for notebooks and run
+        env:
+          AA_TOKEN: ${{ secrets.AA_TOKEN }}
+          HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+          ARGILLA_API_URL: "http://localhost:6900/"
+          ARGILLA_API_KEY: "argilla.apikey"
+          CLIENT_URL: "https://api.aleph-alpha.com"
+        run: |
+          [ -f .env ] && source .env
+          export AA_TOKEN
+          # Find all .ipynb files in the directory and pass them to xargs for parallel execution
+          rm -rf src/examples/.ipynb_checkpoints
+          rm -rf src/examples/how_tos/.ipynb_checkpoints
+
+          find src/examples -name "*.nbconvert.ipynb" -type f -delete
+          find src/examples -name "*.ipynb" ! -name "performance_tips.ipynb" ! -name "human_evaluation.ipynb" ! -name "how_to_human_evaluation_via_argilla.ipynb" | xargs -n 1 -P 6 poetry run jupyter nbconvert --to notebook --execute
+          find src/examples -name "*.nbconvert.ipynb" -type f -delete
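
Note: the daily matrix runs on macOS and Windows, where GitHub-hosted runners cannot provide the Linux-only Docker service containers, which is why the pytest step deselects container-backed tests with -m "not docker". Assuming a configured Poetry environment and an AA_TOKEN, the same run reproduces locally as:

    poetry run python -c "import nltk; nltk.download('punkt')"
    poetry run pytest -n 10 -m "not docker"
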
-name "how_to_human_evaluation_via_argilla.ipynb" | xargs -n 1 -P 6 poetry run jupyter nbconvert --to notebook --execute + find src/examples -name "*.nbconvert.ipynb" -type f -delete diff --git a/.github/workflows/on-push.yml b/.github/workflows/on-push.yml new file mode 100644 index 000000000..742b62f1e --- /dev/null +++ b/.github/workflows/on-push.yml @@ -0,0 +1,17 @@ +name: Intelligence Layer SDK Tests + +on: + push: + paths-ignore: + - "trace-viewer/**" + - ".github/workflows/frontend.yml" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true +jobs: + python-tests: + uses: ./.github/workflows/sdk-tests.yml + with: + runner: 'ubuntu-latest' + secrets: inherit diff --git a/.github/workflows/github-actions.yml b/.github/workflows/sdk-tests.yml similarity index 90% rename from .github/workflows/github-actions.yml rename to .github/workflows/sdk-tests.yml index 8bdddb7f2..11c3d08cf 100644 --- a/.github/workflows/github-actions.yml +++ b/.github/workflows/sdk-tests.yml @@ -1,19 +1,20 @@ -name: CI Pipeline - on: - push: - paths-ignore: - - "trace-viewer/**" - - ".github/workflows/frontend.yml" - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - + workflow_call: + inputs: + runner: + type: string + default: "ubuntu-latest" + secrets: + AA_TOKEN: + required: true + HUGGING_FACE_TOKEN: + required: true jobs: - doctest: - runs-on: ubuntu-latest - + lint: + defaults: + run: + shell: bash + runs-on: ${{inputs.runner}} steps: - name: Checkout repository uses: actions/checkout@v4 @@ -42,23 +43,25 @@ jobs: poetry config installer.max-workers 10 poetry install --no-interaction - - name: install Sphinx - run: sudo apt-get update -y && sudo apt-get install python3-sphinx - - - name: run doctest - env: - AA_TOKEN: ${{ secrets.AA_TOKEN }} - HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }} - CLIENT_URL: "https://api.aleph-alpha.com" - run: ./scripts/doctest.sh + - name: set PY for pre-commit + run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> $GITHUB_ENV - lint: - runs-on: ubuntu-latest + - uses: actions/cache@v4 + with: + path: ~/.cache/pre-commit + key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }} + - name: Run linters + run: | + ./scripts/lint.sh + doctest: + defaults: + run: + shell: bash + runs-on: ${{inputs.runner}} steps: - name: Checkout repository uses: actions/checkout@v4 - - uses: actions/setup-python@v5 with: python-version: "3.10" @@ -82,22 +85,20 @@ jobs: run: | poetry config installer.max-workers 10 poetry install --no-interaction - - - name: set PY for pre-commit - run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> $GITHUB_ENV - - - uses: actions/cache@v4 - with: - path: ~/.cache/pre-commit - key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }} - - - name: Run linters - run: | - ./scripts/lint.sh + - name: install Sphinx + run: sudo apt-get update -y && sudo apt-get install python3-sphinx + - name: run doctest + env: + AA_TOKEN: ${{ secrets.AA_TOKEN }} + HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }} + CLIENT_URL: "https://api.aleph-alpha.com" + run: ./scripts/doctest.sh test: - runs-on: ubuntu-latest - + defaults: + run: + shell: bash + runs-on: ${{inputs.runner}} services: argilla-elastic-search: image: docker.elastic.co/elasticsearch/elasticsearch:8.5.3 @@ -112,14 +113,13 @@ jobs: env: ARGILLA_ELASTICSEARCH: "http://argilla-elastic-search:9200" open-telemetry-trace-service: + image: jaegertracing/all-in-one:1.35 env: 
          COLLECTOR_OTLP_ENABLED: "true"
        ports:
          - "4317:4317"
          - "4318:4318"
          - "16686:16686"
-        image: jaegertracing/all-in-one:1.35
-
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
@@ -157,10 +157,11 @@ jobs:
           CLIENT_URL: "https://api.aleph-alpha.com"
         run: |
           ./scripts/test.sh
-
   run-notebooks:
-    runs-on: ubuntu-latest
-
+    defaults:
+      run:
+        shell: bash
+    runs-on: ${{inputs.runner}}
     services:
       argilla-elastic-search:
         image: docker.elastic.co/elasticsearch/elasticsearch:8.5.3
@@ -174,15 +175,12 @@ jobs:
           - "6900:6900"
         env:
           ARGILLA_ELASTICSEARCH: "http://argilla-elastic-search:9200"
-
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
-
       - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
-
      - name: Install and configure Poetry
        uses: snok/install-poetry@v1
        with:
@@ -196,13 +194,11 @@ jobs:
         with:
           path: .venv
           key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
-
       - name: Install dependencies
         if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
         run: |
           poetry config installer.max-workers 10
           poetry install --no-interaction
-
       - name: Configure Poetry for notebooks and run
         env:
           AA_TOKEN: ${{ secrets.AA_TOKEN }}
diff --git a/pyproject.toml b/pyproject.toml
index 2c5ea05dd..168f749c2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,6 +70,7 @@ strict = "True"
 [tool.pytest.ini_options]
 markers = [
     "internal: marks tests as internal (deselect with '-k \"not internal\"')",
+    "docker: the test depends on having a docker container running."
 ]
 addopts = "--capture=tee-sys"
 filterwarnings = [
diff --git a/tests/connectors/argilla/test_argilla_client.py b/tests/connectors/argilla/test_argilla_client.py
index 3c7a1e18b..383f8ed70 100644
--- a/tests/connectors/argilla/test_argilla_client.py
+++ b/tests/connectors/argilla/test_argilla_client.py
@@ -112,6 +112,7 @@ def long_qa_records(
     return records
 
 
+@pytest.mark.docker
 def test_error_on_non_existent_dataset(
     argilla_client: DefaultArgillaClient,
 ) -> None:
@@ -119,6 +120,7 @@ def test_error_on_non_existent_dataset(
         list(argilla_client.records("non_existent_dataset_id"))
 
 
+@pytest.mark.docker
 def test_records_returns_records_previously_added(
     argilla_client: DefaultArgillaClient,
     qa_dataset_id: str,
@@ -132,6 +134,7 @@ def test_records_returns_records_previously_added(
     )
 
 
+@pytest.mark.docker
 def test_evaluations_returns_evaluation_results(
     argilla_client: DefaultArgillaClient,
     qa_dataset_id: str,
@@ -159,6 +162,7 @@ def test_evaluations_returns_evaluation_results(
     )
 
 
+@pytest.mark.docker
 def test_split_dataset_works(
     argilla_client: DefaultArgillaClient,
     qa_dataset_id: str,
@@ -183,6 +187,7 @@ def test_split_dataset_works(
     assert old_metadata == new_metadata
 
 
+@pytest.mark.docker
 def test_split_dataset_twice_works(
     argilla_client: DefaultArgillaClient,
     qa_dataset_id: str,
@@ -205,6 +210,7 @@ def test_split_dataset_twice_works(
     assert len(metadata_properties["settings"]["values"]) == 1
 
 
+@pytest.mark.docker
 def test_split_dataset_works_with_uneven_splits(
     argilla_client: DefaultArgillaClient,
     qa_dataset_id: str,
@@ -222,6 +228,7 @@ def test_split_dataset_works_with_uneven_splits(
     assert n_records_per_split == [9, 9, 9, 9, 8, 8, 8]
 
 
+@pytest.mark.docker
 def test_add_record_adds_multiple_records_with_same_content(
     argilla_client: DefaultArgillaClient,
     qa_dataset_id: str,
@@ -242,6 +249,7 @@ def test_add_record_adds_multiple_records_with_same_content(
     assert len(list(argilla_client.records(qa_dataset_id))) == 2
 
 
+@pytest.mark.docker
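
Note: because the docker marker is registered in pyproject.toml above, pytest will not warn about it as an unknown mark. A new container-dependent test would opt in the same way (hypothetical example):

    import pytest

    @pytest.mark.docker
    def test_requiring_running_argilla() -> None:
        ...  # assumes the Argilla container from sdk-tests.yml at http://localhost:6900/
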
 def test_add_record_does_not_put_example_id_into_metadata(
     argilla_client: DefaultArgillaClient,
     qa_dataset_id: str,
@@ -265,6 +273,7 @@ def test_add_record_does_not_put_example_id_into_metadata(
         assert record.example_id == "0"
 
 
+@pytest.mark.docker
 def test_split_dataset_can_split_long_dataset(
     argilla_client: DefaultArgillaClient,
     qa_dataset_id: str,
diff --git a/tests/core/test_tracer.py b/tests/core/test_tracer.py
index cfcecca1d..a4f82891c 100644
--- a/tests/core/test_tracer.py
+++ b/tests/core/test_tracer.py
@@ -306,6 +306,7 @@ def test_in_memory_tracer_trace_viewer_doesnt_crash_if_it_cant_reach() -> None:
     expected._ipython_display_()
 
 
+@pytest.mark.docker
 def test_open_telemetry_tracer_check_consistency_in_trace_ids(
     open_telemetry_tracer: tuple[str, OpenTelemetryTracer],
 ) -> None:
@@ -322,6 +323,7 @@ def test_open_telemetry_tracer_check_consistency_in_trace_ids(
         assert _get_trace_id_from_span(span) == expected_trace_id
 
 
+@pytest.mark.docker
 def test_open_telemetry_tracer_loggs_input_and_output(
     open_telemetry_tracer: tuple[str, OpenTelemetryTracer],
     complete: Task[CompleteInput, CompleteOutput],
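
Note: the two OpenTelemetry tracer tests above talk to the Jaeger all-in-one service container (ports 4317/4318) declared in sdk-tests.yml. With that container running locally, the marked subset can be exercised on its own:

    poetry run pytest -m docker tests/core/test_tracer.py
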