[FEAT] Native Parquet Reader into pyarrow directly #4426
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions | |
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions | |
name: daft | |
on: | |
push: | |
branches: [main] | |
pull_request: | |
branches: [main] | |
env: | |
DAFT_ANALYTICS_ENABLED: '0' | |
jobs: | |
unit-tests-with-coverage: | |
runs-on: ubuntu-latest | |
timeout-minutes: 15 | |
strategy: | |
fail-fast: false | |
matrix: | |
python-version: ['3.7', '3.10'] | |
daft-runner: [py, ray] | |
new-query-planner: [1, 0] | |
pyarrow-version: [6.0.1, 12.0] | |
exclude: | |
- daft-runner: ray | |
pyarrow-version: 6.0.1 | |
- daft-runner: py | |
python-version: '3.10' | |
pyarrow-version: 6.0.1 | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: moonrepo/setup-rust@v0 | |
with: | |
cache: false | |
- name: Install cargo-llvm-cov | |
uses: taiki-e/install-action@cargo-llvm-cov | |
- uses: Swatinem/rust-cache@v2 | |
with: | |
key: ${{ runner.os }}-build | |
cache-all-crates: 'true' | |
- name: Set up Python ${{ matrix.python-version }} | |
uses: actions/setup-python@v4 | |
with: | |
python-version: ${{ matrix.python-version }} | |
- name: Setup Virtual Env | |
run: | | |
python -m venv venv | |
echo "$GITHUB_WORKSPACE/venv/bin" >> $GITHUB_PATH | |
- name: Install dependencies | |
run: | | |
pip install --upgrade pip | |
pip install -r requirements-dev.txt | |
- name: Override pyarrow | |
if: ${{ matrix.pyarrow-version }} | |
run: pip install pyarrow==${{ matrix.pyarrow-version }} | |
- name: Build library and Test with pytest | |
run: | | |
source activate | |
# source <(cargo llvm-cov show-env --export-prefix) | |
# export CARGO_TARGET_DIR=$CARGO_LLVM_COV_TARGET_DIR | |
# export CARGO_INCREMENTAL=1 | |
# cargo llvm-cov clean --workspace | |
maturin develop | |
mkdir -p report-output && pytest --cov=daft --ignore tests/integration --durations=50 | |
coverage combine -a --data-file='.coverage' || true | |
coverage xml -o ./report-output/coverage-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.daft-runner }}-${{ matrix.pyarrow-version }}-${{ matrix.new-query-planner }}.xml | |
# cargo llvm-cov --no-run --lcov --output-path report-output/rust-coverage-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.daft-runner }}-${{ matrix.pyarrow-version }}-${{ matrix.new-query-planner }}.lcov | |
env: | |
DAFT_RUNNER: ${{ matrix.daft-runner }} | |
DAFT_NEW_QUERY_PLANNER: ${{ matrix.new-query-planner }} | |
- name: Upload coverage report | |
uses: actions/upload-artifact@v3 | |
with: | |
name: coverage-reports | |
path: ./report-output | |
- name: Send Slack notification on failure | |
uses: slackapi/[email protected] | |
if: ${{ failure() && (github.ref == 'refs/heads/main') }} | |
with: | |
payload: | | |
{ | |
"blocks": [ | |
{ | |
"type": "section", | |
"text": { | |
"type": "mrkdwn", | |
"text": ":rotating_light: [CI] Pytest Unit Tests <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|workflow> *FAILED on main* :rotating_light:" | |
} | |
} | |
] | |
} | |
env: | |
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} | |
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK | |
integration-test-build: | |
runs-on: ubuntu-latest | |
timeout-minutes: 30 | |
env: | |
package-name: getdaft | |
strategy: | |
matrix: | |
python-version: ['3.7'] | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 0 | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: ${{ matrix.python-version }} | |
architecture: x64 | |
- run: pip install -U twine toml maturin | |
- run: python tools/patch_package_version.py | |
- uses: moonrepo/setup-rust@v0 | |
with: | |
cache: false | |
- uses: Swatinem/rust-cache@v2 | |
with: | |
key: ${{ runner.os }}-integration-build | |
cache-all-crates: 'true' | |
# NOTE: we don't build with all the actual release optimizations to avoid hellish CI times | |
- name: Build wheels | |
run: maturin build --release --compatibility linux --out dist | |
- name: Upload wheels | |
uses: actions/upload-artifact@v3 | |
with: | |
name: wheels | |
path: dist | |
integration-test-tpch: | |
runs-on: ubuntu-latest | |
timeout-minutes: 30 | |
needs: | |
- integration-test-build | |
env: | |
package-name: getdaft | |
strategy: | |
fail-fast: false | |
matrix: | |
python-version: ['3.7'] | |
daft-runner: [py, ray] | |
new-query-planner: [1, 0] | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 0 | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: ${{ matrix.python-version }} | |
architecture: x64 | |
- name: Download built wheels | |
uses: actions/download-artifact@v3 | |
with: | |
name: wheels | |
path: dist | |
- name: Setup Virtual Env | |
run: | | |
python -m venv venv | |
echo "$GITHUB_WORKSPACE/venv/bin" >> $GITHUB_PATH | |
- name: Install Daft and dev dependencies | |
run: | | |
pip install --upgrade pip | |
pip install -r requirements-dev.txt dist/${{ env.package-name }}-*x86_64*.whl --force-reinstall | |
rm -rf daft | |
- uses: actions/cache@v3 | |
env: | |
cache-name: cache-tpch-data | |
with: | |
path: data/tpch-dbgen | |
key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('tests/integration/test_tpch.py', 'benchmarking/tpch/**') }} | |
- name: Run TPCH integration tests | |
run: | | |
pytest tests/integration/test_tpch.py --durations=50 | |
env: | |
DAFT_RUNNER: ${{ matrix.daft-runner }} | |
DAFT_NEW_QUERY_PLANNER: ${{ matrix.new-query-planner }} | |
- name: Send Slack notification on failure | |
uses: slackapi/[email protected] | |
if: ${{ failure() && (github.ref == 'refs/heads/main') }} | |
with: | |
payload: | | |
{ | |
"blocks": [ | |
{ | |
"type": "section", | |
"text": { | |
"type": "mrkdwn", | |
"text": ":rotating_light: [CI] TPCH Integration Tests <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|workflow> *FAILED on main* :rotating_light:" | |
} | |
} | |
] | |
} | |
env: | |
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} | |
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK | |
integration-test-io: | |
runs-on: ubuntu-latest | |
timeout-minutes: 30 | |
needs: | |
- integration-test-build | |
env: | |
package-name: getdaft | |
strategy: | |
fail-fast: false | |
matrix: | |
python-version: ['3.8'] # can't use 3.7 due to requiring anon mode for adlfs | |
daft-runner: [py, ray] | |
new-query-planner: [1, 0] | |
# These permissions are needed to interact with GitHub's OIDC Token endpoint. | |
# This is used in the step "Assume GitHub Actions AWS Credentials" | |
permissions: | |
id-token: write | |
contents: read | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 0 | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: ${{ matrix.python-version }} | |
architecture: x64 | |
- name: Download built wheels | |
uses: actions/download-artifact@v3 | |
with: | |
name: wheels | |
path: dist | |
- name: Setup Virtual Env | |
run: | | |
python -m venv venv | |
echo "$GITHUB_WORKSPACE/venv/bin" >> $GITHUB_PATH | |
- name: Install Daft and dev dependencies | |
run: | | |
pip install --upgrade pip | |
pip install -r requirements-dev.txt dist/${{ env.package-name }}-*x86_64*.whl --force-reinstall | |
rm -rf daft | |
- name: Prepare tmpdirs for IO services | |
run: | | |
mkdir -p /tmp/daft-integration-testing/nginx | |
chmod +rw /tmp/daft-integration-testing/nginx | |
- name: Assume GitHub Actions AWS Credentials | |
uses: aws-actions/configure-aws-credentials@v3 | |
with: | |
aws-region: us-west-2 | |
role-to-assume: ${{ secrets.ACTIONS_AWS_ROLE_ARN }} | |
role-session-name: DaftPythonPackageGitHubWorkflow | |
- name: Spin up IO services | |
uses: isbang/[email protected] | |
with: | |
compose-file: ./tests/integration/docker-compose/docker-compose.yml | |
down-flags: --volumes | |
- name: Run IO integration tests | |
run: | | |
pytest tests/integration/io -m 'integration' --durations=50 | |
env: | |
DAFT_RUNNER: ${{ matrix.daft-runner }} | |
DAFT_NEW_QUERY_PLANNER: ${{ matrix.new-query-planner }} | |
- name: Send Slack notification on failure | |
uses: slackapi/[email protected] | |
if: ${{ failure() && (github.ref == 'refs/heads/main') }} | |
with: | |
payload: | | |
{ | |
"blocks": [ | |
{ | |
"type": "section", | |
"text": { | |
"type": "mrkdwn", | |
"text": ":rotating_light: [CI] IO Integration Tests <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|workflow> *FAILED on main* :rotating_light:" | |
} | |
} | |
] | |
} | |
env: | |
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} | |
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK | |
rust-tests: | |
runs-on: ubuntu-latest | |
timeout-minutes: 15 | |
strategy: | |
fail-fast: false | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: moonrepo/setup-rust@v0 | |
with: | |
cache: false | |
- uses: Swatinem/rust-cache@v2 | |
with: | |
key: ${{ runner.os }}-rust-build | |
cache-all-crates: 'true' | |
- name: Install cargo-llvm-cov | |
uses: taiki-e/install-action@cargo-llvm-cov | |
- name: Generate code coverage | |
run: mkdir -p report-output && cargo llvm-cov --no-default-features --workspace --lcov --output-path ./report-output/lcov.info | |
- name: Upload coverage report | |
uses: actions/upload-artifact@v3 | |
with: | |
name: coverage-reports | |
path: ./report-output | |
- name: Send Slack notification on failure | |
uses: slackapi/[email protected] | |
if: ${{ failure() && (github.ref == 'refs/heads/main') }} | |
with: | |
payload: | | |
{ | |
"blocks": [ | |
{ | |
"type": "section", | |
"text": { | |
"type": "mrkdwn", | |
"text": ":rotating_light: [CI] Rust Unit Tests <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|workflow> *FAILED on main* :rotating_light:" | |
} | |
} | |
] | |
} | |
env: | |
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} | |
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK | |
publish-coverage-reports: | |
name: Publish coverage reports to CodeCov | |
runs-on: ubuntu-latest | |
needs: | |
- unit-tests-with-coverage | |
- rust-tests | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: actions/download-artifact@v3 | |
with: | |
name: coverage-reports | |
path: ./report-output | |
- name: Upload coverage reports to Codecov with GitHub Action | |
uses: codecov/codecov-action@v3 | |
env: | |
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} | |
files: ./report-output/* | |
- name: Send Slack notification on failure | |
uses: slackapi/[email protected] | |
if: ${{ failure() && (github.ref == 'refs/heads/main') }} | |
with: | |
payload: | | |
{ | |
"blocks": [ | |
{ | |
"type": "section", | |
"text": { | |
"type": "mrkdwn", | |
"text": ":rotating_light: [CI] Codecov Uploads <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|workflow> *FAILED on main* :rotating_light:" | |
} | |
} | |
] | |
} | |
env: | |
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} | |
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK | |
test-imports: | |
runs-on: ubuntu-latest | |
timeout-minutes: 15 | |
strategy: | |
fail-fast: false | |
matrix: | |
python-version: ['3.7'] | |
steps: | |
- uses: actions/checkout@v4 | |
- uses: moonrepo/setup-rust@v0 | |
with: | |
cache: false | |
- uses: Swatinem/rust-cache@v2 | |
with: | |
key: ${{ runner.os }}-build | |
cache-all-crates: 'true' | |
- name: Set up Python ${{ matrix.python-version }} | |
uses: actions/setup-python@v4 | |
with: | |
python-version: ${{ matrix.python-version }} | |
- name: Setup Virtual Env | |
run: | | |
python -m venv venv | |
source venv/bin/activate | |
pip install maturin | |
- name: Build Rust Library | |
run: | | |
venv/bin/maturin build --out dist | |
- name: Test Imports in Clean Env | |
run: | | |
rm -rf daft | |
rm -rf venv | |
python -m venv venv | |
source venv/bin/activate | |
ls -R ./dist | |
venv/bin/pip install dist/*.whl | |
venv/bin/python -c 'import daft; from daft import *' | |
- name: Send Slack notification on failure | |
uses: slackapi/[email protected] | |
if: ${{ failure() && (github.ref == 'refs/heads/main') }} | |
with: | |
payload: | | |
{ | |
"blocks": [ | |
{ | |
"type": "section", | |
"text": { | |
"type": "mrkdwn", | |
"text": ":rotating_light: [CI] Python Import Checks <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|workflow> *FAILED on main* :rotating_light:" | |
} | |
} | |
] | |
} | |
env: | |
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} | |
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK | |
style: | |
runs-on: ubuntu-latest | |
timeout-minutes: 15 | |
strategy: | |
fail-fast: false | |
matrix: | |
python-version: ['3.8'] | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Set up Python ${{ matrix.python-version }} | |
uses: actions/setup-python@v4 | |
with: | |
python-version: ${{ matrix.python-version }} | |
- name: Install pre-commit | |
run: | | |
pip install --upgrade pip | |
pip install pre-commit | |
- uses: moonrepo/setup-rust@v0 | |
with: | |
cache: false | |
- uses: Swatinem/rust-cache@v2 | |
with: | |
key: ${{ runner.os }}-build | |
cache-all-crates: 'true' | |
- uses: actions/cache@v3 | |
id: pre-commit-cache | |
with: | |
path: ~/.cache/pre-commit/ | |
key: ${{ runner.os }}-python-${{ steps.setup-python.outputs.python-version }}-pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} | |
- name: Python And Rust Style Check | |
run: | | |
pre-commit run --all-files | |
- name: Send Slack notification on failure | |
uses: slackapi/[email protected] | |
if: ${{ failure() && (github.ref == 'refs/heads/main') }} | |
with: | |
payload: | | |
{ | |
"blocks": [ | |
{ | |
"type": "section", | |
"text": { | |
"type": "mrkdwn", | |
"text": ":rotating_light: [CI] Style Checks <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|workflow> *FAILED on main* :rotating_light:" | |
} | |
} | |
] | |
} | |
env: | |
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} | |
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK |