# Test workflow per dataset #21

name: Test Pie Dataset

# Trigger only for changes that touch the PIE dataset builders, the dataset
# data, their tests and fixtures, or the workflow file
# `.github/workflows/test_pie_datasets.yaml`.
on:
  # Direct pushes are only checked on the main branch.
  push:
    branches:
      - main
    paths:
      - 'dataset_builders/pie/**'
      - 'data/datasets/**'
      - 'tests/dataset_builders/pie/**'
      - 'tests/fixtures/dataset_builders/pie/**'
      - '.github/workflows/test_pie_datasets.yaml'
  # Pull requests are checked when they target main or any release branch.
  pull_request:
    branches:
      - main
      - 'release/*'
    paths:
      - 'dataset_builders/pie/**'
      - 'data/datasets/**'
      - 'tests/dataset_builders/pie/**'
      - 'tests/fixtures/dataset_builders/pie/**'
      - '.github/workflows/test_pie_datasets.yaml'
jobs:
  # Builds the JSON array of dataset names that drives the matrix of the
  # `dataset_builder` job.
  collect_datasets:
    runs-on: ubuntu-latest
    outputs:
      # Exactly one of `set-datasets` / `set-no-datasets` runs. A skipped step
      # exposes an empty string, so fall through to whichever step produced a
      # value. This guarantees the output is always valid JSON for `fromJson`.
      datasets: ${{ steps.set-datasets.outputs.datasets || steps.set-no-datasets.outputs.datasets }}
    steps:
      - uses: actions/checkout@v4
      - name: Get changed dataset files
        id: changed-files
        # NOTE(review): third-party action referenced by a mutable tag;
        # consider pinning it to a full commit SHA.
        uses: tj-actions/changed-files@v44
        with:
          # The key `datasets` below becomes the prefix of this step's outputs
          # (`datasets_any_changed`, `datasets_all_changed_files`).
          files_yaml: |
            datasets:
              - 'dataset_builders/pie/**'
              - 'data/datasets/**'
              - 'tests/dataset_builders/pie/**'
              - 'tests/fixtures/dataset_builders/pie/**'
      - name: Set datasets
        # The id is what the job-level `outputs` expression refers to.
        id: set-datasets
        if: steps.changed-files.outputs.datasets_any_changed == 'true'
        env:
          DATASETS_ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.datasets_all_changed_files }}
        # Log the changed files, then publish every entry of
        # `dataset_builders/pie` as a compact JSON array in the `datasets`
        # step output.
        run: |
          for file in ${DATASETS_ALL_CHANGED_FILES}; do
            echo "$file was changed"
          done
          DATASETS=$(ls dataset_builders/pie)
          for dataset in ${DATASETS}; do
            echo "$dataset was found"
          done
          # Drop empty entries so that an empty listing yields `[]`, not `[""]`.
          echo "datasets=$(echo "$DATASETS" | jq -R -s -c 'split("\n") | map(select(length > 0))')" >> "$GITHUB_OUTPUT"
      - name: Set empty dataset list
        # Needs its own id: without one the value written here cannot be
        # referenced from the job-level `outputs`.
        id: set-no-datasets
        if: steps.changed-files.outputs.datasets_any_changed != 'true'
        run: echo "datasets=[]" >> "$GITHUB_OUTPUT"

  # Runs once per dataset reported by `collect_datasets`.
  dataset_builder:
    runs-on: ubuntu-latest
    needs: [collect_datasets]
    # An empty matrix is an error, so skip the job when there is nothing to do.
    if: needs.collect_datasets.outputs.datasets != '[]'
    strategy:
      matrix:
        # Matrix values come from the JSON array produced by the previous job.
        dataset: ${{ fromJson(needs.collect_datasets.outputs.datasets) }}
    steps:
      - name: Show dataset
        # Pass the value through the environment instead of interpolating the
        # expression into the script, so it is never parsed as shell code.
        env:
          DATASET: ${{ matrix.dataset }}
        run: echo "$DATASET"