Collecting statistics from AM dataset (#100) #146

Workflow file for this run

.github/workflows/test_pie_datasets.yaml at e68f60d


	name: Test PIE Dataset

	on:
	push:
	branches: [main]
	paths:
	- "dataset_builders/pie/**"
	- "data/datasets/**"
	- "tests/dataset_builders/pie/**"
	- "tests/fixtures/dataset_builders/pie/**"
	- ".github/workflows/test_pie_datasets.yaml"
	pull_request:
	branches: [main, "release/*"]
	paths:
	- "dataset_builders/pie/**"
	- "data/datasets/**"
	- "tests/dataset_builders/pie/**"
	- "tests/fixtures/dataset_builders/pie/**"
	- ".github/workflows/test_pie_datasets.yaml"

	jobs:
	collect_modified_datasets: # Job that lists modified datasets
	runs-on: ubuntu-latest
	outputs:
	datasets: ${{ steps.set-datasets.outputs.datasets }}
	steps:
	- uses: actions/checkout@v4
	- name: Get changed dataset files
	id: changed-files
	uses: tj-actions/changed-files@v44
	with:
	files_yaml: \|
	datasets:
	- 'dataset_builders/pie/**'
	- 'data/datasets/**'
	- 'tests/dataset_builders/pie/**'
	- 'tests/fixtures/dataset_builders/pie/**'
	- name: Determine modified datasets
	id: set-datasets # Give it an id to handle step outputs in the outputs key above
	# NOTE: Ensure all outputs are prefixed by the same key used above e.g. `datasets_(...)`
	# when trying to access the `any_changed` output.
	if: steps.changed-files.outputs.datasets_any_changed == 'true'
	env:
	DATASETS_ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.datasets_all_changed_files }}
	run: \|
	echo "all changed dataset files: $DATASETS_ALL_CHANGED_FILES"
	DATASETS=$(ls dataset_builders/pie)
	echo "collected datasets: $DATASETS" \| tr '\n' ' '
	echo "\n"
	PREFIXES="dataset_builders/pie/ data/datasets/ tests/dataset_builders/pie/ tests/fixtures/dataset_builders/pie/"
	echo "file paths to check for changes: $PREFIXES"
	# filter entries in DATASETS for which any prefix+dataset is in DATASETS_ALL_CHANGED_FILES
	DATASETS_FILTERED=$(for dataset in ${DATASETS}; do
	for prefix in ${PREFIXES}; do
	if [[ $DATASETS_ALL_CHANGED_FILES == "$prefix$dataset" ]]; then
	echo $dataset
	break
	fi
	done
	done)
	echo "filtered datasets: $DATASETS_FILTERED"
	# Define step output named dataset base on ls command transformed to JSON thanks to jq
	echo "datasets=$(echo "$DATASETS_FILTERED" \| jq -R -s -c 'split("\n")[:-1]')" >> "$GITHUB_OUTPUT"

	test_dataset:
	runs-on: ubuntu-latest
	needs: [ collect_modified_datasets ] # Depends on previous job
	if: ${{ needs.collect_modified_datasets.outputs.datasets }}
	strategy:
	fail-fast: false
	matrix:
	# List matrix strategy from datasets dynamically collected in the previous job
	dataset: ${{fromJson(needs.collect_modified_datasets.outputs.datasets)}}
	timeout-minutes: 15
	steps:
	- run: echo "test dataset ${{matrix.dataset}}"
	- name: Check out repository
	uses: actions/checkout@v4
	- name: Set up python
	id: setup-python
	uses: actions/setup-python@v5
	with:
	python-version: '3.9'
	cache: 'pip' # caching pip dependencies
	- name: Install dependencies
	run: \|
	pip install --upgrade pip
	# install dataset requirements
	pip install -r dataset_builders/pie/${{matrix.dataset}}/requirements.txt
	# check if test requirements file exists and install it
	ADD_REQUIREMENTS_FILE=tests/dataset_builders/pie/${{matrix.dataset}}/additional-requirements.txt
	test -f $ADD_REQUIREMENTS_FILE && pip install -r $ADD_REQUIREMENTS_FILE
	# install pytest and pytest-cov
	pip install pytest pytest-cov
	- name: Run tests
	run: \|
	pytest \
	tests/dataset_builders/pie/${{matrix.dataset}} \
	-k "not slow" \
	--cov=dataset_builders/pie/${{matrix.dataset}} \
	--cov-report=xml:coverage/${{matrix.dataset}}.xml
	- name: Upload coverage
	uses: codecov/codecov-action@v4
	with:
	file: coverage/${{matrix.dataset}}.xml
	name: ${{matrix.dataset}}
	fail_ci_if_error: true
	env:
	CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Collecting statistics from AM dataset (#100) #146

Workflow file

Collecting statistics from AM dataset (#100) #146

Jobs

Run details

Workflow file for this run