refactor: include advanced execution #2826

Workflow file for this run

	# Display name of this workflow.
	name: Tests

	# Run on pushes to master and on pull requests whose base branch matches "*".
	on:
	  push:
	    branches: [master]
	  pull_request:
	    branches: ["*"]

	jobs:
	# Skipped on master (see `if`); hands workflow "main.yml" to the Khan
	# pull-request-workflow-cancel action.
	Cancel-previous-jobs:
	  runs-on: ubuntu-latest
	  if: github.ref != 'refs/heads/master'
	  steps:
	    # The action reference was mangled by e-mail obfuscation
	    # ("khan/[email protected]") and is restored here.
	    # NOTE(review): version 1.0.0 is taken from the action's README -- confirm
	    # against the repository's real workflow file.
	    - uses: khan/pull-request-workflow-cancel@1.0.0
	      with:
	        workflows: "main.yml"
	      env:
	        GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"

	# Formatting job: checks out the full history and runs super-linter.
	Formatting:
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	    - name: Formatting
	      # NOTE(review): the reference below was mangled by e-mail obfuscation
	      # ("[email protected]") and is kept verbatim; restore the real
	      # `super-linter/<action>@<version>` before using this file.
	      uses: super-linter/[email protected]
	      env:
	        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	        DEFAULT_BRANCH: master
	        VALIDATE_ALL_CODEBASE: false
	        FILTER_REGEX_EXCLUDE: CHANGELOG.md
	        # Validators that are switched on explicitly.
	        VALIDATE_MARKDOWN: true
	        VALIDATE_PYTHON_BLACK: true
	        VALIDATE_SNAKEMAKE_SNAKEFMT: true

	# Lint job: passes `--lint` for workflow/Snakefile to the snakemake action.
	Linting:
	  runs-on: ubuntu-latest
	  env:
	    GISAID_API_TOKEN: ${{ secrets.GISAID_API_TOKEN }}
	  steps:
	    - uses: actions/checkout@v4
	    - name: Lint workflow
	      # NOTE(review): the reference below was mangled by e-mail obfuscation
	      # ("[email protected]") and is kept verbatim; restore the real
	      # `snakemake/<action>@<version>` before using this file.
	      uses: snakemake/[email protected]
	      with:
	        snakefile: workflow/Snakefile
	        directory: .
	        # Installs peppy into the conda environment named "snakemake".
	        stagein: mamba install -n snakemake -c conda-forge peppy
	        args: '--lint'

	# pre-commit action currently fails:
	# https://github.com/IKIM-Essen/uncovar/actions/runs/4304753941/jobs/7506225198#step:4:115
	# revisit when new pre-commit release >3.0.0 is out
	# Pre-Commit:
	# runs-on: ubuntu-latest
	# if: github.ref != 'refs/heads/master'
	# steps:
	# - uses: actions/checkout@v4
	# - uses: actions/setup-python@v5
	# - uses: pre-commit/[email protected]

	# Matrix job: one run per combination of rule, technology and seq_method.
	Technology-Tests:
	  runs-on: ubuntu-latest
	  env:
	    GISAID_API_TOKEN: ${{ secrets.GISAID_API_TOKEN }}
	  # - Pre-Commit is not required while that job is disabled
	  needs: [Linting]
	  strategy:
	    matrix:
	      # matrix.rule is appended verbatim to the snakemake arguments,
	      # so "all -np" passes the extra flags -np.
	      rule:
	        - all
	        - "all -np"
	      # disable ont actions
	      technology:
	        - all
	        - illumina
	        - ont
	        - ion
	      # technology: [all, illumina, ion]
	      seq_method:
	        - shotgun
	        - amplicon
	  steps:
	# Check out the repository.
	- uses: actions/checkout@v4

	- name: Free Disk Space (Ubuntu)
	  # NOTE(review): the reference below was mangled by e-mail obfuscation
	  # ("[email protected]") and is kept verbatim; restore the real
	  # `jlumbroso/<action>@<version>` before using this file.
	  uses: jlumbroso/[email protected]
	  with:
	    # this might remove tools that are actually needed,
	    # if set to "true" but frees about 6 GB
	    tool-cache: false

	    # all of these default to true, but feel free to set to
	    # "false" if necessary for your workflow
	    android: true
	    dotnet: true
	    haskell: true
	    large-packages: true
	    docker-images: false
	    swap-storage: true

	# Quoted so the version stays a string.
	- uses: actions/setup-python@v5
	  with:
	    python-version: "3.11"

	# Deletes the Android and .NET directories from the runner:
	# android - will release about 10 GB if you don't need Android
	# dotnet - will release about 20 GB if you don't need .NET
	- name: Free up some disk space
	  run: |
	    sudo rm -rf /usr/local/lib/android
	    sudo rm -rf /usr/share/dotnet
	# Writes a sample sheet with one Illumina and one Ion Torrent sample.
	# No step with id `test-data` exists in this job, so the cache-hit check
	# evaluates to true; 'compare_assemblers' is not in this job's matrix, so
	# the step effectively runs for rules starting with 'all' when technology == 'all'.
	- name: Prepare test data for all technologies
	  if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'all' || matrix.rule == 'compare_assemblers')
	  run: |
	    # is_amplicon_data column: 0 for shotgun, 1 for every other seq_method.
	    if [[ "${{ matrix.seq_method }}" = "shotgun" ]] ; then export AMPLICON=0; else export AMPLICON=1; fi
	    mkdir -p .tests/data
	    # NOTE(review): both mates are fetched from the same reads.1 URL -- confirm this is intended.
	    curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.1.fastq.gz
	    curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.2.fastq.gz
	    curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ERR5745913.fastq.gz > .tests/data/ion_reads.fastq.gz
	    # First echo truncates the sheet and writes the header, the others append rows.
	    echo sample_name,fq1,fq2,date,is_amplicon_data,technology > .tests/config/pep/samples.csv
	    echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,$AMPLICON,illumina >> .tests/config/pep/samples.csv
	    echo ion-test,data/ion_reads.fastq.gz,,2022-01-01,$AMPLICON,ion >> .tests/config/pep/samples.csv
	# Writes a sample sheet containing only the paired-end Illumina sample.
	# No step with id `test-data` exists in this job, so the cache-hit check
	# evaluates to true; 'compare_assemblers' is not in this job's matrix.
	- name: Prepare test data for Illumina
	  if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'illumina' || matrix.rule == 'compare_assemblers')
	  run: |
	    # is_amplicon_data column: 0 for shotgun, 1 for every other seq_method.
	    if [[ "${{ matrix.seq_method }}" = "shotgun" ]] ; then export AMPLICON=0; else export AMPLICON=1; fi
	    mkdir -p .tests/data
	    # NOTE(review): both mates are fetched from the same reads.1 URL -- confirm this is intended.
	    curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.1.fastq.gz
	    curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.2.fastq.gz
	    echo sample_name,fq1,fq2,date,is_amplicon_data,technology > .tests/config/pep/samples.csv
	    echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,$AMPLICON,illumina >> .tests/config/pep/samples.csv
	# Writes a sample sheet containing only the single-file ONT sample (no fq2 column).
	# No step with id `test-data` exists in this job, so the cache-hit check
	# evaluates to true; 'compare_assemblers' is not in this job's matrix.
	- name: Prepare test data for Oxford Nanopore
	  if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'ont' || matrix.rule == 'compare_assemblers')
	  run: |
	    # is_amplicon_data column: 0 for shotgun, 1 for every other seq_method.
	    if [[ "${{ matrix.seq_method }}" = "shotgun" ]] ; then export AMPLICON=0; else export AMPLICON=1; fi
	    mkdir -p .tests/data
	    curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ont_reads.fastq.gz > .tests/data/ont_reads.fastq.gz
	    echo sample_name,fq1,date,is_amplicon_data,technology > .tests/config/pep/samples.csv
	    echo ont-test,data/ont_reads.fastq.gz,2022-01-01,$AMPLICON,ont >> .tests/config/pep/samples.csv
	# Writes a sample sheet containing only the single-file Ion Torrent sample (no fq2 column).
	# No step with id `test-data` exists in this job, so the cache-hit check
	# evaluates to true; 'compare_assemblers' is not in this job's matrix.
	- name: Prepare test data for Ion Torrent
	  if: steps.test-data.outputs.cache-hit != true && (startsWith(matrix.rule, 'all') && matrix.technology == 'ion' || matrix.rule == 'compare_assemblers')
	  run: |
	    # is_amplicon_data column: 0 for shotgun, 1 for every other seq_method.
	    if [[ "${{ matrix.seq_method }}" = "shotgun" ]] ; then export AMPLICON=0; else export AMPLICON=1; fi
	    mkdir -p .tests/data
	    curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ERR5745913.fastq.gz > .tests/data/ion_reads.fastq.gz
	    echo sample_name,fq1,date,is_amplicon_data,technology > .tests/config/pep/samples.csv
	    echo ion-test,data/ion_reads.fastq.gz,2022-01-01,$AMPLICON,ion >> .tests/config/pep/samples.csv
	# Downloads NCBI nuccore record BA000005.3 as FASTA and stores it
	# gzip-compressed under the path human-genome.fna.gz.
	# No step with id `test-resources` exists in this job, so the condition evaluates to true.
	- name: Use smaller reference files for testing
	  if: steps.test-resources.outputs.cache-hit != true
	  run: |
	    # mkdir -p .tests/resources/minikraken-8GB
	    # curl -SL https://github.com/thomasbtf/small-kraken-db/raw/master/human_k2db.tar.gz | tar zxvf - -C .tests/resources/minikraken-8GB --strip 1
	    mkdir -p .tests/resources/genomes
	    curl -SL "https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?id=BA000005.3&db=nuccore&report=fasta" | gzip -c > .tests/resources/genomes/human-genome.fna.gz
	# Creates the strain-genomes table and fetches the two listed genomes from
	# NCBI instead of GISAID.
	- name: Simulate GISAID download
	  run: |
	    mkdir -p .tests/results/benchmarking/tables
	    echo -e "resources/genomes/B.1.1.7.fasta\nresources/genomes/B.1.351.fasta" > .tests/results/benchmarking/tables/strain-genomes.txt
	    mkdir -p .tests/resources/genomes
	    # sed '$ d' drops the last line of each downloaded record.
	    curl "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=MZ314997.1&rettype=fasta" | sed '$ d' > .tests/resources/genomes/B.1.1.7.fasta
	    curl "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=MZ314998.1&rettype=fasta" | sed '$ d' > .tests/resources/genomes/B.1.351.fasta
	# Runs the matrix rule inside .tests; matrix.rule is appended to the arguments.
	# NOTE(review): both `uses:` references below were mangled by e-mail
	# obfuscation ("[email protected]") and are kept verbatim; restore the real
	# `snakemake/<action>@<version>` before using this file.
	- name: Test rule ${{ matrix.rule }} on ${{ matrix.technology }} ${{ matrix.seq_method }} data
	  uses: snakemake/[email protected]
	  with:
	    snakefile: workflow/Snakefile
	    directory: .tests
	    # Folded scalar: the lines are joined with single spaces into one argument string.
	    args: >-
	      -p --use-conda --show-failed-logs --cores 2
	      --resources ncbi_api_requests=1
	      --conda-cleanup-pkgs cache
	      --conda-frontend mamba
	      ${{ matrix.rule }}

	# Report generation is skipped when the rule starts with 'all -np'.
	- name: Test report
	  if: startsWith(matrix.rule, 'all -np') != true
	  uses: snakemake/[email protected]
	  with:
	    snakefile: workflow/Snakefile
	    directory: .tests
	    args: "${{ matrix.rule }} --report report.zip"

	# Upload artifacts only for technology == 'all' and never for the 'all -np' rule.
	# The condition previously compared against 'all -npr', which is not a value
	# of matrix.rule, so the 'all -np' runs were never excluded.
	- name: Upload report
	  uses: actions/upload-artifact@v4
	  if: matrix.technology == 'all' && matrix.rule != 'all -np'
	  with:
	    name: report-rule-${{ matrix.rule }}-${{ matrix.technology }}-${{ matrix.seq_method }}
	    path: .tests/results/patient-reports/2022-01-01.zip

	- name: Upload logs
	  uses: actions/upload-artifact@v4
	  if: matrix.technology == 'all' && matrix.rule != 'all -np'
	  with:
	    name: log-rule-${{ matrix.rule }}-technology-${{ matrix.technology }}-${{ matrix.seq_method }}
	    path: .tests/logs/

	# Recursively sets mode 755 on the conda directory under .tests/.snakemake.
	- name: Change permissions for caching
	  run: >-
	    sudo chmod -R 755 .tests/.snakemake/conda

	# Prints file-system usage in human-readable units.
	- name: Print disk space
	  run: >-
	    sudo df -h

	# Matrix job: one run per benchmark rule listed below.
	Benchmarks-Tests:
	  runs-on: ubuntu-latest
	  env:
	    GISAID_API_TOKEN: ${{ secrets.GISAID_API_TOKEN }}
	  # - Pre-Commit is not required while that job is disabled
	  needs: [Linting]
	  strategy:
	    matrix:
	      rule:
	        - benchmark_strain_calling
	        - benchmark_assembly
	        - benchmark_mixtures
	        - benchmark_reads
	        - compare_assemblers
	        # - generate_test_cases
	  steps:
	# Check out the repository.
	- uses: actions/checkout@v4

	- name: Free Disk Space (Ubuntu)
	  # NOTE(review): the reference below was mangled by e-mail obfuscation
	  # ("[email protected]") and is kept verbatim; restore the real
	  # `jlumbroso/<action>@<version>` before using this file.
	  uses: jlumbroso/[email protected]
	  with:
	    # this might remove tools that are actually needed,
	    # if set to "true" but frees about 6 GB
	    tool-cache: false

	    # all of these default to true, but feel free to set to
	    # "false" if necessary for your workflow
	    android: true
	    dotnet: true
	    haskell: true
	    large-packages: true
	    docker-images: false
	    swap-storage: true

	# - name: Cache conda dependencies
	# uses: actions/cache@v2
	# with:
	# path: |
	# .tests/.snakemake/conda
	# key: benchmarks-${{ runner.os }}-${{ matrix.rule }}-${{ matrix.technology }}-${{ matrix.seq_method }}-${{ hashFiles('.tests/.snakemake/conda/.yaml') }}

	# TODO caches are currently completely misleading, as they lead to certain files becoming present on disk which might
	# then hide failures that would otherwise be seen.

	# - name: Get date
	# id: get-date
	# run: |
	# echo "::set-output name=date::$(/bin/date -u "+%Y%m%d")"
	# shell: bash

	# - name: Cache resources
	# id: test-resources
	# uses: actions/cache@v2
	# with:
	# path: |
	# .tests/resources/minikraken-8GB
	# .tests/resources/genomes/human-genome.fna.gz
	# key: ${{ runner.os }}-test-resources-${{ steps.get-date.outputs.date }}-${{ hashFiles('.tests/resourcestaxo.k2d') }}
	# restore-keys: |
	# ${{ runner.os }}-test-resources-${{ steps.get-date.outputs.date }}-
	# ${{ runner.os }}-test-resources-

	# - name: Cache results
	# if: startsWith(matrix.rule, 'all')
	# id: test-results
	# uses: actions/cache@v2
	# with:
	# path: |
	# .tests/results
	# key: ${{ runner.os }}-results-${{ steps.get-date.outputs.date }}-${{ hashFiles('**results/2021-02-01/qc/multiqc.html') }}
	# restore-keys: |
	# ${{ runner.os }}-results-${{ steps.get-date.outputs.date }}-
	# ${{ runner.os }}-results-

	# - name: Cache data
	# if: startsWith(matrix.rule, 'all')
	# id: test-data
	# uses: actions/cache@v2
	# with:
	# path: |
	# .tests/data
	# key: ${{ runner.os }}-test-data-${{ steps.get-date.outputs.date }}-${{ hashFiles('*.tests/data/.fastq.gz') }}
	# restore-keys: |
	# ${{ runner.os }}-test-data-${{ steps.get-date.outputs.date }}-
	# ${{ runner.os }}-test-data-

	# - name: Cache benchmark data
	# if: startsWith(matrix.rule, 'all') != true
	# id: benchmark-data
	# uses: actions/cache@v2
	# with:
	# path: |
	# .tests/resources/benchmarking
	# key: ${{ runner.os }}-benchmark-data-${{ steps.get-date.outputs.date }}-${{ hashFiles('.tests/resources/benchmarking//reads.1.fastq.gz') }}
	# restore-keys: |
	# ${{ runner.os }}-benchmark-data-${{ steps.get-date.outputs.date }}-
	# ${{ runner.os }}-benchmark-data-

	# - name: Cache test dependencies
	# if: startsWith(matrix.rule, 'all')
	# id: test-dependencies
	# uses: actions/cache@v2
	# with:
	# path: |
	# .tests/.snakemake/conda
	# key: ${{ runner.os }}-sars-cov-test-dependencies-${{ steps.get-date.outputs.date }}-${{ hashFiles('.tests/.snakemake/conda/.yaml') }}
	# restore-keys: |
	# ${{ runner.os }}-sars-cov-test-dependencies-${{ steps.get-date.outputs.date }}-
	# ${{ runner.os }}-sars-cov-test-dependencies-

	# - name: Cache benchmark dependencies
	# if: startsWith(matrix.rule, 'all') != true
	# id: benchmark-dependencies
	# uses: actions/cache@v2
	# with:
	# path: |
	# .tests/.snakemake/conda
	# key: ${{ runner.os }}-sars-cov-benchmark-dependencies-${{ steps.get-date.outputs.date }}-${{ hashFiles('.tests/.snakemake/conda/.yaml') }}
	# restore-keys: |
	# ${{ runner.os }}-sars-cov-benchmark-dependencies-${{ steps.get-date.outputs.date }}-
	# ${{ runner.os }}-sars-cov-benchmark-dependencies-

	# Sample sheet with an extra test_case column, for the generate_test_cases rule.
	# That rule is commented out of this job's matrix, so the step is currently always skipped.
	- name: Prepare test data
	  if: matrix.rule == 'generate_test_cases'
	  run: |
	    mkdir -p .tests/data
	    # NOTE(review): both mates are fetched from the same reads.1 URL -- confirm this is intended.
	    curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.1.fastq.gz
	    curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.2.fastq.gz
	    curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/ont_reads.fastq.gz > .tests/data/ont_reads.fastq.gz
	    echo sample_name,fq1,fq2,date,is_amplicon_data,technology,test_case > .tests/config/pep/samples.csv
	    echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,1,illumina,case >> .tests/config/pep/samples.csv
	    echo ont-test,data/ont_reads.fastq.gz,,2022-01-01,1,ont,case >> .tests/config/pep/samples.csv
	# Sample sheet with a single Illumina sample (is_amplicon_data = 0) for every
	# rule other than generate_test_cases.
	- name: Prepare test data
	  if: matrix.rule != 'generate_test_cases'
	  run: |
	    mkdir -p .tests/data
	    # NOTE(review): both mates are fetched from the same reads.1 URL -- confirm this is intended.
	    curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.1.fastq.gz
	    curl -L https://github.com/thomasbtf/small-kraken-db/raw/master/B.1.1.7.reads.1.fastq.gz > .tests/data/B117.2.fastq.gz
	    echo sample_name,fq1,fq2,date,is_amplicon_data,technology > .tests/config/pep/samples.csv
	    echo illumina-test,data/B117.1.fastq.gz,data/B117.2.fastq.gz,2022-01-01,0,illumina >> .tests/config/pep/samples.csv
	# Downloads NCBI nuccore record BA000005.3 as FASTA and stores it
	# gzip-compressed under the path human-genome.fna.gz.
	# The `test-resources` cache step is commented out, so the condition evaluates to true.
	- name: Use smaller reference files for testing
	  if: steps.test-resources.outputs.cache-hit != true
	  run: |
	    # mkdir -p .tests/resources/minikraken-8GB
	    # curl -SL https://github.com/thomasbtf/small-kraken-db/raw/master/human_k2db.tar.gz | tar zxvf - -C .tests/resources/minikraken-8GB --strip 1
	    mkdir -p .tests/resources/genomes
	    curl -SL "https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?id=BA000005.3&db=nuccore&report=fasta" | gzip -c > .tests/resources/genomes/human-genome.fna.gz
	# Creates the strain-genomes table and fetches the two listed genomes from
	# NCBI instead of GISAID.
	- name: Simulate GISAID download
	  run: |
	    mkdir -p .tests/results/benchmarking/tables
	    echo -e "resources/genomes/B.1.1.7.fasta\nresources/genomes/B.1.351.fasta" > .tests/results/benchmarking/tables/strain-genomes.txt
	    mkdir -p .tests/resources/genomes
	    # sed '$ d' drops the last line of each downloaded record.
	    curl "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=MZ314997.1&rettype=fasta" | sed '$ d' > .tests/resources/genomes/B.1.1.7.fasta
	    curl "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=MZ314998.1&rettype=fasta" | sed '$ d' > .tests/resources/genomes/B.1.351.fasta
	# Runs the benchmark rule inside .tests; matrix.rule is appended to the arguments.
	# NOTE(review): both `uses:` references below were mangled by e-mail
	# obfuscation ("[email protected]") and are kept verbatim; restore the real
	# `snakemake/<action>@<version>` before using this file.
	- name: Test rule ${{ matrix.rule }}
	  uses: snakemake/[email protected]
	  with:
	    snakefile: workflow/Snakefile
	    directory: .tests
	    # Folded scalar: the lines are joined with single spaces into one argument string.
	    args: >-
	      -p --use-conda --show-failed-logs --cores 2
	      --resources ncbi_api_requests=1
	      --conda-cleanup-pkgs cache
	      --conda-frontend mamba
	      ${{ matrix.rule }}

	# No rule in this job's matrix starts with 'all -np', so this step runs for every rule.
	- name: Test report
	  if: startsWith(matrix.rule, 'all -np') != true
	  uses: snakemake/[email protected]
	  with:
	    snakefile: workflow/Snakefile
	    directory: .tests
	    args: "${{ matrix.rule }} --report report.zip"

	# - name: Upload report
	# uses: actions/upload-artifact@v4
	# with:
	# name: report-rule-${{ matrix.rule }}
	# path: .tests/results/patient-reports/2022-01-01.zip

	# Uploads .tests/logs/ as an artifact named after the matrix rule.
	- name: Upload logs
	  uses: actions/upload-artifact@v4
	  with:
	    path: .tests/logs/
	    name: log-rule-${{ matrix.rule }}

	# - name: Unit test
	# args: "--generate-unit-tests"
	# - name: Test workflow (singularity)
	# args: "--use-conda --use-singularity --show-failed-logs --cores 2 --resources ncbi_api_requests=1 --conda-cleanup-pkgs cache --conda-frontend mamba"
	# - name: Test input changes
	# args: "--use-conda --show-failed-logs --cores 2 --resources ncbi_api_requests=1 --conda-cleanup-pkgs cache --conda-frontend mamba -R `snakemake --list-input-changes`"
	# - name: Test code changes
	# args: "--use-conda --show-failed-logs --cores 2 --resources ncbi_api_requests=1 --conda-cleanup-pkgs cache --conda-frontend mamba -R `snakemake --list-code-changes`"
	# - name: Test params changes
	# args: "--use-conda --show-failed-logs --cores 2 --resources ncbi_api_requests=1 --conda-cleanup-pkgs cache --conda-frontend mamba -R `snakemake --list-params-changes`"

	# Prints the strain-calling table and fails the job if any line after the
	# first one contains "mismatch".
	- name: Check strain calling benchmark
	  if: matrix.rule == 'benchmark_strain_calling'
	  run: |
	    cat .tests/results/benchmarking/strain-calling.csv
	    # tail -n+2 skips the first line; grep -q only reports via its exit status.
	    if tail -n+2 .tests/results/benchmarking/strain-calling.csv | grep -q mismatch
	    then
	      echo "Strain calling failed in some cases (see above)."
	      exit 1
	    else
	      echo "Strain calling was successful in all cases."
	    fi
	# Prints the pseudoassembly table and fails the job if its last line
	# compares below 0.95.
	- name: Check pseudoassembly benchmark
	  if: matrix.rule == 'benchmark_assembly'
	  run: |
	    cat .tests/results/benchmarking/assembly/pseudoassembly.csv
	    # NOTE(review): `<` inside [[ ]] compares strings lexicographically, not
	    # numerically -- confirm the last CSV line is a plain decimal fraction.
	    if [[ $(tail -1 .tests/results/benchmarking/assembly/pseudoassembly.csv) < 0.95 ]]
	    then
	      echo "Pseudoassembly benchmarking failed. There is at least one assembly where the contigs do not cover 95% of the original sequence (see above)."
	      exit 1
	    else
	      echo "Pseudoassembly was successful."
	    fi
	# Prints the assembly table and fails the job if its last line compares below 0.8.
	- name: Check assembly benchmark
	  if: matrix.rule == 'benchmark_assembly'
	  run: |
	    cat .tests/results/benchmarking/assembly/assembly.csv
	    # NOTE(review): `<` inside [[ ]] compares strings lexicographically, not
	    # numerically -- confirm the last CSV line is a plain decimal fraction.
	    if [[ $(tail -1 .tests/results/benchmarking/assembly/assembly.csv) < 0.8 ]]
	    then
	      echo "Assembly benchmarking failed. There is at least one assembly where the contigs do not cover 80% of the original sequence (see above)."
	      exit 1
	    else
	      echo "Assembly was successful."
	    fi
	# Prints the kallisto strain-call tables of the non-cov2 samples.
	# 'benchmark_non_sars_cov_2' is not in this job's matrix, so the step is currently always skipped.
	- name: Print non-sars-cov-2 kallisto calls
	  if: matrix.rule == 'benchmark_non_sars_cov_2'
	  run: |
	    cat .tests/results/benchmarking/tables/strain-calls/non-cov2-*.strains.kallisto.tsv
	# Prints the non-sars-cov-2 table and fails the job if any line contains "is sars-cov-2".
	# 'benchmark_non_sars_cov_2' is not in this job's matrix, so the step is currently always skipped.
	- name: Test non-sars-cov-2 coronaviruses
	  if: matrix.rule == 'benchmark_non_sars_cov_2'
	  run: |
	    cat .tests/results/benchmarking/non-sars-cov-2.csv
	    # grep -q only reports via its exit status.
	    if grep -q 'is sars-cov-2' .tests/results/benchmarking/non-sars-cov-2.csv
	    then
	      echo "Workflow failed! A non-sars-cov-2 genome was identified as sars-cov-2 (see above)."
	      exit 1
	    else
	      echo "Workflow successfully identified samples as non-sars-cov-2 in all cases."
	    fi
	# Recursively sets mode 755 on the conda directory under .tests/.snakemake.
	- name: Change permissions for caching
	  run: >-
	    sudo chmod -R 755 .tests/.snakemake/conda

	# Prints file-system usage in human-readable units.
	- name: Print disk space
	  run: >-
	    sudo df -h

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

refactor: include advanced execution #2826

Workflow file

refactor: include advanced execution #2826

Jobs

Run details

Workflow file for this run