Tests

Update tests.yml #501

Workflow file for this run

	# Regression test for the analysis workflow.
# Runs the Snakemake pipeline on example data and checks that the outputs
# (count table, QC files, DEG lists, BMD input files) match the checksums
# stored in truth_checksums/, i.e. that the DEG list produced by the new code
# is identical to the expected result.

name: Tests

on:
  push:
    branches:
      - '*'

  # Also run at 00:00 (UTC) on the first day of every month.
  schedule:
    - cron: '0 0 1 * *'

env:
  # Only referenced by the cache key of the commented-out cache step below.
  CACHE_NUMBER: 0

permissions:
  contents: read

jobs:
  build:
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    strategy:
      matrix:
        # `include` adds os/label/prefix to every combination of the
        # dimensions listed below it.
        include:
          - os: ubuntu-20.04
            label: linux-64
            prefix: /usr/share/miniconda3/envs/base
        r-version: ['4.3.1']
        python-version: ['3.7']
        seq_type: ['temposeq']  # add later... , 'rnaseq']
    runs-on: ${{ matrix.os }}
    defaults:
      run:
        # -l: login shell, so the conda environment set up below is activated.
        # -e: abort a step on the first failing command. A custom shell
        #     template does not get this implicitly; without it only the last
        #     command of a multi-line `run` decides whether the step fails,
        #     which would hide failed comparisons.
        shell: bash -el {0}

    steps:
      - uses: actions/checkout@v3
      - name: Install linux dependencies
        run: |
          sudo apt-get update
          sudo apt-get install libcairo2-dev
          sudo apt-get install libxt-dev
      - name: Set up python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Setup Mambaforge
        uses: conda-incubator/setup-miniconda@v2
        with:
          miniforge-variant: Mambaforge
          miniforge-version: latest
          environment-file: workflow/envs/base.yml
          activate-environment: base
          use-mamba: true
          python-version: ${{ matrix.python-version }}
          mamba-version: "*"
          auto-activate-base: true
          auto-update-conda: true
          use-only-tar-bz2: true  # This needs to be set for caching to work properly, according to others
      # - name: Set cache date
      #   run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV
      # - uses: actions/cache@v3
      #   with:
      #     path: ${{ matrix.prefix }}
      #     key: ${{ matrix.label }}-conda-${{ hashFiles('workflow/envs/base.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }}
      #   id: cache
      # While the cache step above is commented out there is no step with id
      # `cache`, so the condition below is always true and the update always
      # runs.
      - name: Update environment
        run: mamba env update -n base -f workflow/envs/base.yml
        if: steps.cache.outputs.cache-hit != 'true'
      - name: Check conda install
        run: |
          conda info
          conda list
        shell: bash -el {0}
      - name: Import data
        run: |
          ls -alht # What is in the "working directory"?
          git clone https://github.com/EHSRB-BSRSE-Bioinformatics/test-data
          rm -r data # Remove existing directory before replacing w/ test data
          rm -r config # Remove existing directory before replacing w/ test data
          mv test-data/${{ matrix.seq_type}}/* ./
          wget https://github.com/EHSRB-BSRSE-Bioinformatics/unify_temposeq_manifests/raw/main/output_manifests/Human_S1500_1.2_standardized.csv
      - name: Build snakemake environment
        run: |
          snakemake --cores 8 --use-conda --conda-create-envs-only
      # Runs install.R inside the Snakemake-created conda environment whose
      # .yaml file mentions "R-ODAF_reports"; the environment prefix is that
      # file's path with the .yaml suffix removed.
      - name: Install extra dependencies
        run: |
          conda run -p $(grep -rl "R-ODAF_reports" .snakemake/conda/*.yaml | \
          sed s/\.yaml//) Rscript install.R
      - name: Run workflow
        run: |
          snakemake --cores 8 --use-conda
      # checksums.BMD_files covers the .txt files directly inside
      # analysis/BMD_and_biomarker_files and, appended after them, the .txt
      # files one sub-directory below it.
      - name: Generate md5sums
        run: |
          md5sum data/processed/count_table.tsv > checksum.count_table
          md5sum data/metadata/metadata.QC_applied.txt > checksum.metadata.QC_applied
          md5sum analysis/QC/MultiQC_Report_data.zip > checksum.multiQC
          md5sum analysis/QC/details/QC_per_sample.txt > checksum.QC_per_sample
          md5sum analysis/DEG_lists/BaP/*.txt > checksums.BaP_DEGs
          md5sum analysis/DEG_lists/CISP/*.txt > checksums.CISP_DEGs
          # md5sum analysis/pathway_analysis/BaP/WikiPathways > checksums.BaP_pathways
          # md5sum analysis/pathway_analysis/CISP/* > checksums.CISP_pathways
          md5sum analysis/BMD_and_biomarker_files/*.txt > checksums.BMD_files
          md5sum analysis/BMD_and_biomarker_files/*/*.txt >> checksums.BMD_files
      - name: Compare pre-processing and QC files to truth set
        run: |
          cmp truth_checksums/checksum.count_table checksum.count_table
          cmp truth_checksums/checksum.metadata.QC_applied checksum.metadata.QC_applied
          #cmp truth_checksums/checksum.multiQC checksum.multiQC
          cmp truth_checksums/checksum.QC_per_sample checksum.QC_per_sample
      - name: Print out checksums for DEG lists
        run: |
          echo "BaP DEG checksums from this run:"
          cat checksums.BaP_DEGs # Can be informative if there are errors
          echo "BaP DEG checksums from 'truth' set:"
          cat truth_checksums/checksums.BaP_DEGs # Can be informative if there are errors
          # There are rounding differences between different environments, e.g.:
          # cat analysis/DEG_lists/BaP/*_significant.txt
          echo "CISP DEG checksums from this run:"
          cat checksums.CISP_DEGs
          echo "CISP DEG checksums from 'truth' set:"
          cat truth_checksums/checksums.CISP_DEGs # Can be informative if there are errors
          # echo "BaP pathway checksums from this run:"
          # cat checksums.BaP_pathways
          # echo "BaP pathway checksums from 'truth' set:"
          # cat truth_checksums/checksums.BaP_pathways
      # Each `diff` compares this run's checksums (first column only) with the
      # subset of truth checksums that also occur in this run; they are equal
      # only if every checksum from this run is present in the truth set.
      - name: Compare DEG lists to truth set
        run: |
          # Only check first column, because of timestamps in filenames
          # cmp <(cat truth_checksums/checksums.BaP_DEGs | awk {'print $1'} | sort) \
          # <(cat checksums.BaP_DEGs | awk {'print $1'} | sort)
          # cmp <(cat truth_checksums/checksums.CISP_DEGs | awk {'print $1'} | sort) \
          # <(cat checksums.CISP_DEGs | awk {'print $1'} | sort)
          # Instead of cmp, this code will check if ALL the checksums from the test have a corresponding match in the larger set of possible "truth" checksums.
          # This is a hack, but it should work to deal with rounding issues.
          diff -q <(sort -u checksums.BaP_DEGs | awk {'print $1'}) \
            <(grep -Fxf \
              <(cat checksums.BaP_DEGs | awk {'print $1'}) \
              <(cat truth_checksums/checksums.BaP_DEGs | awk {'print $1'} | sort -u))
          diff -q <(sort -u checksums.CISP_DEGs | awk {'print $1'}) \
            <(grep -Fxf \
              <(cat checksums.CISP_DEGs | awk {'print $1'}) \
              <(cat truth_checksums/checksums.CISP_DEGs | awk {'print $1'} | sort -u))
      # - name: Compare pathway analysis to truth set
      #   run: |
      #     cmp truth_checksums/checksums.BaP_pathways checksums.BaP_pathways
      - name: Compare BMD input files to truth set
        run: |
          cat checksums.BMD_files # Can be informative if there are errors
          cat truth_checksums/checksums.BMD_files # Can be informative if there are errors
          cmp truth_checksums/checksums.BMD_files checksums.BMD_files

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Update tests.yml #501

Workflow file

Update tests.yml #501

Jobs

Run details

Workflow file for this run