# tgx-ddi summary csv (#247) #775
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# This workflow tests whether the DEG lists produced by the current code are
# identical to the expected results for the example data.
name: Tests

on:
  push:
    # NOTE(review): in GitHub's filter patterns '*' does not match '/', so
    # pushes to branches such as 'feature/foo' do not trigger this workflow.
    # Use '**' if every branch is meant to be tested - confirm intent.
    branches:
      - '*'
  schedule:
    # 00:00 UTC on the first day of every month.
    - cron: '0 0 1 * *'

env:
  # Only referenced by the (currently commented-out) conda cache key in the
  # tests job; bump it to invalidate that cache once caching is re-enabled.
  CACHE_NUMBER: 0

permissions:
  contents: read
jobs:
  # Runs the Snakemake workflow end-to-end on the example data, then compares
  # md5 checksums of selected outputs against the files in truth_checksums/.
  tests:
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    strategy:
      matrix:
        # 'include' adds os/label/prefix to every combination of the matrix
        # dimensions declared below it.
        include:
          # NOTE(review): GitHub has retired the hosted ubuntu-20.04 image;
          # confirm whether this still resolves (e.g. on a self-hosted runner)
          # or needs to move to a newer image. Checksums may shift if it moves.
          - os: 'ubuntu-20.04'  ## self-hosted
            label: linux-64
            prefix: /usr/share/miniconda3/envs/base
        r-version: ['4.3.1']
        python-version: ['3.11']
        seq_type: ['temposeq']  # add later... , 'rnaseq']
    runs-on: ${{ matrix.os }}
    defaults:
      run:
        # Login shell so the conda initialisation written by setup-miniconda
        # is sourced. This custom shell string does NOT enable errexit, so
        # steps that must fail on any command call `set -e` themselves.
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v4

      - name: Install linux dependencies
        run: |
          sudo apt-get update
          sudo apt-get -y install libcairo2-dev libxt-dev

      - name: Set up python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Setup Miniforge
        uses: conda-incubator/setup-miniconda@v3
        with:
          # Mambaforge installers are discontinued; Miniforge3 ships mamba.
          miniforge-variant: Miniforge3
          miniforge-version: latest
          environment-file: workflow/envs/base.yml
          activate-environment: base
          use-mamba: true
          python-version: ${{ matrix.python-version }}
          mamba-version: "*"
          auto-activate-base: true
          auto-update-conda: true
          use-only-tar-bz2: true  # This needs to be set for caching to work properly, according to others

      # - name: Set cache date
      #   run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV
      # - uses: actions/cache@v3
      #   with:
      #     path: ${{ matrix.prefix }}
      #     key: ${{ matrix.label }}-conda-${{ hashFiles('workflow/envs/base.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }}
      #   id: cache

      - name: Update environment
        run: mamba env update -n base -f workflow/envs/base.yml
        # While the cache step above is commented out there is no step with
        # id 'cache', so this condition is always true and the update always
        # runs. It becomes meaningful again once caching is re-enabled.
        if: steps.cache.outputs.cache-hit != 'true'

      - name: Check conda install
        run: |
          conda info
          conda list
        shell: bash -l {0}

      - name: Import data
        run: |
          ls -alht # What is in the "working directory"?
          git clone https://github.com/EHSRB-BSRSE-Bioinformatics/test-data
          rm -r inputs # Remove existing directory before replacing w/ test data
          mv test-data/dev/${{ matrix.seq_type }}/* ./
          wget https://github.com/EHSRB-BSRSE-Bioinformatics/unify_temposeq_manifests/raw/main/output_manifests/Human_S1500_1.2_standardized.csv

      - name: Build snakemake environment
        run: |
          snakemake --cores 8 --use-conda --conda-create-envs-only

      - name: Install extra dependencies
        run: |
          # Find the env spec under .snakemake/conda that mentions
          # R-ODAF_reports; the env prefix passed to conda is that spec's path
          # with the .yaml suffix removed. The sed expression is quoted so the
          # dot stays literal and only the trailing suffix is stripped.
          reports_env=$(grep -rl "R-ODAF_reports" .snakemake/conda/*.yaml | sed 's/\.yaml$//')
          conda run -p "$reports_env" Rscript install.R

      - name: Print env names
        run: |
          for yaml in .snakemake/conda/*.yaml; do
            echo "$yaml:"
            head -n 1 "$yaml"
          done

      - name: Print contents of Conda environments
        run: |
          for env in .snakemake/conda/*; do
            if [ -d "$env" ]; then
              echo "Contents of $env:"
              conda activate "$env"
              conda list
              conda deactivate
            fi
          done
        shell: bash -l {0}

      - name: Run workflow
        run: |
          snakemake --cores 8 --use-conda

      - name: Generate md5sums
        run: |
          md5sum output/processed/count_table.tsv > checksum.count_table
          md5sum output/QC/metadata.QC_applied.txt > checksum.metadata.QC_applied
          md5sum output/QC/MultiQC_Report_data.zip > checksum.multiQC
          md5sum output/QC/details/QC_per_sample.txt > checksum.QC_per_sample
          md5sum output/analysis/analysis_default_????????-????/DEG_lists/BaP/*.txt > checksums.BaP_DEGs #wildcards for timestamped folder
          md5sum output/analysis/analysis_default_????????-????/DEG_lists/CISP/*.txt > checksums.CISP_DEGs #wildcards for timestamped folder
          # md5sum output/analysis/analysis_default_????????-????/pathway_analysis/BaP/*WikiPathways* > checksums.BaP_pathways
          # md5sum output/analysis/analysis_default_????????-????/pathway_analysis/CISP/* > checksums.CISP_pathways
          md5sum output/analysis/analysis_default_????????-????/BMD_and_biomarker_files/*.txt > checksums.BMD_files
          md5sum output/analysis/analysis_default_????????-????/BMD_and_biomarker_files/*/*.txt >> checksums.BMD_files

      - name: Compare pre-processing and QC files to truth set
        run: |
          # Without errexit only the last cmp would decide the step result,
          # so earlier mismatches would be silently ignored.
          set -e
          cmp truth_checksums/checksum.count_table checksum.count_table
          cmp truth_checksums/checksum.metadata.QC_applied checksum.metadata.QC_applied
          #cmp truth_checksums/checksum.multiQC checksum.multiQC
          cmp truth_checksums/checksum.QC_per_sample checksum.QC_per_sample

      - name: Print out checksums for DEG lists
        run: |
          echo "BaP DEG checksums from this run:"
          cat checksums.BaP_DEGs # Can be informative if there are errors
          echo "BaP DEG checksums from 'truth' set:"
          cat truth_checksums/checksums.BaP_DEGs # Can be informative if there are errors
          # There are rounding differences between different environments, e.g.:
          # cat analysis/DEG_lists/BaP/*_significant.txt
          echo "CISP DEG checksums from this run:"
          cat checksums.CISP_DEGs
          echo "CISP DEG checksums from 'truth' set:"
          cat truth_checksums/checksums.CISP_DEGs # Can be informative if there are errors
          # echo "BaP pathway checksums from this run:"
          # cat checksums.BaP_pathways
          # echo "BaP pathway checksums from 'truth' set:"
          # cat truth_checksums/checksums.BaP_pathways

      - name: Compare DEG lists to truth set
        run: |
          # Fail on the first failing check; without errexit only the last
          # command (the CISP check) would decide the step result.
          set -e
          # Only check first column, because of timestamps in filenames
          # cmp <(cat truth_checksums/checksums.BaP_DEGs | awk {'print $1'} | sort) \
          #     <(cat checksums.BaP_DEGs | awk {'print $1'} | sort)
          # cmp <(cat truth_checksums/checksums.CISP_DEGs | awk {'print $1'} | sort) \
          #     <(cat checksums.CISP_DEGs | awk {'print $1'} | sort)
          # Instead of cmp, this code will check if ALL the checksums from the test have a corresponding match in the larger set of possible "truth" checksums.
          # This is a hack, but it should work to deal with rounding issues.
          #
          # check_subset RUN_FILE TRUTH_FILE
          #   Succeeds only if RUN_FILE is non-empty and every checksum in it
          #   (first column) also occurs in TRUTH_FILE. Hashes are extracted
          #   before de-duplicating so that two files with the same hash do
          #   not produce a spurious difference.
          check_subset() {
            local run_file=$1 truth_file=$2
            test -s "$run_file"
            diff -q <(awk '{print $1}' "$run_file" | sort -u) \
                    <(grep -Fxf \
                        <(awk '{print $1}' "$run_file") \
                        <(awk '{print $1}' "$truth_file" | sort -u))
          }
          check_subset checksums.BaP_DEGs truth_checksums/checksums.BaP_DEGs
          check_subset checksums.CISP_DEGs truth_checksums/checksums.CISP_DEGs

      # - name: Compare pathway analysis to truth set
      #   run: |
      #     cmp truth_checksums/checksums.BaP_pathways checksums.BaP_pathways

      - name: Compare BMD input files to truth set
        run: |
          cat checksums.BMD_files # Can be informative if there are errors
          cat truth_checksums/checksums.BMD_files # Can be informative if there are errors
          # Compare hashes only (first column); filenames contain timestamps.
          cmp <(awk '{print $1}' checksums.BMD_files | sort) \
              <(awk '{print $1}' truth_checksums/checksums.BMD_files | sort)