-
Notifications
You must be signed in to change notification settings - Fork 3
165 lines (164 loc) · 7.64 KB
/
tests.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# This workflow tests whether the DEG list produced by the new code is
# identical to the expected result based on example data.
name: Tests

on:
  push:
    # '*' matches any branch name that does not contain a '/'.
    branches:
      - '*'
  # Also run at 00:00 UTC on the first day of every month.
  schedule:
    - cron: '0 0 1 * *'

env:
  # Only referenced by the cache key of the commented-out cache step.
  CACHE_NUMBER: 0

# The job only needs to read the repository contents.
permissions:
  contents: read
jobs:
  # Runs the Snakemake workflow on the example data and compares checksums of
  # its outputs against the committed truth set in truth_checksums/.
  tests:
    env:
      # NOTE(review): presumably read by install.R for authenticated GitHub
      # downloads - confirm.
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    strategy:
      matrix:
        # `include` attaches the runner details to the single combination
        # formed by the r-version / python-version / seq_type axes below.
        include:
          # NOTE(review): GitHub has retired its hosted ubuntu-20.04 image;
          # confirm this label still resolves to a runner.
          - os: 'ubuntu-20.04'  ## self-hosted
            label: linux-64
            prefix: /usr/share/miniconda3/envs/base
        r-version: ['4.3.1']
        python-version: ['3.11']
        seq_type: ['temposeq']  # add later... , 'rnaseq']
    runs-on: ${{ matrix.os }}
    defaults:
      run:
        # Login shell so the conda setup done by setup-miniconda is loaded.
        # This is a custom shell template: unlike the default bash shell it
        # does NOT enable `-e`, so a step fails only on the exit status of its
        # last command unless the script calls `set -e` itself.
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v4
      - name: Install linux dependencies
        run: |
          sudo apt-get update
          sudo apt-get -y install libcairo2-dev libxt-dev
      - name: Set up python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Setup Miniforge
        uses: conda-incubator/setup-miniconda@v3
        with:
          miniforge-variant: Miniforge3
          miniforge-version: 23.3.1-0
          environment-file: workflow/envs/base.yml
          activate-environment: rodaf_base
          use-mamba: true
          python-version: ${{ matrix.python-version }}
          mamba-version: "*"
          auto-activate-base: true
          auto-update-conda: true
          use-only-tar-bz2: true  # This needs to be set for caching to work properly, according to others
      # - name: Set cache date
      #   run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV
      # - uses: actions/cache@v3
      #   with:
      #     path: ${{ matrix.prefix }}
      #     key: ${{ matrix.label }}-conda-${{ hashFiles('workflow/envs/base.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }}
      #   id: cache
      - name: List available channels and packages
        run: |
          conda config --show channels
          conda search r-matrixstats
      - name: Update environment
        run: mamba env update -n rodaf_base -f workflow/envs/base.yml
        # No step with id `cache` is active while the cache step above stays
        # commented out, so this condition is always true.
        if: steps.cache.outputs.cache-hit != 'true'
      - name: Check conda install
        run: |
          conda info
          conda list
        shell: bash -l {0}
      - name: Import data
        run: |
          ls -alht # What is in the "working directory"?
          git clone https://github.com/EHSRB-BSRSE-Bioinformatics/test-data
          rm -r inputs # Remove existing directory before replacing w/ test data
          mv test-data/dev/${{ matrix.seq_type }}/* ./
          wget https://github.com/EHSRB-BSRSE-Bioinformatics/unify_temposeq_manifests/raw/main/output_manifests/Human_S1500_1.2_standardized.csv
      - name: Build snakemake environment
        run: |
          snakemake --cores 8 --use-conda --conda-create-envs-only
      - name: Install extra dependencies
        run: |
          # Locate the environment spec that mentions R-ODAF_reports; its path
          # without the .yaml suffix is used as the conda prefix for install.R.
          report_env_spec="$(grep -rl "R-ODAF_reports" .snakemake/conda/*.yaml)"
          conda run -p "${report_env_spec%.yaml}" Rscript install.R
      - name: Print env names
        run: |
          for yaml in .snakemake/conda/*.yaml; do
            echo "$yaml:"
            head -n 1 "$yaml"
          done
      - name: Print contents of Conda environments
        run: |
          for env in .snakemake/conda/*; do
            if [ -d "$env" ]; then
              echo "Contents of $env:"
              conda activate "$env"
              conda list
              conda deactivate
            fi
          done
        shell: bash -l {0}
      - name: Run workflow
        run: |
          snakemake --cores 8 --use-conda
      - name: Generate md5sums
        run: |
          md5sum output/processed/count_table.tsv > checksum.count_table
          md5sum output/QC/metadata.QC_applied.txt > checksum.metadata.QC_applied
          md5sum output/QC/MultiQC_Report_data.zip > checksum.multiQC
          md5sum output/QC/details/QC_per_sample.txt > checksum.QC_per_sample
          md5sum output/analysis/analysis_default_????????-????/DEG_lists/BaP/*.txt > checksums.BaP_DEGs #wildcards for timestamped folder
          md5sum output/analysis/analysis_default_????????-????/DEG_lists/CISP/*.txt > checksums.CISP_DEGs #wildcards for timestamped folder
          # md5sum output/analysis/analysis_default_????????-????/pathway_analysis/BaP/*WikiPathways* > checksums.BaP_pathways
          # md5sum output/analysis/analysis_default_????????-????/pathway_analysis/CISP/* > checksums.CISP_pathways
          md5sum output/analysis/analysis_default_????????-????/BMD_and_biomarker_files/*.txt > checksums.BMD_files
          md5sum output/analysis/analysis_default_????????-????/BMD_and_biomarker_files/*/*.txt >> checksums.BMD_files
      - name: Compare pre-processing and QC files to truth set
        run: |
          # The job's shell template does not enable errexit; without this only
          # the last cmp would decide whether the step passes.
          set -eo pipefail
          cmp truth_checksums/checksum.count_table checksum.count_table
          cmp truth_checksums/checksum.metadata.QC_applied checksum.metadata.QC_applied
          #cmp truth_checksums/checksum.multiQC checksum.multiQC
          cmp truth_checksums/checksum.QC_per_sample checksum.QC_per_sample
      - name: Print out checksums for DEG lists
        run: |
          echo "BaP DEG checksums from this run:"
          cat checksums.BaP_DEGs # Can be informative if there are errors
          echo "BaP DEG checksums from 'truth' set:"
          cat truth_checksums/checksums.BaP_DEGs # Can be informative if there are errors
          # There are rounding differences between different environments, e.g.:
          # cat analysis/DEG_lists/BaP/*_significant.txt
          echo "CISP DEG checksums from this run:"
          cat checksums.CISP_DEGs
          echo "CISP DEG checksums from 'truth' set:"
          cat truth_checksums/checksums.CISP_DEGs # Can be informative if there are errors
          # echo "BaP pathway checksums from this run:"
          # cat checksums.BaP_pathways
          # echo "BaP pathway checksums from 'truth' set:"
          # cat truth_checksums/checksums.BaP_pathways
      - name: Compare DEG lists to truth set
        run: |
          # The job's shell template does not enable errexit; without this a
          # BaP mismatch would be masked by a passing CISP diff.
          set -eo pipefail
          # An empty checksum list would make the subset check pass vacuously.
          test -s checksums.BaP_DEGs
          test -s checksums.CISP_DEGs
          # Only check first column, because of timestamps in filenames
          # cmp <(cat truth_checksums/checksums.BaP_DEGs | awk {'print $1'} | sort) \
          # <(cat checksums.BaP_DEGs | awk {'print $1'} | sort)
          # cmp <(cat truth_checksums/checksums.CISP_DEGs | awk {'print $1'} | sort) \
          # <(cat checksums.CISP_DEGs | awk {'print $1'} | sort)
          # Instead of cmp, this code will check if ALL the checksums from the test have a corresponding match in the larger set of possible "truth" checksums.
          # This is a hack, but it should work to deal with rounding issues.
          # Hashes are extracted BEFORE de-duplicating so that identical files
          # with different names collapse to one entry, as on the truth side.
          diff -q <(awk '{print $1}' checksums.BaP_DEGs | sort -u) \
            <(grep -Fxf \
              <(awk '{print $1}' checksums.BaP_DEGs) \
              <(awk '{print $1}' truth_checksums/checksums.BaP_DEGs | sort -u))
          diff -q <(awk '{print $1}' checksums.CISP_DEGs | sort -u) \
            <(grep -Fxf \
              <(awk '{print $1}' checksums.CISP_DEGs) \
              <(awk '{print $1}' truth_checksums/checksums.CISP_DEGs | sort -u))
      # - name: Compare pathway analysis to truth set
      #   run: |
      #     cmp truth_checksums/checksums.BaP_pathways checksums.BaP_pathways
      - name: Compare BMD input files to truth set
        run: |
          cat checksums.BMD_files # Can be informative if there are errors
          cat truth_checksums/checksums.BMD_files # Can be informative if there are errors
          # Compare hashes only (first column); file paths contain a timestamp.
          cmp <(sort -u checksums.BMD_files | awk '{print $1}') <(sort -u truth_checksums/checksums.BMD_files | awk '{print $1}')