-
Notifications
You must be signed in to change notification settings - Fork 3
150 lines (144 loc) · 7.02 KB
/
tests.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# This workflow runs the pipeline on example data and checks that the outputs
# (count table, QC files, DEG lists, BMD input files) match the expected
# "truth" checksums shipped with the test data.
name: Tests

on:
  push:
    branches:
      # NOTE: in GitHub filter patterns '*' does not match '/', so branches
      # named like 'feature/foo' do not trigger this workflow.
      - '*'
  schedule:
    # 00:00 on the first day of every month.
    - cron: '0 0 1 * *'

env:
  # Only referenced by the commented-out cache step below.
  CACHE_NUMBER: 0

permissions:
  contents: read

jobs:
  build:
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    strategy:
      matrix:
        # `include` adds os/label/prefix to every combination of the
        # matrix variables listed after it.
        include:
          - os: ubuntu-20.04
            label: linux-64
            prefix: /usr/share/miniconda3/envs/base
        r-version: ['4.3.1']
        python-version: ['3.7']
        seq_type: ['temposeq'] # add later... , 'rnaseq']
    runs-on: ${{ matrix.os }}
    defaults:
      run:
        # Login shell, as used with setup-miniconda so the conda setup is
        # loaded. A custom shell template is run exactly as written: unlike
        # the default `bash` shell it does NOT add `-e`, so a multi-command
        # step only reports the exit status of its last command unless the
        # script calls `set -e` itself (the comparison steps below do).
        shell: bash -l {0}
    steps:
      - uses: actions/checkout@v3

      - name: Install linux dependencies
        run: |
          sudo apt-get update
          # -y: never wait for a confirmation prompt on a non-interactive runner
          sudo apt-get install -y libcairo2-dev
          sudo apt-get install -y libxt-dev

      - name: Set up python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      - name: Setup Mambaforge
        uses: conda-incubator/setup-miniconda@v2
        with:
          miniforge-variant: Mambaforge
          miniforge-version: latest
          environment-file: workflow/envs/base.yml
          activate-environment: base
          use-mamba: true
          python-version: ${{ matrix.python-version }}
          mamba-version: "*"
          auto-activate-base: true
          auto-update-conda: true
          use-only-tar-bz2: true # This needs to be set for caching to work properly, according to others

      # - name: Set cache date
      #   run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV
      # - uses: actions/cache@v3
      #   with:
      #     path: ${{ matrix.prefix }}
      #     key: ${{ matrix.label }}-conda-${{ hashFiles('workflow/envs/base.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }}
      #   id: cache

      - name: Update environment
        run: mamba env update -n base -f workflow/envs/base.yml
        # No step with id `cache` is active (the cache step above is commented
        # out), so this condition is always true and the update always runs.
        if: steps.cache.outputs.cache-hit != 'true'

      - name: Check conda install
        run: |
          conda info
          conda list
        shell: bash -l {0}

      - name: Import data
        run: |
          ls -alht # What is in the "working directory"?
          git clone https://github.com/EHSRB-BSRSE-Bioinformatics/test-data
          rm -r inputs # Remove existing directory before replacing w/ test data
          mv test-data/${{ matrix.seq_type}}/* ./
          wget https://github.com/EHSRB-BSRSE-Bioinformatics/unify_temposeq_manifests/raw/main/output_manifests/Human_S1500_1.2_standardized.csv

      - name: Build snakemake environment
        run: |
          snakemake --cores 8 --use-conda --conda-create-envs-only

      - name: Install extra dependencies
        run: |
          # Find the snakemake conda env spec that mentions R-ODAF_reports; the
          # env prefix passed to conda is that file's path minus the ".yaml"
          # suffix. Only the first match is used, and a missing match is
          # reported explicitly instead of handing conda a broken command line.
          env_spec="$(grep -l "R-ODAF_reports" .snakemake/conda/*.yaml | head -n 1)"
          if [ -z "${env_spec}" ]; then
            echo "No snakemake conda env spec mentions R-ODAF_reports" >&2
            exit 1
          fi
          conda run -p "${env_spec%.yaml}" Rscript install.R

      - name: Run workflow
        run: |
          snakemake --cores 8 --use-conda

      - name: Generate md5sums
        run: |
          md5sum output/processed/count_table.tsv > checksum.count_table
          md5sum inputs/metadata/metadata.QC_applied.txt > checksum.metadata.QC_applied
          md5sum output/QC/MultiQC_Report_data.zip > checksum.multiQC
          md5sum output/QC/details/QC_per_sample.txt > checksum.QC_per_sample
          md5sum output/analysis/analysis_default_????????-????/DEG_lists/BaP/*.txt > checksums.BaP_DEGs #wildcards for timestamped folder
          md5sum output/analysis/analysis_default_????????-????/DEG_lists/CISP/*.txt > checksums.CISP_DEGs #wildcards for timestamped folder
          # md5sum output/analysis/analysis_default_????????-????/pathway_analysis/BaP/*WikiPathways* > checksums.BaP_pathways
          # md5sum output/analysis/analysis_default_????????-????/pathway_analysis/CISP/* > checksums.CISP_pathways
          md5sum output/analysis/analysis_default_????????-????/BMD_and_biomarker_files/*.txt > checksums.BMD_files
          md5sum output/analysis/analysis_default_????????-????/BMD_and_biomarker_files/*/*.txt >> checksums.BMD_files

      - name: Compare pre-processing and QC files to truth set
        run: |
          # Fail on the first mismatch; without this only the last cmp would
          # decide the step result (the custom shell does not set -e).
          set -e
          cmp truth_checksums/checksum.count_table checksum.count_table
          cmp truth_checksums/checksum.metadata.QC_applied checksum.metadata.QC_applied
          #cmp truth_checksums/checksum.multiQC checksum.multiQC
          cmp truth_checksums/checksum.QC_per_sample checksum.QC_per_sample

      - name: Print out checksums for DEG lists
        run: |
          echo "BaP DEG checksums from this run:"
          cat checksums.BaP_DEGs # Can be informative if there are errors
          echo "BaP DEG checksums from 'truth' set:"
          cat truth_checksums/checksums.BaP_DEGs # Can be informative if there are errors
          # There are rounding differences between different environments, e.g.:
          # cat analysis/DEG_lists/BaP/*_significant.txt
          echo "CISP DEG checksums from this run:"
          cat checksums.CISP_DEGs
          echo "CISP DEG checksums from 'truth' set:"
          cat truth_checksums/checksums.CISP_DEGs # Can be informative if there are errors
          # echo "BaP pathway checksums from this run:"
          # cat checksums.BaP_pathways
          # echo "BaP pathway checksums from 'truth' set:"
          # cat truth_checksums/checksums.BaP_pathways

      - name: Compare DEG lists to truth set
        run: |
          # Fail as soon as one comparison fails; without this a BaP mismatch
          # would be hidden whenever the CISP comparison passes.
          set -e
          # Only check first column, because of timestamps in filenames
          # cmp <(cat truth_checksums/checksums.BaP_DEGs | awk {'print $1'} | sort) \
          #     <(cat checksums.BaP_DEGs | awk {'print $1'} | sort)
          # cmp <(cat truth_checksums/checksums.CISP_DEGs | awk {'print $1'} | sort) \
          #     <(cat checksums.CISP_DEGs | awk {'print $1'} | sort)
          # Instead of cmp, this code will check if ALL the checksums from the test have a corresponding match in the larger set of possible "truth" checksums.
          # This is a hack, but it should work to deal with rounding issues.
          # The hash column is extracted BEFORE de-duplicating, so two output
          # files with identical content (same hash, different name) count once
          # on both sides of the diff.
          diff -q <(awk '{print $1}' checksums.BaP_DEGs | sort -u) \
                  <(grep -Fxf \
                      <(awk '{print $1}' checksums.BaP_DEGs) \
                      <(awk '{print $1}' truth_checksums/checksums.BaP_DEGs | sort -u))
          diff -q <(awk '{print $1}' checksums.CISP_DEGs | sort -u) \
                  <(grep -Fxf \
                      <(awk '{print $1}' checksums.CISP_DEGs) \
                      <(awk '{print $1}' truth_checksums/checksums.CISP_DEGs | sort -u))

      # - name: Compare pathway analysis to truth set
      #   run: |
      #     cmp truth_checksums/checksums.BaP_pathways checksums.BaP_pathways

      - name: Compare BMD input files to truth set
        run: |
          set -e
          cat checksums.BMD_files # Can be informative if there are errors
          cat truth_checksums/checksums.BMD_files # Can be informative if there are errors
          # Compare hashes only (first column); file names carry timestamps.
          cmp <(sort -u checksums.BMD_files | awk '{print $1}') <(sort -u truth_checksums/checksums.BMD_files | awk '{print $1}')