-
Notifications
You must be signed in to change notification settings - Fork 0
/
03-prs.sh
executable file
·107 lines (88 loc) · 3.71 KB
/
03-prs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/bin/bash
# strict stop if there are any errors
set -e
# get environmental variables
source config.env
# create results directory
mkdir -p ${results_dir}/03
mkdir -p ${results_dir}/03/ldmats
# log everything from this script to a logfile in the results director
exec &> >(tee ${results_dir}/03/logfile)
echo "Updating plink file to have aligned effect alleles"
Rscript resources/genotypes/variant_ids.r ${genotype_processed_dir}/scratch/indep ${genotype_processed_dir}/scratch/indep2 bin/plink2 ${env_threads}
phenolist=( $(cat ${phenotype_processed_dir}/phenolist) )
# Allow specific analysis to be run
# Can take any number between 1:ngwas where ngwas is the number of rows in ${phenotype_processed_dir}/phenolist
index=$1
nphen=`cat ${phenotype_processed_dir}/phenolist | wc -l`
if [ -z $index ]
then
echo "Running all $nphen GWASs"
elif [ ! -z $index ]; then
re='^[0-9]+$'
if ! [[ $index =~ $re ]] ; then
# check if $index is in the phenolist array
if [[ " ${phenolist[@]} " =~ " ${index} " ]]; then
echo "Running GWAS for phenotype $index"
else
echo "error: Index is not a number or a valid phenotype"
echo "Usage: ${0} [index number]"
exit 1
fi
else
if [ "$index" -gt "$nphen" ] ; then
echo "error: Index is larger than number of phenotypes"
echo "Usage: ${0} [index number]"
exit 1
fi
echo "Running $index of $nphen GWASs"
fi
fi
echo $index
ls ${phenotype_processed_dir}/*.phen > ${phenotype_processed_dir}/phenolist
nphen=`cat ${phenotype_processed_dir}/phenolist | wc -l`
echo "Generated ${nphen} phenotype subsets"
echo "Generating LD matrices for each phenotype subset"
# Correlation matrix for each phenotype
i=1
mkdir -p ${results_dir}/03/ldmats
mkdir -p ${genotype_processed_dir}/scratch/tophits
phenolist=( $(cat ${phenotype_processed_dir}/phenolist) )
for phen in ${phenolist[@]}
do
if [ -z $index ] || [[ "$index" == "$phen" ]] || [[ "$index" == "$i" ]] ; then
filename=$(basename -- ${phen})
filename="${filename%.*}"
echo $filename
# Get rsids to keep
ph=$(echo $filename | cut -d "_" -f 1)
echo $ph
if [ ! -f ${genotype_processed_dir}/scratch/tophits/${ph}.bed ]; then
bin/plink2 \
--bfile ${genotype_processed_dir}/scratch/indep2 \
--score resources/genotypes/tophits/${ph}.txt \
--threads ${env_threads} \
--out ${genotype_processed_dir}/scratch/tophits/${ph}
fi
if [ ! -f ${genotype_processed_dir}/scratch/tophits/${ph}.hits ]; then
awk '{ print $1 }' resources/genotypes/tophits/${ph}.txt > ${genotype_processed_dir}/scratch/tophits/${ph}.hits
fi
mkdir -p ${genotype_processed_dir}/scratch/ldmats
# Get IDs to keep
awk '{ print $1, $2 }' ${phen} > ${genotype_processed_dir}/scratch/ldmats/keeptemp
# Get LD matrix
bin/plink2 \
--threads ${env_threads} \
--keep ${genotype_processed_dir}/scratch/ldmats/keeptemp \
--bfile ${genotype_processed_dir}/scratch/indep2 \
--extract ${genotype_processed_dir}/scratch/tophits/${ph}.hits \
--r-unphased ref-based bin4 yes-really \
--out ${results_dir}/03/ldmats/${filename}
rm -r ${genotype_processed_dir}/scratch/ldmats
fi
i=$((i+1))
done
echo "Successfully generated correlation matrices for each phenotype!"
echo "Generating PRS-phenotype associations for each subset"
Rscript resources/phenotypes/score.r ${phenotype_processed_dir}/phenolist ${genotype_processed_dir}/scratch/tophits ${results_dir}/03
echo "Successfully generated scores!"