Skip to content

Commit

Permalink
allowing pgen file format to be used to get chrx to work in regenie
Browse files Browse the repository at this point in the history
  • Loading branch information
explodecomputer committed Oct 24, 2024
1 parent 6ae67ff commit 1fefdbb
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 11 deletions.
27 changes: 19 additions & 8 deletions 00-extract-pruned-variants.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,25 @@ for i in $(seq 1 $nchr)
do
bgen=$(awk -v i=$i 'NR==i { print $1 }' ${genotype_input_list})
sample=$(awk -v i=$i 'NR==i { print $2 }' ${genotype_input_list})
./bin/plink2 \
--bgen ${bgen} ref-first \
--sample ${sample} \
--extract range ${prunefile} \
--make-bed \
--out ${genotype_processed_dir}/bgen_extract/$(basename ${bgen} .bgen) \
--threads ${env_threads}
echo "${genotype_processed_dir}/bgen_extract/$(basename ${bgen} .bgen)" >> ${genotype_processed_dir}/bgen_extract/mergelist
# Extract the pruned variants for this input into a bed/bim/fam fileset.
# An input-list row with an empty second (sample) column is treated as a
# plink2 pgen fileset prefix; otherwise it is a bgen file plus its sample file.
# basename only strips ".bgen" when present, so a pgen prefix passes through
# unchanged and both branches share the same output prefix.
extract_prefix="${genotype_processed_dir}/bgen_extract/$(basename "${bgen}" .bgen)"
if [ -z "${sample}" ]; then
  # pgen fileset: loaded with --pfile, and there is no sample file to pass
  ./bin/plink2 \
    --pfile "${bgen}" \
    --extract range "${prunefile}" \
    --make-bed \
    --out "${extract_prefix}" \
    --threads "${env_threads}"
else
  ./bin/plink2 \
    --bgen "${bgen}" ref-first \
    --sample "${sample}" \
    --extract range "${prunefile}" \
    --make-bed \
    --out "${extract_prefix}" \
    --threads "${env_threads}"
fi
# record the fileset prefix in the mergelist file
echo "${extract_prefix}" >> "${genotype_processed_dir}/bgen_extract/mergelist"
done

./bin/plink2 \
Expand Down
55 changes: 55 additions & 0 deletions 04c-gwas.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/bash

# strict stop if there are any errors
set -e

# get environmental variables
# (results_dir, genotype_input_list and phenotype_processed_dir are not
# assigned in this script before use; presumably they are set by config.env)
source config.env

# create results directory
mkdir -p "${results_dir}/04"

# log everything from this script to a logfile in the results directory
exec &> >(tee "${results_dir}/04/logfile_aggregate")

# number of rows in the genotype input list; grep -c '^' counts every line,
# including a final line that has no trailing newline
nchr=$(grep -c '^' "${genotype_input_list}")
echo "${nchr}"

# NOTE(review): the first argument to Rscript is the results directory, where
# a script path would be expected — the script name looks like it is missing.
# Left as written apart from quoting; confirm the intended R script.
Rscript "${results_dir}/04" "${nchr}" "${phenotype_processed_dir}/phenolist"

# print the phenotype file names, one per line. basename accepts a single
# path (plus an optional suffix), so it is called once per list entry.
while read -r phen_path || [ -n "${phen_path}" ]; do
  [ -n "${phen_path}" ] || continue
  basename "${phen_path}"
done < "${phenotype_processed_dir}/phenolist"

# phenotype directory in use: the value already set when this script reached
# the Rscript call above, not a machine-specific path
echo "${phenotype_processed_dir}"
# Print the phenotype name for a phenotype file path: the file name with a
# trailing ".phen" removed (the dot is matched literally).
# Arguments: $1 - path to a phenotype file
phen_name() {
  local name
  name=$(basename "$1") || return
  printf '%s\n' "${name%.phen}"
}

# Concatenate the regenie step 2 outputs numbered 1..nchr for one phenotype.
# gzip members can be appended byte-wise; the result is still a valid gzip file.
# Arguments: $1 - phenotype name, $2 - destination file
# Globals:   nchr, phenotype_processed_dir (read)
# Returns:   non-zero as soon as the destination cannot be written or an
#            input file cannot be read
concat_chromosomes() {
  local name=$1 dest=$2 chr
  # start from an empty file so a re-run does not append to earlier output
  : > "${dest}" || return
  for ((chr = 1; chr <= nchr; chr++)); do
    cat "${phenotype_processed_dir}/regenie/step2_${chr}_${name}.regenie.gz" >> "${dest}" || return
  done
}

# one phenotype file path per line of the phenolist; blank lines are skipped
while read -r gwas || [ -n "${gwas}" ]; do
  [ -n "${gwas}" ] || continue
  bn=$(phen_name "${gwas}")
  echo "${bn}"
  out="${results_dir}/04/${bn}.regenie.gz"
  echo "${out}"
  concat_chromosomes "${bn}" "${out}"
done < "${phenotype_processed_dir}/phenolist"

# list the aggregated per-phenotype result files written to the results
# directory as a final visual check
ls -lh "${results_dir}/04/"*.regenie.gz


38 changes: 38 additions & 0 deletions utils/bgen_to_pgen.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/bin/bash

# Convert every bgen file named in the genotype input list into a plink2 pgen
# fileset under <pgendir>, then replace the list contents with the new pgen
# prefixes (one per line). The previous list is kept as <list>.original.
#
# Usage: ./bgen_to_pgen.sh <pgendir>
#
# genotype_input_list and env_threads are not assigned in this script;
# presumably they are set by config.env.

set -e

source config.env

pgendir=$1

if [ -z "${pgendir}" ]; then
  echo "Usage: ./bgen_to_pgen.sh <pgendir>" >&2
  exit 1
fi

mkdir -p "${pgendir}"

# the new list is built in a temp file and only swapped in once every
# conversion has succeeded; the trap removes it on any exit path
tf=$(mktemp)
trap 'rm -f -- "${tf}"' EXIT

# each row of the list: <bgen file> <sample file>
# the list is read on fd 3 so that it stays separate from plink2's stdin
while read -r bgen sample _ <&3 || [ -n "${bgen}" ]; do
  [ -n "${bgen}" ] || continue
  bn=$(basename "${bgen}" .bgen)

  ./bin/plink2 --bgen "${bgen}" ref-first --sample "${sample}" --make-pgen --out "${pgendir}/${bn}" --threads "${env_threads}"
  echo "${pgendir}/${bn}" >> "${tf}"
done 3< "${genotype_input_list}"

cp "${genotype_input_list}" "${genotype_input_list}.original"
# write into the existing list file rather than moving the temp file over it,
# so the list keeps its own permissions instead of mktemp's owner-only mode
cat "${tf}" > "${genotype_input_list}"

echo "Original bgen files are now listed in ${genotype_input_list}.original"
echo "New pgen files are now listed in ${genotype_input_list}"

echo "Successfully converted bgen files to pgen"
15 changes: 12 additions & 3 deletions utils/update_bgen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@ set -e

source config.env

# output directory for the exported bgen 1.2 files (first argument, required)
newdir=$1

if [ -z "${newdir}" ]; then
  echo "Usage: ./update_bgen.sh <newdir>"
  exit 1
fi

mkdir -p "${newdir}"

# number of rows in the genotype input list
nchr=$(grep -c '^' "${genotype_input_list}")
# directory of the first file named in the list
dn=$(head -n 1 "${genotype_input_list}" | awk '{ print $1 }' | xargs dirname)
# NOTE(review): the visible export step writes to ${newdir}, so this bgen1.2
# directory may no longer be needed — confirm against the rest of the script
mkdir -p "${dn}/bgen1.2"
Expand All @@ -16,9 +25,9 @@ do
bn=$(basename $bgen .bgen)
dn=$(dirname $bgen)

./bin/plink2 --bgen ${bgen} ref-first --sample ${sample} --export bgen-1.2 --out ${dn}/bgen1.2/${bn} --threads ${env_threads}
./bin/bgenix -g ${dn}/bgen1.2/${bn}.bgen -index -clobber
echo "${dn}/bgen1.2/${bn}.bgen ${dn}/bgen1.2/${bn}.sample" >> $tf
./bin/plink2 --bgen ${bgen} ref-first --sample ${sample} --export bgen-1.2 --out ${newdir}/${bn} --threads ${env_threads}
./bin/bgenix -g ${newdir}/${bn}.bgen -index -clobber
echo "${newdir}/${bn}.bgen ${newdir}/${bn}.sample" >> $tf
done

cp ${genotype_input_list} ${genotype_input_list}.original
Expand Down

0 comments on commit 1fefdbb

Please sign in to comment.