# Patch summary (git unified diff; text before the first "diff --git" header is skipped by patch tools).
#
# NOTE(review): in this copy the line breaks of the patch have been collapsed into single
# spaces, so it cannot be applied as-is; regenerate it from the repository (git diff /
# git format-patch) rather than re-wrapping it by hand, because the leading whitespace of
# the hunk lines is not recoverable from this text.
#
# File 1: 00a-genotype-organisation.sh (modified, mode 100755)
#   Hunk "-19,12 +19,12": the three references to the prune file
#     resources/genotypes/hm3_prune_th_<...>.bed.gz (the "test -f" check, the "gunzip -c"
#     call and the "Error: Prune file ... not found" message) switch from ${build} to
#     ${genome_build}. Where genome_build is defined is not visible in this patch.
#   Hunk "-84,8 +84,13": before the loop's "done", after the extracted fileset has been
#     appended to bgen_extract/mergelist, the new script resources/genotypes/dups_bim.r is
#     run via Rscript on "<bgen basename>.bim" in bgen_extract. Blank lines are also added
#     around "done".
#     NOTE(review): ${bgen} is unquoted inside $(basename ${bgen} .bgen); a path containing
#     whitespace would be word-split. The pre-existing context lines use the same form.
#     NOTE(review): the exit status of Rscript is not checked in the added lines; whether
#     the script runs under "set -e" is not visible here - confirm.
#
# File 2: resources/genotypes/dups_bim.r (new file, mode 100644, 6 lines)
#   Reads the file named by the first command-line argument with data.table::fread(),
#   passes column V2 through make.unique(), and overwrites the same path with fwrite()
#   (tab-separated, no header, no row names, no quoting).
#   Presumably V2 is the variant-ID column of a PLINK .bim file - verify against the caller.
#   NOTE(review): library(dplyr) is loaded, but the added lines only call commandArgs, fread,
#   make.unique and fwrite; the dplyr dependency looks unnecessary.
#   NOTE(review): T / F are used as shorthand for TRUE / FALSE; they are ordinary variables
#   in R and can be reassigned, so the spelled-out constants are safer.
#   NOTE(review): fread() guesses column types; make.unique() requires a character vector,
#   so a file whose V2 values all look numeric may fail - confirm, or read V2 as character.
diff --git a/00a-genotype-organisation.sh b/00a-genotype-organisation.sh index f44d36d..0ec7395 100755 --- a/00a-genotype-organisation.sh +++ b/00a-genotype-organisation.sh @@ -19,12 +19,12 @@ Rscript resources/genotypes/organise_samples.r ${genotype_input_list} ${genotype echo "Get list of pruned SNPs" -if test -f "resources/genotypes/hm3_prune_th_${build}.bed.gz"; then +if test -f "resources/genotypes/hm3_prune_th_${genome_build}.bed.gz"; then echo "Found prune file" prunefile="${genotype_processed_dir}/scratch/indep.prune.in" - gunzip -c resources/genotypes/hm3_prune_th_${build}.bed.gz > ${prunefile} + gunzip -c resources/genotypes/hm3_prune_th_${genome_build}.bed.gz > ${prunefile} else - echo "Error: Prune file resources/genotypes/hm3_prune_th_${build}.bed.gz not found" + echo "Error: Prune file resources/genotypes/hm3_prune_th_${genome_build}.bed.gz not found" exit 1 fi @@ -84,8 +84,13 @@ do --out ${genotype_processed_dir}/bgen_extract/$(basename ${bgen} .bgen) \ --threads ${env_threads} echo "${genotype_processed_dir}/bgen_extract/$(basename ${bgen} .bgen)" >> ${genotype_processed_dir}/bgen_extract/mergelist + + # rename any duplicates to be unique + Rscript resources/genotypes/dups_bim.r "${genotype_processed_dir}/bgen_extract/$(basename ${bgen} .bgen).bim" done + + ./bin/plink2 \ --threads ${env_threads} \ --pmerge-list bfile ${genotype_processed_dir}/bgen_extract/mergelist \ diff --git a/resources/genotypes/dups_bim.r b/resources/genotypes/dups_bim.r new file mode 100644 index 0000000..f795240 --- /dev/null +++ b/resources/genotypes/dups_bim.r @@ -0,0 +1,6 @@ +library(data.table) +library(dplyr) + +a <- fread(commandArgs(T)[1]) +a$V2 <- make.unique(a$V2) +fwrite(a, commandArgs(T)[1], col.names = F, row.names = F, quote = F, sep = "\t")