forked from MorrellLAB/sequence_handling
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathConfig_Indel_Realign
172 lines (136 loc) · 6.4 KB
/
Config_Indel_Realign
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/bin/bash
# This config runs indel realignment steps only. Please see "Config" file for full pipeline.
# More complete information on how to fill out
# this Config file can be found at:
# https://github.com/MorrellLAB/sequence_handling/wiki/Configuration
###################################################
########## Shared across handlers ##########
###################################################

# Root directory for output files.
# Each handler writes into ${OUT_DIR}/Name_of_Handler
OUT_DIR=""

# Name of this project
PROJECT=""

# Email address to use for job notifications
EMAIL=""

# Encoding used for the quality values.
# Look at the FastQC files to determine the sequence encoding.
# Choose from: 'sanger', 'illumina', 'solexa', or 'phred'
QUAL_ENCODING=""

# Sequencing platform technology that produced the reads.
# Valid options are:
#   CAPILLARY, LS454, ILLUMINA, SOLID, HELICOS, IONTORRENT, ONT, and PACBIO
SEQ_PLATFORM=""

# Reference genome to use. Include the full file path.
REF_GEN=""

# Is this organism barley?
# Choose from: "true" or "false"
BARLEY=""

# Are you running the analysis on the Minnesota Supercomputing Institute (MSI)?
# Choose from: "true" or "false"
MSI=""

# Are you submitting the job with qsub from PBS (Portable Batch System)?
# Choose from: "true" or "false"
USE_PBS='false'

# Are you submitting the job with sbatch from Slurm?
# Choose from: "true" or "false"
USE_SLURM='true'

# Do the quality scores need to be adjusted for GATK? Default: false
# Change to true if you get errors from GATK like:
#   "<Sample> appears to be using the wrong encoding for quality scores: we encountered an extremely high quality score"
FIX_QUALITY_SCORES=""

# Temporary directory for Picard/GATK (optional).
# Specify one if you have run out of temp space with picard;
# otherwise, leave blank.
TMP=""
############################################
######## Realigner_Target_Creator ########
############################################

# Queue(s) the job is submitted to.
#   USE_PBS:   only one queue can be specified
#   USE_SLURM: multiple queues may be given, comma separated with no spaces
#              (e.g., "small,ram256g,ram1t")
RTC_QUEUE='small'

# USE_SLURM: sbatch settings for Realigner_Target_Creator.
# The values below are the recommended settings: resource requests first,
# then the notification and partition options built from EMAIL and RTC_QUEUE.
RTC_SBATCH="--nodes=1 --ntasks-per-node=16 --mem=22gb --tmp=12gb -t 24:00:00"
RTC_SBATCH+=" --mail-type=ALL --mail-user=${EMAIL} -p ${RTC_QUEUE}"

# USE_PBS: qsub settings for Realigner_Target_Creator.
# The values below are the recommended settings.
RTC_QSUB='mem=22gb,nodes=1:ppn=16,walltime=24:00:00'

# List of finished BAM files.
# To generate this list, use sample_list_generator.sh
# located at /sequence_handling/HelperScripts/sample_list_generator.sh
RTC_BAM_LIST=""
############################################
######### Indel_Realigner #########
############################################

# Queue(s) the job is submitted to.
#   USE_PBS:   only one queue can be specified
#   USE_SLURM: multiple queues may be given, comma separated with no spaces
#              (e.g., "small,ram256g,ram1t")
IR_QUEUE='small'

# USE_SLURM: sbatch settings for Indel_Realigner.
# The values below are the recommended settings: resource requests first,
# then the notification and partition options built from EMAIL and IR_QUEUE.
IR_SBATCH="--nodes=1 --ntasks-per-node=16 --mem=22gb --tmp=12gb -t 24:00:00"
IR_SBATCH+=" --mail-type=ALL --mail-user=${EMAIL} -p ${IR_QUEUE}"

# USE_PBS: qsub settings for Indel_Realigner.
# The values below are the recommended settings.
IR_QSUB='mem=22gb,nodes=1:ppn=16,walltime=24:00:00'

# List of finished BAM files.
# To generate this list, use sample_list_generator.sh
# located at /sequence_handling/HelperScripts/sample_list_generator.sh
IR_BAM_LIST=""

# List of realigner targets files (.intervals).
# To generate this list, use sample_list_generator.sh
# located at /sequence_handling/HelperScripts/sample_list_generator.sh
IR_TARGETS=""
# IR_BAM_LIST and IR_TARGETS must have the same sample order.

# LOD threshold above which the cleaner will clean.
# GATK default: 5.0, Barley: 3.0
LOD_THRESHOLD="3.0"

# Percentage of mismatches at a locus to be considered having high entropy
# (0.0 < entropy <= 1.0).
# GATK default: 0.15, Barley: 0.10
ENTROPY_THRESHOLD="0.10"

# Maximum number of reads the SAMFileWriter may keep in memory at a time.
# If you are running into memory issues, try reducing this number.
# Default: 150000
MAX_READS_IN_MEM="150000"
############################################
########## Dependencies ##########
############################################

# This section defines the installations of the various
# dependencies for sequence_handling.
#
#   * Minnesota Supercomputing Institute cluster:
#       uncomment all 'module load' lines, and make sure you
#       have access to all '_ML' modules.
#   * Dependency installed from source:
#       uncomment the lines for that dependency and its 'export PATH=' line,
#       and write the full path to the executable for the program.
#       For example:
#           PARALLEL=${HOME}/software/parallel-20151122/bin/parallel
#   * System-wide installation of a program:
#       leave all lines commented out. sequence_handling will find
#       system-wide installed programs automatically.
#
# Please visit https://github.com/MorrellLAB/sequence_handling/wiki/Dependencies
# for information on version requirements and compatibility

# Everything below is applied only when MSI is set to "true".
if [[ "${MSI}" == "true" ]]; then
    # Do we have GNU parallel installed?
    module load parallel/20190122
    #PARALLEL=
    #export PATH=${PARALLEL}:${PATH}

    # Do we have Java installed?
    module load java/openjdk-8_202
    #JAVA=
    #export PATH=${JAVA}:${PATH}

    # Full file path for the GATK jar file.
    # You need BOTH the jar file and the module.
    # NOTE: Indel realignment functionality is not in GATK v4. Please use GATK v3.
    GATK_JAR="/panfs/jay/groups/9/morrellp/public/Software/GATK_ML_3.8.1-0-gf15c1c3ef/GenomeAnalysisTK.jar"
    module load gatk_ML/3.8.1

    # Full file path for the Picard jar file.
    PICARD_JAR="/home/morrellp/public/Software/picard_ML_2.20.2/picard.jar"
fi