-
Notifications
You must be signed in to change notification settings - Fork 0
/
BioGrinder.sh
executable file
·225 lines (197 loc) · 6.46 KB
/
BioGrinder.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
#!/bin/bash
# -d fumigatus.fasta
# -p RC-PCR-primers.fasta
# Default paramters
outputDir=false
readDistance=151
fastq=true
baseDir=$(pwd)
databaseModification=false
primerFile="RC-PCR-primers-changed.fasta"
# Parsing given parameters.
while getopts "r:d:p:n:a:f:r:x" OPTION; do
case $OPTION in
r)
readDistance=${OPTARG}
;;
d)
database=${OPTARG}
;;
p)
primerFile=${OPTARG}
;;
n)
runName=${OPTARG}
;;
x)
databaseModification=true
;;
a)
abundanceFile=${OPTARG}
;;
esac
done
# Check for mandatory parameters
if [ -z "$runName" ]; then
echo 'Missing -n' >&2
exit 1
fi
# Check for mandatory parameters
if [ -z "$database" ]; then
echo 'Missing -d' >&2
exit 1
fi
# Create primer file for each primer
boolean=false
header=""
# Open pimer file
while read p; do
# Check for forward primers
if [[ "$p" =~ .*"F".* ]]; then
boolean=true
header=${p}
# Check for reverse primers
elif [[ "$p" =~ .*"R".* ]]; then
boolean=false
header=${p}
# Just a sequence
else
fileName=${header/">"/""}
echo "${header}" > data/createdPrimerFiles/${fileName}.fasta
echo "${p}" >> data/createdPrimerFiles/${fileName}.fasta
fi
done < "${baseDir}/data/primers/${primerFile}"
# Check if modification of database is needed
if [[ "$databaseModification" == "true" ]]; then
firstPrimer=$(grep -m 2 -o '.*' ${baseDir}/data/primers/${primerFile} | cut -d ':' -f 1 | tail -n 1 )
lastPrimer=$(tail -1 ${baseDir}/data/primers/${primerFile})
reverseComplementLastPrimer=$(echo $lastPrimer | tr ACGTacgt TGCAtgca | rev)
# Open given database
while read p; do
# Check for fasta headers
if [[ "${p}" =~ ">".* ]]; then
echo ${p} >> ${baseDir}/data/database/modifiedDatabase.fasta
# All the other fasta lines
else
echo "$firstPrimer$p$reverseComplementLastPrimer" >> ${baseDir}/data/database/modifiedDatabase.fasta
fi
done < "${baseDir}/data/database/${database}"
# Change database to modified database
database="modifiedDatabase.fasta"
fi
# Running BioGrinder for all primer files
FILES="data/createdPrimerFiles/*"
for f in $FILES
do
# Define parameters for BioGrinder
bioGrinderParameters="-reference_file ${baseDir}/data/database/${database} -forward_reverse ${baseDir}/${f} -length_bias 0 -unidirectional -1 -fq 1 -ql 30 10 -rd ${readDistance}"
# Add parameter if it is given be the user
if [ ! -z "$abundanceFile" ]; then
bioGrinderParameters="$bioGrinderParameters -af data/$abundanceFile"
fi
grinder $bioGrinderParameters
# Define the string value
text="$f"
# Split filePath on .
readarray -d . -t strarr <<< "$text"
# Split filePath on /
readarray -d / -t strarr2 <<< "${strarr[0]}"
# Name of output file
outputFileName=`echo ${strarr2[2]}
`
# Move created output
mv grinder-reads.fastq ${baseDir}/data/BioGrinderFiles/$outputFileName-reads.fastq
mv grinder-ranks.txt ${baseDir}/data/BioGrinderAnnotationFiles/$outputFileName-ranks.txt
done
# Create directory for output files
mkdir ${baseDir}/output/${runName}
# Counters that keep count of the amount of fastq headers in the file
forwardCounter=0
reverseCounter=0
# Filling files for forward and reverse amplicons
FILES="data/BioGrinderFiles/*"
# Loop over all files in the folder
for f in $FILES
do
# Split the File name
readarray -d / -t splitFilename <<< "${f}"
# Open the file
while read p; do
# If the files contains forward amplicons, process them
if [[ "${splitFilename[2]}" =~ .*"F".* ]]; then
FILE=$(echo $baseDir/data/BioGrinderFiles/${splitFilename[2]/"F"/"R"} | xargs)
# Check if the line is a fastq header
if [[ "${p}" =~ "@".* ]]; then
# Up the ID by 1 when a new header is found
forwardCounter=$(($forwardCounter + 1))
# Modify the header so it uses the new ID
newHeader=""
readarray -d " " -t splitHeader <<< "${p}"
for val in "${splitHeader[@]}";
do
if [[ "${val}" =~ "@".* ]]; then
newHeader="${newHeader} @${forwardCounter}"
else
newHeader="${newHeader} ${val}"
fi
done
if test -f "$FILE"; then
echo ${newHeader} >> ${baseDir}/output/${runName}/${runName}_forward_reads.fastq
fi
# All the fastq lines that are not headers
else
# Write to the output file
if test -f "$FILE"; then
echo ${p} >> ${baseDir}/output/${runName}/${runName}_forward_reads.fastq
fi
fi
# If the files contains reverse amplicons, process them
elif [[ "${splitFilename[2]}" =~ .*"R".* ]]; then
FILE2=$(echo $baseDir/data/BioGrinderFiles/${splitFilename[2]/"R"/"F"} | xargs)
# Check if the line is a fastq header
if [[ "${p}" =~ "@".* ]]; then
# Up the ID by 1 when a new header is found
reverseCounter=$(($reverseCounter + 1))
# Modify the header so it uses the new ID
newHeader=""
readarray -d " " -t splitHeader <<< "${p}"
for val in "${splitHeader[@]}";
do
if [[ "${val}" =~ "@".* ]]; then
newHeader="${newHeader} @${reverseCounter}"
else
newHeader="${newHeader} ${val}"
fi
done
# Write to the output file
if test -f "$FILE2"; then
echo ${newHeader} >> ${baseDir}/output/${runName}/${runName}_reverse_reads.fastq
fi
# All the fastq lines that are not headers
else
# Write to the output file
if test -f "$FILE2"; then
echo ${p} >> ${baseDir}/output/${runName}/${runName}_reverse_reads.fastq
fi
fi
fi
done < "${baseDir}/${f}"
done
# Zip output files
FILE=${baseDir}/output/${runName}/${runName}_forward_reads.fastq.gz
if test -f "$FILE"; then
gzip ${baseDir}/output/${runName}/${runName}_forward_reads.fastq -f
gzip ${baseDir}/output/${runName}/${runName}_reverse_reads.fastq -f
mv ${baseDir}/output/${runName}/${runName}_forward_reads.fastq.gz ${baseDir}/output/${runName}/${runName}_S99_L001_R1_001.fastq.gz
mv ${baseDir}/output/${runName}/${runName}_reverse_reads.fastq.gz ${baseDir}/output/${runName}/${runName}_S99_L001_R2_001.fastq.gz
else
gzip ${baseDir}/output/${runName}/${runName}_forward_reads.fastq
gzip ${baseDir}/output/${runName}/${runName}_reverse_reads.fastq
mv ${baseDir}/output/${runName}/${runName}_forward_reads.fastq.gz ${baseDir}/output/${runName}/${runName}_S99_L001_R1_001.fastq.gz
mv ${baseDir}/output/${runName}/${runName}_reverse_reads.fastq.gz ${baseDir}/output/${runName}/${runName}_S99_L001_R2_001.fastq.gz
fi
# Remove all used Files
rm $baseDir/data/database/modifiedDatabase.fasta
rm -rfv data/createdPrimerFiles/*
rm -rfv data/BioGrinderFiles/*
rm -rfv data/BioGrinderAnnotationFiles/*