-
Notifications
You must be signed in to change notification settings - Fork 1
/
SEQUEL_STEP_03.sh
executable file
·113 lines (86 loc) · 6.57 KB
/
SEQUEL_STEP_03.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------#
# CHECK IMPORTED VARIABLES
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------#
SCRIPT_Name="$(basename $0)"
if [[ -z "${SPIPE_dpath:+x}" ]] ; then echo -e "\n[${SCRIPT_Name}]:\t\"SPIPE_dpath\" not set!\n" ; exit 1 ; fi
if [[ -z "${SPIPE_smrtc+x}" ]] ; then echo -e "\n[${SCRIPT_Name}]:\t\"SPIPE_smrtc\" not set!\n" ; exit 2 ; fi
if [[ -z "${SPIPE_pre:+x}" ]] ; then echo -e "\n[${SCRIPT_Name}]:\t\"SPIPE_pre\" not set!\n" ; exit 3 ; fi
if [[ -z "${SPIPE_pro:+x}" ]] ; then echo -e "\n[${SCRIPT_Name}]:\t\"SPIPE_pro\" not set!\n" ; exit 4 ; fi
if [[ -z "${SPIPE_step:+x}" ]] ; then echo -e "\n[${SCRIPT_Name}]:\t\"SPIPE_step\" not set!\n" ; exit 5 ; fi
if [[ -z "${SEQUEL_process:+x}" ]] ; then echo -e "\n[${SCRIPT_Name}]:\t\"SEQUEL_process\" not set!\n" ; exit 6 ; fi
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------#
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------#
# INITIALISE VARIABLES
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------#
SEQL_block="${SPIPE_dpath}/${SPIPE_step}.RUNNING.${SPIPE_smrtc}" # This file acts as a kind of "road block": while it exists the next step cannot start!
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------#
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------#
# RUN
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------#
echo "Starting '${SPIPE_step}' ..." > "${SEQL_block}"
# 1) Separate ZMW reads with high quality regions ("High quality positive" = "Hp") from those without ("High quality negative" = "Hn")
# and further split the "Hp" set into those with valid subreads ("Subread positive" = "Sp") and without ("Subread negative" = "Sn")
#..........................................................................................................................................................................................................................................#
########################################################################################################################################################################################
# Line-wrapped command :
#---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
#
# awk
# '
# BEGIN{
# outf="";
# }
# {
# if(outf==""){outf=FILENAME}
#
# if($3~/[[:digit:]]+H/){
# if($4~/[[:digit:]]+S/){
# print $0 > outf".HpSp"
# }
# else{
# print $0 > outf".HpSn"
# }
# }
# else{
# print $0 > outf".Hn"
# }
# }
# END{}
# '
# "${SPIPE_pre}/PRE.${SPIPE_smrtc}.txt"
#
########################################################################################################################################################################################
awk 'BEGIN{outf="";}{if(outf==""){outf=FILENAME}if($3~/[[:digit:]]+H/){if($4~/[[:digit:]]+S/){print $0 > outf".HpSp"}else{print $0 > outf".HpSn"}}else{print $0 > outf".Hn"}}END{}' "${SPIPE_pre}/PRE.${SPIPE_smrtc}.txt"
chk=$? # get 'awk' exit code
if [[ "${chk}" -ne 0 ]] ; then
echo -e "\n[${SCRIPT_Name}]:\t'awk' terminated with exit code \"${chk}\" while separating reads!\n"
exit 7
else
rm "${SPIPE_pre}/PRE.${SPIPE_smrtc}.txt" # Remove preprocessed input file (not needed anymore once the data is separated)
fi
#..........................................................................................................................................................................................................................................#
# 2) Process Hn, HpSn, and HpSp read sets
#..........................................................................................................................................................................................................................................#
for i in "Hn" "HpSn" "HpSp" ;
do
SEQL_prefile="${SPIPE_pre}/PRE.${SPIPE_smrtc}.txt.${i}" # Preprocessed file for current subset
SEQL_profile="${SPIPE_pro}/PRO.${SPIPE_smrtc}.txt.${i}" # Processed file for current subset
#- Check the "preprocessed" files for each subset as they could be empty depending on the data
if [[ -e "${SEQL_prefile}" ]] && [[ -s "${SEQL_prefile}" ]] ; then
awk -f "${SEQUEL_process}" "${SEQL_prefile}" > "${SEQL_profile}"
chk=$? # get 'awk' exit code
if [[ "${chk}" -ne 0 ]] ; then
echo -e "\n[${SCRIPT_Name}]:\t'awk' terminated with exit code \"${chk}\" while processing \"${SEQL_prefile}\"!\n"
exit 9
else
rm "${SEQL_prefile}" # Remove preprocessed input file (not needed anymore once the data is processed)
fi
else
echo -e "\n[${SCRIPT_Name}]:\t\"${SEQL_prefile}\" does not exists or is empty, check input data!\n"
exit 8
fi
#-
done
#..........................................................................................................................................................................................................................................#
rm "${SEQL_block}"
#------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------#