-
Notifications
You must be signed in to change notification settings - Fork 0
/
stat_usage_web_server.sh
executable file
·206 lines (175 loc) · 7.57 KB
/
stat_usage_web_server.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/usr/bin/env bash
# Filename:    stat_usage_web_server.sh
# Description: usage analysis of web-servers
# Author: Nanjiang Shu ([email protected])
#
# Globals defined here and read by the rest of the script:
#   progname        basename of the running script, shown in the usage text
#   usage           help text printed for -h/--help or when no argument is given
#   ratioEGI        per-method ratio printed in the "RatioEGI" column
#   webserver_base  directory under which the web_<method> trees are looked up
set -e
#set -x

# Quote "$0" so that a script path containing spaces is handled correctly.
progname=$(basename -- "$0")
# Width of the program name and a blank string of the same width.
size_progname=${#progname}
wspace=$(printf "%*s" "$size_progname" "")

usage="
Usage: $progname method [method ...]
Options:
-o OUTFILE Set output file
-start-date STR Set the start date for analysis, in the format 2015-01-01
-end-date STR Set the end date for analysis
-onlydata Show only data, no comments
-spc, --special-country STR Show number of users for the specified country, can be given several times
-out-numuser-country FILE
Output number of users per country
-out-numjob-country FILE
Output number of jobs per country
-h, --help Print this help message and exit
Created 2017-05-03, updated 2022-01-13, Nanjiang Shu
"

# Ratio reported for each supported method in the "RatioEGI" output column.
declare -A ratioEGI=(
    ["topcons2"]="0.5"
    ["scampi2"]="0.2"
    ["proq3"]="0.3"
    ["pconsc3"]="1.0"
    ["boctopus2"]="0.25"
    ["subcons"]="0.25"
    ["prodres"]="1.0"
)

# The location of the web-server trees depends on the host the script runs on.
nodename=$(uname -n)
case "$nodename" in
    pcons*) webserver_base=/big/server/var/www ;;
    *) webserver_base=/data3/server ;;
esac
#######################################
# Print one tab-separated column of a log file for the records whose date
# lies within [d1, d2].
# Arguments:
#   $1 - log file to read
#   $2 - 1-based index of the column to print
#   $3 - first date to include, as YYYYMMDD
#   $4 - last date to include, as YYYYMMDD
# Outputs: one value per matching record, in file order
#######################################
_UsageAna_filter_log(){
    awk -F "\t" -v col="$2" -v d1="$3" -v d2="$4" '
        {
            # Take the first space-separated token of field 1 and strip its
            # dashes so that it can be compared with d1/d2.
            split($1, ss, " ")
            date = ss[1]
            gsub(/-/, "", date)
            if (date >= d1 && date <= d2) {
                print $col
            }
        }' "$1"
}

#######################################
# Count how often each value of column 2 occurs in a tab-separated file and
# print "method<TAB>value<TAB>count", most frequent value first.
# Arguments:
#   $1 - method name written in the first output column
#   $2 - tab-separated file as produced by my_ip2country.py
#######################################
_UsageAna_count_by_country(){
    awk -F "\t" '{print $2}' "$2" \
        | sort | uniq -c | sort -nr \
        | awk -v method="$1" '{
            # "uniq -c" puts the count first; re-join the remaining words
            # because the counted value may itself contain spaces.
            ss = $2
            for (i = 3; i <= NF; i++) {
                ss = ss " " $i
            }
            printf("%s\t%s\t%s\n", method, ss, $1)
        }'
}

#######################################
# Print one line of usage statistics for a web-server method.
# Globals (read):
#   webserver_base, tmpdir, isDebug, ratioEGI,
#   specialCountryList, numSpecialCountry,
#   outfile_numuser_country, outfile_numjob_country
# Arguments:
#   $1 - method name (topcons2, proq3, pconsc3, subcons, prodres, scampi2
#        or boctopus2); any other name is skipped with a warning on stderr
#   $2 - start date, as YYYYMMDD
#   $3 - end date, as YYYYMMDD
# Outputs:
#   One formatted statistics line on stdout; per-country tables are appended
#   to $outfile_numuser_country / $outfile_numjob_country when these are set.
# Returns:
#   0 on success (or unknown method), 1 when the log file cannot be read.
#######################################
UsageAna(){ #{{{
    local method=$1
    local startdate=$2
    local enddate=$3
    local logdir infile uniqiplistfile anafile numseqfile
    local numJob numSeq numUser numCountry numUserEU percentEU
    local ic country numUserCountry
    local all_job_iplistfile all_job_countrylistfile

    case "$method" in
        topcons2|proq3|pconsc3|subcons|prodres|scampi2|boctopus2)
            # Earlier location of the logs: /var/www/html/$method/proj/pred/static/log
            logdir=${webserver_base}/web_${method}/proj/pred/static/log
            # Prefer all_submitted_seq.log and fall back to submitted_seq.log.
            if [ -f "$logdir/all_submitted_seq.log" ]; then
                infile=$logdir/all_submitted_seq.log
            else
                infile=$logdir/submitted_seq.log
            fi
            if [ ! -r "$infile" ]; then
                echo "Error! Log file $infile for method $method is not readable" >&2
                return 1
            fi
            if [ "${isDebug:-0}" -eq 1 ]; then
                echo "Using input file $infile"
            fi

            uniqiplistfile=$tmpdir/uniqiplist.$method.txt
            anafile=$tmpdir/uniqidlist.$method.ana.txt
            numseqfile=$tmpdir/$method.numseq.txt

            # Column 3 holds the submitter IP, column 4 the value that is
            # summed up as the number of sequences of the job.
            _UsageAna_filter_log "$infile" 3 "$startdate" "$enddate" | sort -u > "$uniqiplistfile"
            _UsageAna_filter_log "$infile" 4 "$startdate" "$enddate" > "$numseqfile"
            my_ip2country.py -l "$uniqiplistfile" -show-eu > "$anafile"

            numJob=$(wc -l < "$numseqfile")
            numSeq=$(awk 'BEGIN{sum=0}{sum+=$1}END{print sum}' "$numseqfile")
            numUser=$(awk -F "\t" '{print $1}' "$anafile" | sort -u | wc -l)
            numCountry=$(awk -F "\t" '{print $2}' "$anafile" | sort -u | wc -l)
            numUserEU=$(awk -F "\t" '$3 == "EU"' "$anafile" | wc -l)

            # Computed with awk so that no python interpreter is required;
            # zero users yields 0 instead of a division by zero.
            percentEU=$(awk -v eu="$numUserEU" -v total="$numUser" \
                'BEGIN { printf "%.10f", (total > 0) ? (eu / total * 100) : 0 }')

            printf "%-9s %8d %10d %10d %10.1f" "$method" "$numUser" "$numCountry" "$numUserEU" "$percentEU"
            for ((ic=0; ic<numSpecialCountry; ic++)); do
                country=${specialCountryList[$ic]}
                # Quoted so that country names containing spaces stay one word.
                numUserCountry=$(awk -v country="$country" -F "\t" '$2 == country' "$anafile" | wc -l)
                printf " %15d" "$numUserCountry"
            done
            printf " %10d %10d %6s\n" "$numJob" "$numSeq" "${ratioEGI[$method]}"

            # output number of users (counted as unique IP)
            if [ "$outfile_numuser_country" != "" ]; then
                _UsageAna_count_by_country "$method" "$anafile" >> "$outfile_numuser_country"
            fi

            # output number of jobs per country
            if [ "$outfile_numjob_country" != "" ]; then
                all_job_iplistfile=$tmpdir/all_job_iplist.$method.txt
                all_job_countrylistfile=$tmpdir/all_job_countrylist.$method.txt
                # Not de-duplicated: every job contributes one IP.
                _UsageAna_filter_log "$infile" 3 "$startdate" "$enddate" > "$all_job_iplistfile"
                my_ip2country.py -l "$all_job_iplistfile" -show-eu > "$all_job_countrylistfile"
                _UsageAna_count_by_country "$method" "$all_job_countrylistfile" >> "$outfile_numjob_country"
            fi
            ;;
        *)
            echo "Warning! Unknown method '$method', ignored" >&2
            ;;
    esac
} #}}}
# ---------------------------------------------------------------------------
# Main script: parse the command line, print the statistics table for every
# requested method via UsageAna, then print the column legend.
# ---------------------------------------------------------------------------
if [ $# -lt 1 ]; then
    echo "$usage"
    exit
fi

methodList=()
startdate=19000000
enddate=22000000
isShowOnlyData=0
isAnaPerMonth=0
isDebug=0
isQuiet=0
specialCountryList=()
outfile=
outfile_numuser_country=
outfile_numjob_country=
tmpdir=

# Abort with a message when an option that takes a value is the last argument
# (a bare "shift" would otherwise end the script silently under "set -e").
# Arguments: the remaining command line, i.e. "$@" with the option first.
RequireOptionValue(){
    if [ $# -lt 2 ]; then
        echo "Error! Option $1 requires an argument" >&2
        exit 1
    fi
}

# Remove the temporary directory unless debugging was requested.
# Installed as EXIT trap so that it also runs on errors and early exits.
CleanupTmpdir(){
    if [ -n "$tmpdir" ] && [ "$isDebug" -eq 0 ]; then
        rm -rf -- "$tmpdir"
    fi
}
trap CleanupTmpdir EXIT
# Turn signals into a regular exit so that the EXIT trap above is run.
trap 'exit 130' INT
trap 'exit 143' TERM

# "--" marks the next argument as a method name even if it starts with "-".
isNonOptionArg=0
while [ $# -gt 0 ]; do
    if [ "$isNonOptionArg" -eq 1 ]; then
        methodList+=("$1")
        isNonOptionArg=0
    elif [ "$1" == "--" ]; then
        isNonOptionArg=1
    elif [ "${1:0:1}" == "-" ]; then
        case "$1" in
            -h|--help) echo "$usage"; exit;;
            -o|--outfile) RequireOptionValue "$@"; outfile=$2; shift;;
            -start-date|--start-date) RequireOptionValue "$@"; startdate=$2; shift;;
            -end-date|--end-date) RequireOptionValue "$@"; enddate=$2; shift;;
            -onlydata|--onlydata) isShowOnlyData=1;;
            -permonth|--permonth) isAnaPerMonth=1;;
            -spc|-special-country|--special-country)
                RequireOptionValue "$@"; specialCountryList+=("$2"); shift;;
            -out-numuser-country) RequireOptionValue "$@"; outfile_numuser_country=$2; shift;;
            -out-numjob-country) RequireOptionValue "$@"; outfile_numjob_country=$2; shift;;
            -q|-quiet|--quiet) isQuiet=1;;
            -debug|--debug) isDebug=1;;
            *) echo "Error! Wrong argument: $1" >&2; exit 1;;
        esac
    else
        methodList+=("$1")
    fi
    shift
done

numMethod=${#methodList[@]}
numSpecialCountry=${#specialCountryList[@]}
if [ "$numMethod" -eq 0 ]; then
    echo "Input not set! Exit." >&2
    exit 1
fi

# -o OUTFILE: send everything written to stdout from here on to that file.
if [ -n "$outfile" ]; then
    exec > "$outfile"
fi

# Created only after the arguments were accepted, so that -h or a usage error
# leaves nothing behind; read by UsageAna through the global $tmpdir.
tmpdir=$(mktemp -d /tmp/tmpdir.stat_usage_web_server.XXXXXXXXX) || { echo "Failed to create temp dir" >&2; exit 1; }

if [ "$isShowOnlyData" -eq 0 ]; then
    printf 'Web server usage statistics for the period %s to %s \n\n' "$startdate" "$enddate"
fi

# Start with empty per-country tables; UsageAna appends to them.
if [ "$outfile_numuser_country" != "" ]; then
    : > "$outfile_numuser_country"
fi
if [ "$outfile_numjob_country" != "" ]; then
    : > "$outfile_numjob_country"
fi

if [ "$isAnaPerMonth" -eq 0 ]; then
    # Dates are compared as YYYYMMDD, so drop the dashes of 2015-01-01.
    startdate=${startdate//-/}
    enddate=${enddate//-/}
    printf "%-9s %8s %10s %10s %10s" "#Method" "NumUser" "NumCountry" "NumUserEU" "PercentEU"
    for country in "${specialCountryList[@]}"; do
        printf " %15s" "NUser$country"
    done
    printf " %10s %10s %6s\n" "NumJob" "NumSeq" "RatioEGI"
    for method in "${methodList[@]}"; do
        UsageAna "$method" "$startdate" "$enddate"
    done
else
    echo "Warning! Per-month analysis (-permonth) is not implemented, no statistics produced" >&2
fi

if [ "$isShowOnlyData" -eq 0 ]; then
    cat <<'EOF'

#==========================================================================================
NumUser NumUser is calculated as the unique IP address the job is submitted from
NumCountry Number of countries the users are from
NumUserEU Number of users from the European countries
PercentEU Percentage of users that are from the European countries out of the whole world
NumJob Number of jobs submitted to the server
NumSeq Number of sequences (queries) submitted to the server, one job can have multiple sequences.

EOF
fi

# In the normal case the EXIT trap removes $tmpdir.
if [ "$isDebug" -eq 1 ]; then
    echo "Temporary files are kept at $tmpdir"
fi