-
Notifications
You must be signed in to change notification settings - Fork 44
/
mongo_js_run.sh
executable file
·287 lines (263 loc) · 12.6 KB
/
mongo_js_run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
#!/bin/bash
# Created by Roel Van de Paar, Percona LLC
# To get a better idea on the working of this script, see the header comments in mongo_single_test.sh. Basically mongo_js_run.sh is an intelligent wrapper
# around mongo_single_test.sh. It analyzes output of mongo_single_test.sh & reports on the same. It also stores interesting results to a ${RESULTSDIR} dir.
# User configurable variables
MONGODIR="/sdc/percona-server-mongodb-3.0.7-1.0"  # Where mongo/mongod lives (with ./mongod and ./jstests both present!)
RESULTS_DIR="/sdc"                                # Where failures/bugs are stored (a per-run sub directory is created below it)
JS_LIST="/sdc/js_tests_to_run.txt"                # Temporary file of JS tests. NOTE(review): not referenced by the code in this script - confirm before removing
THREADS=10                                        # Number of tests that may run concurrently
TIMEOUT=$(( 60 * 30 ))                            # Timeout, in seconds, per test (currently 30 min)
FILES_ULIMIT=100000                               # Ulimit -n value which script will attempt to set (TokuFT needs many file descriptors)
PRI_ENGINE=PerconaFT                              # Primary engine to use for testing (usually tokuft)
#SEC_ENGINE=wiredTiger                            # Compare primary engine against this engine (usually mmapv1 or wiredTiger)
SEC_ENGINE=MMAPv1

# Internal variables
SCRIPT_PWD=$(cd "$(dirname "$0")" && pwd)         # Directory this script lives in (quoted so paths with whitespace work)
TEST_IN_PROGRESS=0                                # Index (1-based once started) of the last test that was started in the current phase
WORKDIR=""                                        # Per-test working directory, set by loop()
RESULTSDIR=""                                     # Per-run results directory, set by the main script
MUTEX1=0;MUTEX2=0
SEGREGATED=0                                      # 0: running the main test list | 1: running the single-threaded test list
CTRL_C_PRESSED=0
# SIGINT (CTRL+C) handler: announces the interruption, drops a marker file in the results
# directory (echoit uses that marker to silence regular output), kills the mongo processes
# belonging to this run and terminates the script with exit code 2.
ctrl-c(){
  local marker=${RESULTSDIR}/ctrl_c_was_pressed_during_this_run
  echoit "CTRL+c Was pressed. Results directory: ${RESULTSDIR}" CTRLC
  echoit "Attempting to terminate running processes..." CTRLC
  touch ${marker}
  kill_pids
  sleep 1  # Allows background termination & screen output to finish
  echoit "Terminating mongo-js-run.sh with exit code 2..." CTRLC
  exit 2
}
# Trap ctrl-c (the handler name is only looked up when the signal is delivered)
trap ctrl-c SIGINT
# Kill (-9) every process whose command line mentions "mongo" AND one of this run's
# directories (${WORKDIR} / ${RESULTSDIR}); this script itself is excluded.
# Globals: WORKDIR, RESULTSDIR (read), KILL_PIDS (written: space separated PID list)
kill_pids(){
  local pattern=""
  local dir
  KILL_PIDS=""
  # Only non-empty directories may become part of the regex: an empty alternative
  # (e.g. "|/sdc/123456") matches every line and would kill unrelated mongo processes.
  for dir in "${WORKDIR}" "${RESULTSDIR}"; do
    if [ -n "${dir}" ]; then
      pattern="${pattern:+${pattern}|}${dir}"
    fi
  done
  if [ -z "${pattern}" ]; then
    return 0  # Nothing identifies "our" processes, so there is nothing safe to kill
  fi
  KILL_PIDS=$(ps -ef | grep mongo | grep -v grep | grep -v 'mongo_js_run' | grep -E -- "${pattern}" | awk '{print $2}' | tr '\n' ' ')
  if [ "${KILL_PIDS}" != "" ]; then
    echoit "Terminating the following PID's: ${KILL_PIDS}"
    kill -9 ${KILL_PIDS} >/dev/null 2>&1  # Unquoted on purpose: one argument per PID
  fi
}
# Print a timestamped message to stdout and, once ${RESULTSDIR} is set, append the very
# same line to ${RESULTSDIR}/mongo_js_run.log.
#   $1 - message text
#   $2 - optional; the literal "CTRLC" selects the CTRL+C format (no progress counters)
# Once the ctrl_c_was_pressed_during_this_run marker exists, regular (non-CTRLC) messages
# are suppressed.
echoit(){
  local line
  if [ "$2" == "CTRLC" ]; then  # CTRL+C was pressed, format output without echoit_counters
    line="[$(date +'%T')] [CTRL+C] $1"
  elif [ ! -r "${RESULTSDIR}/ctrl_c_was_pressed_during_this_run" ]; then  # Standard output (CTRL+C was not pressed [yet])
    line="[$(date +'%T')]$(echoit_counters)$1"
  else
    return 0
  fi
  # The line is composed once so that screen and log always show the same timestamp/counters
  echo "${line}"
  if [ "${RESULTSDIR}" != "" ]; then echo "${line}" >> "${RESULTSDIR}/mongo_js_run.log"; fi
}
# Emit (without trailing newline) the progress prefix that echoit puts in front of a message:
#   - a single space while no test has been started (TEST_IN_PROGRESS is 0 or empty)
#   - " [<test in progress>/<test count>] [<saved failures>] " otherwise
# Globals: TEST_IN_PROGRESS, TEST_COUNT, RESULTSDIR (read), SAVED (written)
echoit_counters(){
  # The empty check comes first and the variable is quoted: an unquoted empty value makes
  # the numeric comparison fail with a syntax error instead of selecting this branch.
  if [ -z "${TEST_IN_PROGRESS}" ] || [ "${TEST_IN_PROGRESS}" -eq 0 ]; then
    echo -n " "
  else
    if [ -r "${RESULTSDIR}/saved.lock" ]; then
      SAVED=$(head -n1 "${RESULTSDIR}/saved.lock")
    else
      SAVED=0
      # Written to stdout, so it becomes part of the prefix captured by echoit
      echo "Warning: Debug assertion: ${RESULTSDIR}/saved.lock did not exist when accessed from echoit_counters(), this should not happen."
    fi
    echo -n " [${TEST_IN_PROGRESS}/${TEST_COUNT}] [${SAVED}] "
  fi
}
# Run a single JS test in its own /dev/shm working directory and file the outcome.
#   $1 - the JS TEST_TO_RUN (path inside jstests + test name)
# A private copy of mongo_single_test.sh is patched (TEST_TO_RUN, DEBUG, PRI_ENGINE, SEC_ENGINE)
# and executed under timeout(1). Depending on its exit code the working directory is deleted or
# moved into ${RESULTSDIR}.
# Globals: SCRIPT_PWD, MONGODIR, TIMEOUT, PRI_ENGINE, SEC_ENGINE, RESULTSDIR (read);
#          RANDOMD, WORKDIR, RESULT (written)
# Note: start_thread runs this function as a background job, so the "exit 1" statements below
# end that background subshell only.
loop(){ # $1 to this function is the JS TEST_TO_RUN
  RANDOMD=$(echo $RANDOM$RANDOM$RANDOM | sed 's/..\(........\).*/\1/')  # Random number generator (normally 8 digits)
  WORKDIR="/dev/shm/${RANDOMD}"
  echoit "Setting up trial working directory ${WORKDIR}..."
  mkdir -p "${WORKDIR}"
  if [ ! -d "${WORKDIR}" ]; then
    echo "Assert: attempted to create ${WORKDIR}, but it does not exist after creation!"
    exit 1
  fi
  cp "${SCRIPT_PWD}/mongo_single_test.sh" "${WORKDIR}/mongo_single_test.sh"
  sed -i "s|^[ \t]*TEST_TO_RUN=.*|TEST_TO_RUN=${1}|;s|^[ \t]*DEBUG=.*|DEBUG=0|" "${WORKDIR}/mongo_single_test.sh"
  sed -i "s|^[ \t]*PRI_ENGINE=.*|PRI_ENGINE=${PRI_ENGINE}|;s|^[ \t]*SEC_ENGINE=.*|SEC_ENGINE=${SEC_ENGINE}|" "${WORKDIR}/mongo_single_test.sh"
  timeout --signal=9 "${TIMEOUT}s" "${WORKDIR}/mongo_single_test.sh" "${WORKDIR}" "${MONGODIR}"
  RESULT=$?  # 0: Both engines succeeded | 1: PRI engine failed only | 2: SEC engine failed only | 3: Both engines failed | 4: Unknown issue (should not happen) | 137: timeout
  case "${RESULT}" in
    0)
      echoit "Both engines succeeded on ${1}, deleting test results..."
      rm -Rf -- "${WORKDIR}"
      ;;
    1)
      update_saved_counter
      echoit "Primary engine (TokuMX[se]) failed on ${1}, saving details in ${RESULTSDIR}/${RANDOMD}..."
      mv "${WORKDIR}" "${RESULTSDIR}"
      if [ ! -d "${RESULTSDIR}/${RANDOMD}" ]; then
        echo "Assert: attempted to move ${WORKDIR} to ${RESULTSDIR}, but this seems to have failed. Out of disk space maybe? Terminating run..."
        exit 1
      fi
      ;;
    2)
      echoit "Only secondary engine failed on ${1}, ignoring & deleting test results..."
      rm -Rf -- "${WORKDIR}"
      ;;
    3)
      # Once tokuft-only failures are cleared, we can start looking into test failures where both
      # engines fail (i.e. save the working directory like in the exit code 1 case). Ftm, we are
      # ignoring/deleting them.
      echoit "Both engines failed on ${1}, ignoring & deleting test results..."
      rm -Rf -- "${WORKDIR}"
      ;;
    137)  # 128 + 9: the test was killed by timeout --signal=9
      echoit "Test ${1} was interrupted as it went over the ${TIMEOUT}s timeout, saving details in ${RESULTSDIR}/${RANDOMD}..."
      update_saved_counter
      mv "${WORKDIR}" "${RESULTSDIR}"
      if [ ! -d "${RESULTSDIR}/${RANDOMD}" ]; then
        echo "Assert: attempted to move ${WORKDIR} to ${RESULTSDIR}, but this seems to have failed. Out of disk space maybe? Terminating run..."
        exit 1
      fi
      mv "${RESULTSDIR}/${RANDOMD}" "${RESULTSDIR}/${RANDOMD}_TIMEOUT"
      if [ ! -d "${RESULTSDIR}/${RANDOMD}_TIMEOUT" ]; then
        echo "Assert: attempted to move/rename ${RESULTSDIR}/${RANDOMD} to ${RESULTSDIR}/${RANDOMD}_TIMEOUT, but this seems to have failed. Terminating run..."
        exit 1
      fi
      # The marker goes INSIDE the renamed trial directory
      echo "Trial ${1} was interrupted as it went over the ${TIMEOUT}s timeout!" > "${RESULTSDIR}/${RANDOMD}_TIMEOUT/this_trial_was_interrupted.txt"
      ;;
    *)  # Any other exit code (>= 4)
      echoit "Assert: mongo_single_test.sh returned exit code ${RESULT} for test ${1}, this should not happen! Terminating run..."
      exit 1
      ;;
  esac
}
# Increment the saved-failures counter stored in ${RESULTSDIR}/saved.lock.
# Callers run in separate background subshells (see "loop ... &" in start_thread), so a shell
# variable cannot act as a mutex between them: every subshell has its own copy. The
# read-increment-write cycle is therefore serialised with an exclusive flock(1) on the counter
# file itself. The lock is released when file descriptor 9 is closed at the end of the group.
# Globals: RESULTSDIR (read), SAVED (written: the new counter value)
update_saved_counter(){
  local counter_file="${RESULTSDIR}/saved.lock"
  {
    # Best effort: if flock(1) is unavailable the update proceeds unlocked
    flock 9 2>/dev/null
    SAVED=$(head -n1 "${counter_file}")
    SAVED=$(( ${SAVED:-0} + 1 ))
    echo "${SAVED}" > "${counter_file}"
  } 9>>"${counter_file}"  # Append mode: opens (or creates) the file without truncating it
}
# Start the next test of the active list as a background job.
# The active list is jstests.list (SEGREGATED=0) or jstests_single.list (SEGREGATED=1), both in
# ${RESULTSDIR}. TEST_IN_PROGRESS is advanced by one and used as the line number of the test to
# run. MUTEX1 is held while the counter is updated and the list file is read.
start_thread(){
  # Wait until the flag is free, then take it
  while [ ${MUTEX1} -ne 0 ]; do
    sleep 2
  done
  MUTEX1=1
  TEST_INPUT_FILE_TO_USE=
  if [ ${SEGREGATED} -eq 1 ]; then
    TEST_INPUT_FILE_TO_USE=${RESULTSDIR}/jstests_single.list
  elif [ ${SEGREGATED} -eq 0 ]; then
    TEST_INPUT_FILE_TO_USE=${RESULTSDIR}/jstests.list
  else
    echoit "Assert: \${SEGREGATED}!=0 && \${SEGREGATED}!=1"
    exit 1
  fi
  TEST_IN_PROGRESS=$(( ${TEST_IN_PROGRESS} + 1 ))
  # Path inside jstests + test name. For example, TEST_TO_RUN=core/system_profile.js
  TEST_TO_RUN="$(head -n${TEST_IN_PROGRESS} ${TEST_INPUT_FILE_TO_USE} | tail -n1)"
  loop ${TEST_TO_RUN} &
  MUTEX1=0
}
# Create the per-run results directory ${RESULTS_DIR}/<random number> and the saved-failures
# counter file inside it, then keep a copy of this script next to the results.
RANDOMD=$(echo $RANDOM$RANDOM$RANDOM | sed 's/..\(......\).*/\1/')  # Random number generator (normally 6 digits)
RESULTSDIR=$(echo "/${RESULTS_DIR}/${RANDOMD}" | sed 's|//|/|g')
mkdir -p "${RESULTSDIR}"
# Check the directory that was just created. Plain echo is used because echoit would try to
# append to a log file inside the missing directory.
if [ ! -d "${RESULTSDIR}" ]; then
  echo "Assert: attempted to create ${RESULTSDIR}, but it does not exist after creation!"
  exit 1
fi
echo "0" > "${RESULTSDIR}/saved.lock"  # ${SAVED} count will be read, and updated into, from/to this file
if [ ! -r "${RESULTSDIR}/saved.lock" ]; then
  echo "Assert: attempted to create ${RESULTSDIR}/saved.lock, but it does not exist or cannot be read (privileges issue?) after creation!"
  exit 1
fi
echoit "MJR (mongo-js-run.sh) v1.03 | Threads: ${THREADS} | Per-test timeout: ${TIMEOUT}s | Results directory: ${RESULTSDIR}"
echoit "Mongo base directory: ${MONGODIR} | Primary engine: ${PRI_ENGINE} | Secondary engine: ${SEC_ENGINE}"
echoit "Setup main results directory ${RESULTSDIR}..."
echoit "Making a copy of ${SCRIPT_PWD}/mongo_js_run.sh to ${RESULTSDIR} for later reference..."
cp "${SCRIPT_PWD}/mongo_js_run.sh" "${RESULTSDIR}"
# Start from a clean slate: stop mongod instances left over from earlier runs
echoit "Terminating all owned mongod instances..."
${SCRIPT_PWD}/mongo_kill_procs_safe.sh

# Raise the open files limit and abort (with instructions) when that is not possible
echoit "Attempting to increase/set ulimit -n to ${FILES_ULIMIT}..."
ulimit -n ${FILES_ULIMIT} 2>/dev/null
if [ "$(ulimit -n)" != "${FILES_ULIMIT}" ]; then
  echoit "Assert: After attempting to set ulimit -n to ${FILES_ULIMIT}, the ulimit -n setting is [still] $(ulimit -n) instead!"
  echoit "Using sudo, edit /etc/security/limits.conf and add the following line to the end of the file:"
  echoit "* hard nofile $(( ${FILES_ULIMIT} + 1000 ))"
  exit 1
fi

# Build ${RESULTSDIR}/jstests.list: every path below ${MONGODIR}/jstests that ends in .js
echoit "Compiling list of all JS tests to be executed..."
if [ ! -d ${MONGODIR}/jstests ]; then
  echoit "Assert: Before changing directories into ${MONGODIR}/jstests, this script checked the existence of this directory, and failed!"
  exit 1
fi
rm -f ${RESULTSDIR}/jstests.list
touch ${RESULTSDIR}/jstests.list
cd ${MONGODIR}/jstests
find . | grep "\.js$" > ${RESULTSDIR}/jstests.list
TEST_COUNT=$(cat ${RESULTSDIR}/jstests.list 2>/dev/null | wc -l)
if [ -z "${TEST_COUNT}" ]; then
  TEST_COUNT=0
fi
# Sanity check: far fewer tests than expected points at a broken build/tree (1727 tests at the time of writing)
if [ ${TEST_COUNT} -lt 1000 ]; then
  echoit "Assert: The number of all JS tests (${TEST_COUNT}) is too small, check for build issues & verify contents of ${RESULTSDIR}/jstests.list!"
  exit 1
fi
echoit "${TEST_COUNT} JS tests discovered..."
# Split the tests into two lists:
#   ${RESULTSDIR}/jstests_single.list - entries of known_bugs_tokumxse.strings marked "|single"
#   ${RESULTSDIR}/jstests.list        - all remaining tests (may run multi-threaded)
echoit "Segregating tests which require a single threaded run..."
if [ ! -r "${SCRIPT_PWD}/known_bugs_tokumxse.strings" ]; then
  echoit "Assert: ${SCRIPT_PWD}/known_bugs_tokumxse.strings not found?"
  exit 1
fi
# Keep the text in front of "|single" of every non-comment line that carries the marker
grep "|single[ \t]\+" "${SCRIPT_PWD}/known_bugs_tokumxse.strings" | grep -v "^#" | sed 's/\(.*\)|single.*/\1/' > "${RESULTSDIR}/jstests_single.list"
SINGLE_TEST_COUNT=$(cat "${RESULTSDIR}/jstests_single.list" 2>/dev/null | wc -l)
if [ "${SINGLE_TEST_COUNT}" == "" ]; then SINGLE_TEST_COUNT=0; fi
echoit "${SINGLE_TEST_COUNT} Tests discovered which require a single threaded run. Segregating..."
# Create the output file up front so the mv below also works when no test survives the filter
: > "${RESULTSDIR}/jstests.list.final"
while IFS= read -r line; do
  # Each test path is used as a (regex) pattern against the single-threaded list. It is quoted
  # so that it is never word-split or glob-expanded by the shell.
  if ! grep -q -- "${line}" "${RESULTSDIR}/jstests_single.list"; then
    printf '%s\n' "${line}" >> "${RESULTSDIR}/jstests.list.final"
  fi
done < "${RESULTSDIR}/jstests.list"
rm "${RESULTSDIR}/jstests.list"
mv "${RESULTSDIR}/jstests.list.final" "${RESULTSDIR}/jstests.list"
TEST_COUNT=$(cat "${RESULTSDIR}/jstests.list" 2>/dev/null | wc -l)
if [ "${TEST_COUNT}" == "" ]; then TEST_COUNT=0; fi
if [ "${TEST_COUNT}" -lt 1000 ]; then  # Currently there are 1727 tests!
  echoit "Assert: The number of all JS tests (${TEST_COUNT}) is too small, check for build issues & verify contents of ${RESULTSDIR}/jstests.list!"
  exit 1
fi
echoit "Segregation complete. ${TEST_COUNT} (main) + ${SINGLE_TEST_COUNT} (single-threaded) JS tests armed..."
# Run one phase of the test run: keep up to $1 tests running concurrently until $2 tests of the
# active list have been started, then wait for the remaining ones to finish.
#   $1 - maximum number of concurrently running tests
#   $2 - number of tests in the list start_thread currently reads from
# Globals: RESULTSDIR (read), TEST_IN_PROGRESS (read; advanced by start_thread)
run_test_phase(){
  local max_jobs=$1
  local total=$2
  while true; do
    if [ -r "${RESULTSDIR}/ctrl_c_was_pressed_during_this_run" ]; then break; fi
    while [ $(jobs -r | wc -l) -lt ${max_jobs} ]; do  # A slot is free
      if [ ${TEST_IN_PROGRESS} -ge ${total} ]; then  # All tests done or started
        wait   # Wait for all current tests to finish
        break  # Exit inner loop
      fi
      start_thread
    done
    if [ ${TEST_IN_PROGRESS} -ge ${total} ]; then  # All tests done or started
      wait   # Wait for all current tests to finish (more a safety net than anything)
      break  # Exit outer loop
    fi
    # All slots are taken: block until a test finishes instead of spinning on "jobs | wc".
    # Shells without "wait -n" (bash < 4.3) fall back to a short sleep.
    wait -n 2>/dev/null || sleep 1
  done
}

echoit "Starting MJR main test loop (non-segregated tests)..."
run_test_phase "${THREADS}" "${TEST_COUNT}"

echoit "Starting MJR main test loop (segregated single thread tests)..."
SEGREGATED=1
TEST_IN_PROGRESS=0
MUTEX1=0;MUTEX2=0
run_test_phase 1 "${SINGLE_TEST_COUNT}"  # Fixed 1 thread max

echoit "Cleaning up any rogue processes..."
kill_pids
echoit "Done! Results are stored in the following directory: ${RESULTSDIR}"
exit 0