#!/usr/bin/env python3
"""Many thanks to the outstanding example written by GitHub user 'dagg'.

This code has been forked from the following repository:
https://github.com/dagg/OpenEdxExternalGrader

Remark: No need to change anything here.
"""
import gc
import json
import logging
import os
import random
import re
import shutil
import subprocess
import sys
import time
import traceback

from http.server import HTTPServer, BaseHTTPRequestHandler
from json.decoder import JSONDecodeError
from socketserver import ThreadingMixIn

from setup_logging import setup_logging


class Handler(BaseHTTPRequestHandler):
    """Handle the basic communication with the XQueue."""

    # pylint: disable=invalid-name
    def do_HEAD(self):
        """Respond to HEAD requests (currently a no-op)."""
        # self.send_response(200)
        # self.send_header('Content-type', 'text/html')
        # self.end_headers()

    def do_GET(self):
        """Handle a plain GET request.

        This is called when somebody enters the address in their browser:
        http://gradermat101.math.uzh.ch:10101
        You can, for example, return some HTML.
        """
        with open('doc/Tutorial.html', 'br') as file:
            self.send_response(200)
            self.send_header('Content-type', 'text/html')
            self.end_headers()
            self.wfile.write(file.read())

    def do_POST(self):
        """Handle a POST request from the XQueue."""
        _time_start = time.time()
        body_len = int(self.headers['Content-Length'])
        _body = self.rfile.read(body_len).decode()
        logging.debug('Received content: %s', _body)
        try:
            body_content = json.loads(_body)
        except JSONDecodeError:
            logging.error('JSON: could not parse received content.')
            # result = process_result({'correct': False, 'error': 'Could not parse POST.'})
        else:
            problem_name, student_response, _user_id = get_info(body_content)
            # Grade the student response and format the results.
            logging.info('(%s) submitted code for problem %s.', _user_id, problem_name)
            result = grade(problem_name, student_response)
            logging.debug('Result: %s', result)
            _send = json.dumps(result).encode()
            self.send_response(200)
            self.end_headers()
            self.wfile.write(_send)
            logging.info('answered (%s) in %f seconds.', _user_id, time.time() - _time_start)
            logging.debug('answer data: %s', _send)

    # Capture HTTP logging into our logging file.
    def log_message(self, format, *args):  # pylint: disable=redefined-builtin
        logging.debug(format, *args)


class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """Handle each request in a separate thread.

    No further content needed, don't touch this.
    """


def grade(problem_name, student_response, do_processing=True):
    """Run the test runner for `problem_name` against the student's code.

    Arguments:
        problem_name (str): Unique identifier for the exercise.
        student_response (str): String containing the student's Python code.
    Keyword argument:
        do_processing (bool): If True (default), return formatted text
                              ready to be sent back. If False, return the
                              raw output of the `check` function.
    Returns:
        result (dict or list of dict): Each dict is the result of one
            test case and has the following form:
                {'correct': True/False,
                 'function': message/code to be displayed as input,
                 'result': output of the evaluation,
                 'expected': solution (only if correct == False)}
    """
    try:
        randfilename = randgen()
        # Create tmp directory if it does not exist.
        if not os.path.exists('tmp'):
            logging.warning('Creating new directory ./tmp')
            os.makedirs('tmp')
        # Write the student's submitted program to a Python file to be tested.
        student_program = 'Program{}_{}'.format(problem_name, randfilename)
        with open('tmp/{}.py'.format(student_program), 'w') as source_file:
            # student_response = re.sub(r'(?<!\\)\t+', ' ', student_response)
            source_file.write(student_response)
        # Use pytest to test the student's submitted program with the
        # help of the appropriate test runner.
        # When called with systemd we need absolute paths, so I
        # entered it here.
        # FIXME: This is very hacky. On the executing machine we need an
        # absolute path, on ThinLinc `python3`, and on modern devices
        # probably `python`.
        if os.path.exists('/opt/anaconda/anaconda3/bin/python'):
            _path = '/opt/anaconda/anaconda3/bin/python'
        elif shutil.which('python3') is not None:
            _path = 'python3'
        else:
            _path = 'python'
        process = subprocess.Popen([_path,
                                    'testrunner.py',
                                    problem_name, student_program],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        _timeout = 20
        try:
            out, err = process.communicate(timeout=_timeout)
        except subprocess.TimeoutExpired:
            process.kill()
            res = {'correct': False,
                   'error': 'Timeout: Evaluation aborted after {} seconds '
                            '(check for infinite loops).'.format(_timeout)}
        else:
            logging.debug('testrunner output:\n%s', out.decode())
            if err:
                logging.warning('testrunner crashed: %s', _wrap(err.decode()))
                # logging.debug('testrunner quit with error:\n%s', err.decode())
                res = {'correct': False, 'error': err.decode()}
            else:
                if isinstance(out, bytes):
                    out = out.decode('utf-8')
                try:
                    res = json.loads(out)
                except JSONDecodeError:
                    logging.error(_wrap(traceback.format_exc()))
                    res = {'correct': False,
                           'error': 'There was an error evaluating your '
                                    'code! Check your syntax.'}
        # Remove the student's program from disk.
        os.remove('tmp/{}.py'.format(student_program))
        # Garbage collect. TODO: why do we need that?
        gc.collect()
    except Exception:  # pylint: disable=broad-except
        logging.error(_wrap(traceback.format_exc()))
        res = {'correct': False, 'error': 'There seems to be a system error :('}
    logging.debug('processing output...')
    if do_processing:
        return process_result(res)
    return res
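
# For illustration (the names are made up; they assume a testrunner.py entry
# exists for a problem called 'exercise01'), a raw call such as
#     grade('exercise01', 'def f(x):\n    return x + 1\n', do_processing=False)
# might return a list like
#     [{'correct': True, 'function': 'f(1)', 'result': '2', 'expected': '2'}]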


def process_result(result):
    """Take a list of test results and create the XQueue response.

    The test results will be formatted with HTML.
    Arguments:
        result (list): List of test cases, each test case is a dict.
            A test case dict is of either of the following forms:
                {'correct': (bool), 'function': (str),
                 'result': (str), 'expected': (str)}
            or
                {'correct': False, 'error': (str)}
            where we have
                correct (bool): Result of the test.
                function (str): Explains what has been tested.
                result (str): Output of the student's code.
                expected (str): Correct result.
                error (str): Message of an error that prevented the test
                    from running at all (SyntaxError, NameError, ...).
    Returns:
        out (dict): The XQueue response, a dict
                {'correct': (bool), 'score': (float), 'msg': (str)}
            with
                correct (bool): Overall result.
                score (float): Fraction of passed tests, between 0 and 1.
                msg (str): HTML-formatted test results.
            (The JSON dump to bytes happens in `do_POST`.)
    """
    _start = """
    <div class="test">
      <header>Test results</header>
      <section>
        <div class="shortform">
          {}
          <a href="#" class="full full-top">See full output</a>
          <a href="#" class="full full-bottom">See full output</a>
        </div>
        <div class="longform" style="display: none;">"""
    _end = """
        </div>
      </section>
    </div>"""
    _correct = """
          <div class="result-output result-correct">
            <h4>{header}</h4>
            <pre>{function}</pre>
            <dl>
              <dt>Output:</dt>
              <dd class="result-actual-output"><pre>{result}</pre></dd>
            </dl>
          </div>"""
    _wrong = """
          <div class="result-output result-incorrect">
            <h4>{header}</h4>
            <pre>{function}</pre>
            <dl>
              <dt>Your output:</dt>
              <dd class="result-actual-output"><pre>{result}</pre></dd>
              <dt>Correct output:</dt>
              <dd><pre>{expected}</pre></dd>
            </dl>
          </div>"""
    _fatal = """
          <div class="result-output result-incorrect">
            <h4>Error</h4>
            <dl>
              <dt>Message:</dt>
              <dd class="result-actual-output"><pre>{error}</pre></dd>
            </dl>
          </div>"""
    out = {}
    # Embed single grader results into a list.
    if isinstance(result, dict):
        result = [result]
    logging.debug(result)
    logging.debug(type(result))
    if not result:
        logging.warning('Empty result!')
        result = [{'correct': False,
                   'error': 'It seems like you crashed the evaluator.\n\n'
                            'Please check that your code runs on your computer '
                            'and that you read the question correctly. '
                            'Afterwards retry or contact the '
                            'course staff, sorry :('}]
    # Correct if all tests passed.
    n_correct = sum(r['correct'] for r in result)
    out['correct'] = (n_correct == len(result))
    # Score as the fraction of passed tests.
    out['score'] = n_correct / len(result)
    # HTML-formatted message.
    # _start contains the header with the overall message and links to
    # open a tab with the detailed results (_fatal/_correct/_wrong).
    if any(('error' in res) for res in result):
        msg = _start.format('ERROR')
    elif out['correct']:
        msg = _start.format('CORRECT')
    else:
        msg = _start.format('INCORRECT')
    # This middle part is hidden by default and shows detailed test results.
    for i, res in enumerate(result):
        # Define defaults for all components.
        answer = {'correct': False, 'function': '', 'result': '', 'expected': ''}
        answer.update(res)
        if 'error' in res:
            msg += _fatal.format(**answer)
        else:
            _name = 'Test Case {}'.format(i + 1)
            if res['correct']:
                msg += _correct.format(header=_name, **answer)
            else:
                msg += _wrong.format(header=_name, **answer)
    # _end closes all opened HTML tags.
    msg += _end
    out['msg'] = msg
    return out
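
# A quick sketch of process_result in action (the values are illustrative):
#     >>> out = process_result([{'correct': True, 'function': 'square(2)',
#     ...                        'result': '4', 'expected': '4'}])
#     >>> out['correct'], out['score']
#     (True, 1.0)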


def get_info(json_object):
    """Parse the XQueue input.

    Note: If you specify more parameters for the grader payload (in Open edX)
    you could extract them here (as we did with 'problem_name').
    Returns:
        problem_name (str): Unique identifier for the exercise.
        student_response (str): Python code from the student.
        _id (str): Anonymous student id, or 'unknown' if not provided.
    """
    json_object = json.loads(json_object['xqueue_body'])
    grader_payload = json.loads(json_object['grader_payload'])
    student_response = json_object['student_response']
    _id = json.loads(json_object['student_info']).get('anonymous_student_id', 'unknown')
    return grader_payload['problem_name'], student_response, _id
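

# Illustration for local testing: build the kind of nested JSON body that
# get_info() expects. The problem name, student code, and id below are
# made-up example values.
def _example_xqueue_body():
    """Return a minimal XQueue-style POST body (a sketch for local tests).

    The nesting mirrors what get_info() unpacks: 'grader_payload' and
    'student_info' are JSON strings embedded inside the JSON string
    'xqueue_body'.
    """
    return json.dumps({
        'xqueue_body': json.dumps({
            'grader_payload': json.dumps({'problem_name': 'exercise01'}),
            'student_response': 'def f(x):\n    return x + 1\n',
            'student_info': json.dumps({'anonymous_student_id': 'abc123'}),
        })
    })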


def randgen():
    """Random file name generator."""
    return '_'.join([time.strftime('%Y%m%d%H%M%S'),
                     str(time.time()).split('.')[-1],
                     str(random.random()).split('.')[-1]])
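
# randgen() yields names like '20240101123045_1234567_8901234' (timestamp,
# fractional-second digits, random digits); the exact digit counts vary.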


def _wrap(msg):
    """Takes a string and wraps it in horizontal dashes."""
    return ('\n---------------------------------'
            '---------------------------------\n'
            '{}'
            '\n---------------------------------'
            '---------------------------------\n'.format(msg))


def start_grader(host='localhost', port=10101,
                 log_file='log/%Y/%m/%d/grader_%Y-%m-%dT%H%M.log'):
    """Set up logging and run the threaded grader server forever."""
    # Set the working directory to the directory of this script.
    os.chdir(os.path.abspath(os.path.dirname(sys.argv[0])))
    setup_logging(logfile=time.strftime(log_file),
                  lvl_bash=logging.INFO, lvl_logfile=logging.INFO, symlink='log')
    # Start the server.
    try:
        server = ThreadedHTTPServer((host, port), Handler)
        logging.info('Starting grader on %s:%s...', host, port)
        server.serve_forever()
    except KeyboardInterrupt:
        logging.info('Server shut down with ^C')
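
# Example of exercising a running grader by hand (a sketch: it assumes the
# server listens on localhost:10101 and a matching test runner exists):
#     import urllib.request
#     body = _example_xqueue_body().encode()
#     req = urllib.request.Request('http://localhost:10101', data=body)
#     with urllib.request.urlopen(req) as response:
#         print(json.loads(response.read()))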


if __name__ == '__main__':
    start_grader()