Feature/timeout #79

Closed
31 changes: 26 additions & 5 deletions controller.py
@@ -357,6 +357,7 @@ def read_backup(hdf5_file):
"tb_info": bool(hdf5_flags["tb_info"][0]),
"mem_info": bool(hdf5_flags["mem_info"][0]),
"max_instruction_count": int(hdf5_flags["max_instruction_count"][0]),
"fault_count": int(hdf5_flags["fault_count"][0]),
"start": {
"address": int(hdf5_start_address["address"][0]),
"counter": int(hdf5_start_address["counter"][0]),
@@ -436,7 +437,7 @@ def read_backup(hdf5_file):
rows = f_in.root.Goldenrun.armregisters.iterrows()
else:
raise tables.NoSuchNodeError(
"No supported register architecture could be found in the HDF5 file"
"No supported register architecture could be found in the HDF5 file, run with the overwrite flag to overwrite"
)

backup_goldenrun[register_backup_name] = []
@@ -455,6 +456,14 @@ def read_backup(hdf5_file):
backup_goldenrun[register_backup_name].append(registers)

# Process expanded faults
if (
f_in.root.Backup.expanded_faults._v_nchildren
!= backup_config["fault_count"]
):
raise tables.NoSuchNodeError(
f"Out of {backup_config['fault_count']} faults, only {f_in.root.Backup.expanded_faults._v_nchildren} are available in the backup. Run with the overwrite flag to overwrite"
)

backup_expanded_faults = []
exp_n = 0

@@ -597,10 +606,8 @@ def controller(
"Backup could not be found in the HDF5 file, run with the overwrite flag to overwrite!"
)
return config_qemu
-    except tables.NoSuchNodeError:
-        clogger.warning(
-            "Invalid/unsupported backup file, run with the overwrite flag to overwrite!"
-        )
+    except tables.NoSuchNodeError as e:
+        clogger.warning(e)
return config_qemu

clogger.info("Checking the backup")
@@ -784,6 +791,19 @@ def controller(
p = p_list[i]
# Find finished processes
p["process"].join(timeout=0)

# Kill process if timeout exceeded and gdb is not used
if (
p["process"].is_alive()
and (time.time() - p["start_time"]) > config_qemu["timeout"]
and not config_qemu.get("gdb", False)
):
clogger.error(
f"Process {p['process'].name} ran into timeout and was killed!"
)
p["process"].terminate()
p["process"].join()

if p["process"].is_alive() is False:
# Recalculate moving average
p_time_list.append(current_time - p["start_time"])
@@ -1006,6 +1026,7 @@ def process_arguments(args):
qemu_conf["tb_info"] = faultlist.get("tb_info", True)
qemu_conf["mem_info"] = faultlist.get("mem_info", False)
qemu_conf["ring_buffer"] = faultlist.get("ring_buffer", True)
qemu_conf["timeout"] = faultlist.get("timeout", 1200)

# Command line argument takes precedence
if args.disable_ring_buffer:
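The controller change above implements a simple watchdog: each pass over the process list reaps finished workers with a zero-timeout join, then terminates any still-alive worker whose elapsed wall-clock time exceeds the configured timeout (skipped when gdb is attached, since an interactive debugging session would otherwise be killed mid-step). A minimal standalone sketch of that pattern, with illustrative names (`budget`, `worker`) that are not taken from the PR:

```python
import time
from multiprocessing import Process


def worker():
    time.sleep(60)  # stand-in for a hanging experiment


if __name__ == "__main__":
    budget = 2  # seconds, analogous to config_qemu["timeout"]
    p = Process(target=worker)
    p.start()
    start_time = time.time()

    while p.is_alive():
        p.join(timeout=0)  # reap the worker if it already finished
        if time.time() - start_time > budget:
            print(f"Process {p.name} ran into timeout and was killed!")
            p.terminate()  # forcible stop (SIGTERM on Unix)
            p.join()  # wait until the terminated worker is cleaned up
            break
        time.sleep(0.1)  # poll interval; the PR's loop iterates over p_list instead
```

On Unix, `terminate()` delivers SIGTERM, which is exactly the signal the new `Timeout` handler in faultclass.py listens for inside the worker.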
3 changes: 3 additions & 0 deletions fault-readme.md
@@ -231,3 +231,6 @@ Use of the ring buffer implementation to store the list of executed translation

### mem_info
Enable collection of data on all memory accesses. The configuration property expects to be passed a boolean value. If unspecified, it will default to `false`.

### timeout
Maximum execution duration in seconds for a single experiment. If a worker exceeds this limit, its process is terminated (unless gdb is in use, in which case the check is skipped). The configuration property expects to be passed a numeric value. If unspecified, it will default to `1200` seconds.
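For illustration, the new property sits alongside the other top-level options described in this document. A minimal sketch of a fault configuration fragment, assuming the JSON layout implied by the properties above (the values are illustrative):

```json
{
    "tb_info": true,
    "mem_info": false,
    "ring_buffer": true,
    "timeout": 600
}
```

Per the diff in `process_arguments`, omitting the key falls back to `1200` seconds.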
24 changes: 22 additions & 2 deletions faultclass.py
@@ -19,6 +19,7 @@
from multiprocessing import Process
import os
import shlex
import signal
import subprocess
import time

@@ -82,6 +83,18 @@ def detect_model(fault_model):
)


class Timeout:
raised = False

def __init__(self):
signal.signal(signal.SIGINT, self.raise_timeout)
signal.signal(signal.SIGTERM, self.raise_timeout)

def raise_timeout(self, *args):
self.raised = True
raise KeyboardInterrupt


class Register(IntEnum):
ARM = 0
RISCV = 1
@@ -562,6 +575,8 @@ def readout_data(
max_ram_usage = 0
regtype = None

timeout = Timeout()

# Load data from the pipe
data_protobuf = data_pb2.Data()
data_protobuf.ParseFromString(pipe.read())
@@ -655,7 +670,7 @@ def readout_data(

max_ram_usage = gather_process_ram_usage(queue_ram_usage, max_ram_usage)

-    return max_ram_usage
+    return (max_ram_usage, timeout.raised)


def create_fifos():
@@ -825,7 +840,7 @@ def python_worker(

# From here Qemu has started execution. Now prepare for
# data extraction
-    mem = readout_data(
+    (mem, timeout_raised) = readout_data(
data_fifo,
index,
queue_output,
@@ -836,6 +851,11 @@ def python_worker(
qemu_post=qemu_post,
qemu_pre_data=qemu_pre_data,
)

if timeout_raised:
logger.error(f"Terminate process {index}")
p_qemu.terminate()

p_qemu.join()
delete_fifos()

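The new `Timeout` class converts an incoming SIGINT or SIGTERM into a `KeyboardInterrupt`: the handler records that the signal arrived and raises, which unblocks whatever call is in flight, such as the blocking `pipe.read()` at the start of `readout_data`; `python_worker` then checks the returned flag and terminates the QEMU child. A minimal standalone sketch of the mechanism (the sleep and the self-delivered signal stand in for the blocking read and the controller's `terminate()` call; this is not ARCHIE code):

```python
import os
import signal
import threading
import time


class Timeout:
    raised = False

    def __init__(self):
        # Route SIGINT/SIGTERM through a handler that records the event
        # and raises, so blocking calls are interrupted cleanly.
        signal.signal(signal.SIGINT, self.raise_timeout)
        signal.signal(signal.SIGTERM, self.raise_timeout)

    def raise_timeout(self, *args):
        self.raised = True
        raise KeyboardInterrupt


if __name__ == "__main__":
    timeout = Timeout()
    # Deliver SIGTERM to ourselves after one second, standing in for the
    # controller terminating a worker that exceeded its time budget.
    threading.Timer(1.0, os.kill, [os.getpid(), signal.SIGTERM]).start()
    try:
        time.sleep(10)  # stand-in for the blocking pipe.read()
    except KeyboardInterrupt:
        pass
    print("timeout raised:", timeout.raised)  # -> True
```

Raising from the handler is what makes the read interruptible; setting the flag alone would never wake a process parked on a blocking call.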
11 changes: 10 additions & 1 deletion hdf5logger.py
@@ -14,6 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import queue
import signal
import logging
import time
@@ -141,6 +142,7 @@ class config_table(tables.IsDescription):
mem_info = tables.BoolCol()
max_instruction_count = tables.UInt64Col()
memory_dump = tables.BoolCol()
fault_count = tables.UInt64Col()


class hash_table(tables.IsDescription):
@@ -396,6 +398,7 @@ def process_config(f, configgroup, exp, myfilter):
config_row["tb_info"] = exp["tb_info"]
config_row["mem_info"] = exp["mem_info"]
config_row["max_instruction_count"] = exp["max_instruction_count"]
config_row["fault_count"] = exp["fault_count"]

config_row.append()

@@ -441,6 +444,8 @@


def process_backup(f, configgroup, exp, myfilter, stop_signal):
exp["config"]["fault_count"] = len(exp["expanded_faultlist"])

process_config(f, configgroup, exp["config"], myfilter)

fault_expanded_group = f.create_group(
@@ -511,7 +516,11 @@ def hdf5collector(
if stop_signal.value == 1:
break
# readout queue and get next output from qemu. Will block
-        exp = queue_output.get()
+        try:
+            exp = queue_output.get_nowait()
+        except queue.Empty:
+            continue

t1 = time.time()
logger.debug(
"got exp {}, {} still need to be performed. Took {}s. Elements in queu: {}".format(
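The hdf5collector change replaces the blocking `queue_output.get()` with a non-blocking poll so the loop can keep checking `stop_signal`: a worker killed by the timeout may never produce output, and a collector parked on `get()` would hang forever. A minimal sketch of the pattern with illustrative names (the short sleep merely keeps the sketch from spinning a core; the PR's loop polls without it):

```python
import queue
import time
from multiprocessing import Process, Queue, Value


def collector(queue_output, stop_signal):
    while True:
        if stop_signal.value == 1:
            break
        try:
            exp = queue_output.get_nowait()  # never blocks
        except queue.Empty:
            time.sleep(0.01)  # brief pause between polls
            continue
        print("collected", exp)


if __name__ == "__main__":
    queue_output, stop_signal = Queue(), Value("i", 0)
    p = Process(target=collector, args=(queue_output, stop_signal))
    p.start()
    queue_output.put("experiment-0")
    time.sleep(0.5)
    stop_signal.value = 1  # collector notices on its next poll and exits
    p.join()
```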