Skip to content

Commit

Permalink
add sigusr2 handler to request all-rank backtrace in python
Browse files Browse the repository at this point in the history
  • Loading branch information
lehner committed Sep 20, 2024
1 parent b9e0f86 commit 5115eed
Showing 1 changed file with 34 additions and 3 deletions.
37 changes: 34 additions & 3 deletions lib/gpt/core/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,16 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
import gpt, sys, os
import gpt, sys, os, signal, datetime
from inspect import getframeinfo, stack

# Result of querying the "message_context" verbosity flag once at import time.
# NOTE(review): presumably enables the file:line context header in message() -- confirm.
verbose = gpt.default.is_verbose("message_context")
# Context header most recently emitted by message(); message() compares the new
# header against this value and only prepends it again when it has changed.
last_context = ""


def message(*a, force_output=False):
global last_context

# conversion to string can be an MPI process (e.g. for lattice),
# so need to do it on all ranks
s = " ".join([str(x) for x in a])
Expand All @@ -31,12 +35,39 @@ def message(*a, force_output=False):
caller = getframeinfo(st[0])
slines.append(f"{caller.filename}:{caller.lineno}")
cpath = os.path.commonpath(slines)
cs = ";".join([ x[len(cpath)+1:] for x in slines ])
s = f"[{cpath}|{cs}]\n{s}"
cs = ";".join([x[len(cpath) + 1 :] for x in slines])
ctx = f"[{cpath}|{cs}]"
if ctx != last_context:
s = f"{ctx}\n{s}"
last_context = ctx
if gpt.rank() == 0 or force_output:
lines = s.split("\n")
if len(lines) > 0:
print("GPT : %14.6f s :" % gpt.time(), lines[0])
for line in lines[1:]:
print(" :", line)
sys.stdout.flush()


def backtrace_signal_handler(sig, frame):
    """Dump the Python call stack of this rank to a per-rank log file.

    Written with the signature expected by ``signal.signal`` handlers.

    Args:
        sig: Signal number that triggered the handler (unused).
        frame: Stack frame that was executing when the signal arrived, or
            ``None``; the chain of ``f_back`` links is walked from here.

    The file is created as ``log/<YYYY-MM-DD>/backtrace.<rank>.<HH-MM-ffffff>``
    relative to the current working directory and contains one
    ``filename:lineno`` line per frame, innermost frame first.  The chosen
    file name is also announced on stderr.
    """
    now = datetime.datetime.now()
    log_directory = "log/" + now.strftime("%Y-%m-%d")
    # exist_ok=True already tolerates an existing directory, so no separate
    # existence check is needed (several ranks may create it concurrently).
    os.makedirs(log_directory, exist_ok=True)

    # NOTE(review): the time suffix is hour-minute-microsecond; seconds (%S)
    # are not part of the name -- confirm this is intended.
    log_filename = f"{log_directory}/backtrace.{gpt.rank()}." + now.strftime("%H-%M-%f")
    sys.stderr.write(f"Requested GPT backtrace; saved in {log_filename}\n")
    sys.stderr.flush()

    # The context manager closes the file even if inspecting a frame fails.
    with open(log_filename, "wt") as fout:
        while frame is not None:
            caller = getframeinfo(frame)
            fout.write(f"{caller.filename}:{caller.lineno}\n")
            frame = frame.f_back


# Register the handler when this module is imported: sending SIGUSR2 to a
# process then makes it write its current Python call stack to a log file.
# NOTE(review): signal.SIGUSR2 is only defined on Unix platforms.
signal.signal(signal.SIGUSR2, backtrace_signal_handler)

0 comments on commit 5115eed

Please sign in to comment.