Skip to content

Commit

Permalink
Add process context switch counts to resource monitoring (#3120)
Browse files (browse the repository at this point in the history)
  • Loading branch information
benclifford authored Mar 6, 2024
1 parent 544ea95 commit 356f980
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 0 deletions.
6 changes: 6 additions & 0 deletions parsl/monitoring/db_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,12 @@ class Resource(Base):
'psutil_process_disk_write', Float, nullable=True)
psutil_process_status = Column(
'psutil_process_status', Text, nullable=True)
psutil_cpu_num = Column(
'psutil_cpu_num', Text, nullable=True)
psutil_process_num_ctx_switches_voluntary = Column(
'psutil_process_num_ctx_switches_voluntary', Float, nullable=True)
psutil_process_num_ctx_switches_involuntary = Column(
'psutil_process_num_ctx_switches_involuntary', Float, nullable=True)
__table_args__ = (
PrimaryKeyConstraint('try_id', 'task_id', 'run_id', 'timestamp'),
)
Expand Down
29 changes: 29 additions & 0 deletions parsl/monitoring/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ def monitor(pid: int,

children_user_time = {} # type: Dict[int, float]
children_system_time = {} # type: Dict[int, float]
children_num_ctx_switches_voluntary = {} # type: Dict[int, float]
children_num_ctx_switches_involuntary = {} # type: Dict[int, float]

def accumulate_and_prepare() -> Dict[str, Any]:
d = {"psutil_process_" + str(k): v for k, v in pm.as_dict().items() if k in simple}
Expand All @@ -218,6 +220,15 @@ def accumulate_and_prepare() -> Dict[str, Any]:
logging.debug("got children")

d["psutil_cpu_count"] = psutil.cpu_count()

# Note: this is the CPU number of the base (monitored) process only, not of any child processes it launched.
d["psutil_cpu_num"] = pm.cpu_num()

pctxsw = pm.num_ctx_switches()

d["psutil_process_num_ctx_switches_voluntary"] = pctxsw.voluntary
d["psutil_process_num_ctx_switches_involuntary"] = pctxsw.involuntary

d['psutil_process_memory_virtual'] = pm.memory_info().vms
d['psutil_process_memory_resident'] = pm.memory_info().rss
d['psutil_process_time_user'] = pm.cpu_times().user
Expand All @@ -238,6 +249,11 @@ def accumulate_and_prepare() -> Dict[str, Any]:
child_system_time = child.cpu_times().system
children_user_time[child.pid] = child_user_time
children_system_time[child.pid] = child_system_time

pctxsw = child.num_ctx_switches()
children_num_ctx_switches_voluntary[child.pid] = pctxsw.voluntary
children_num_ctx_switches_involuntary[child.pid] = pctxsw.involuntary

d['psutil_process_memory_virtual'] += child.memory_info().vms
d['psutil_process_memory_resident'] += child.memory_info().rss
try:
Expand All @@ -248,14 +264,27 @@ def accumulate_and_prepare() -> Dict[str, Any]:
logging.exception("Exception reading IO counters for child {k}. Recorded IO usage may be incomplete".format(k=k), exc_info=True)
d['psutil_process_disk_write'] += 0
d['psutil_process_disk_read'] += 0

total_children_user_time = 0.0
for child_pid in children_user_time:
total_children_user_time += children_user_time[child_pid]

total_children_system_time = 0.0
for child_pid in children_system_time:
total_children_system_time += children_system_time[child_pid]

total_children_num_ctx_switches_voluntary = 0.0
for child_pid in children_num_ctx_switches_voluntary:
total_children_num_ctx_switches_voluntary += children_num_ctx_switches_voluntary[child_pid]

total_children_num_ctx_switches_involuntary = 0.0
for child_pid in children_num_ctx_switches_involuntary:
total_children_num_ctx_switches_involuntary += children_num_ctx_switches_involuntary[child_pid]

d['psutil_process_time_user'] += total_children_user_time
d['psutil_process_time_system'] += total_children_system_time
d['psutil_process_num_ctx_switches_voluntary'] += total_children_num_ctx_switches_voluntary
d['psutil_process_num_ctx_switches_involuntary'] += total_children_num_ctx_switches_involuntary
logging.debug("sending message")
return d

Expand Down
7 changes: 7 additions & 0 deletions parsl/monitoring/visualization/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,5 +102,12 @@ class Resource(db.Model):
'psutil_process_disk_write', db.Float, nullable=True)
psutil_process_status = db.Column(
'psutil_process_status', db.Text, nullable=True)
psutil_cpu_num = db.Column(
'psutil_cpu_num', db.Text, nullable=True)
psutil_process_num_ctx_switches_voluntary = db.Column(
'psutil_process_num_ctx_switches_voluntary', db.Float, nullable=True)
psutil_process_num_ctx_switches_involuntary = db.Column(
'psutil_process_num_ctx_switches_involuntary', db.Float, nullable=True)

__table_args__ = (
db.PrimaryKeyConstraint('task_id', 'run_id', 'timestamp'),)

0 comments on commit 356f980

Please sign in to comment.