Skip to content

Commit

Permalink
Switch to psutil for resource utilization monitoring at runtime.
Browse files Browse the repository at this point in the history
  • Loading branch information
todor-ivanov committed Jul 25, 2023
1 parent e920d72 commit 9a47116
Showing 1 changed file with 16 additions and 31 deletions.
47 changes: 16 additions & 31 deletions src/python/WMCore/WMRuntime/Monitors/PerformanceMonitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import os.path
import signal
import time
import psutils

import WMCore.Algorithms.SubprocessAlgos as subprocessAlgos
import WMCore.FwkJobReport.Report as Report
Expand Down Expand Up @@ -84,8 +85,6 @@ def __init__(self):

self.pid = None
self.uid = os.getuid()
self.monitorBase = "ps -p %i -o pid,ppid,rss,pcpu,pmem,cmd -ww | grep %i"
self.pssMemoryCommand = "awk '/^Pss/ {pss += $2} END {print pss}' /proc/%i/smaps"
self.monitorCommand = None
self.currentStepSpace = None
self.currentStepName = None
Expand Down Expand Up @@ -208,38 +207,24 @@ def periodicUpdate(self):
# Then we have no step PID, we can do nothing
return

# Now we run the ps monitor command and collate the data
# Gathers RSS, %CPU and %MEM statistics from ps
ps_cmd = self.monitorBase % (stepPID, stepPID)
stdout, _stderr, _retcode = subprocessAlgos.runCommand(ps_cmd)

ps_output = stdout.split()
if not len(ps_output) > 6:
# Then something went wrong in getting the ps data
msg = "Error when grabbing output from process ps\n"
msg += "output = %s\n" % ps_output
msg += "command = %s\n" % ps_cmd
logging.error(msg)
return

# run the command to gather PSS memory statistics from /proc/<pid>/smaps
smaps_cmd = self.pssMemoryCommand % (stepPID)
stdout, _stderr, _retcode = subprocessAlgos.runCommand(smaps_cmd)

smaps_output = stdout.split()
if not len(smaps_output) == 1:
# Then something went wrong in getting the smaps data
msg = "Error when grabbing output from smaps\n"
msg += "output = %s\n" % smaps_output
msg += "command = %s\n" % smaps_cmd
logging.error(msg)
# Now we run the psutil module and collate some process data.
# We gather info about CPU system time and Memory statistics - VMS, PSS, RSS
stepProc = psutils.Process(stepPID)
if not stepProc.is_running():
# The step process has vanished before we managed to finish the current periodic update
msg = " The step process with PID: %s of STEP: %s "
msg += "has finished execution before the last periodic resource update.\n"
logging.warning(msg, stepPid, self.currentStepName)
return

# smaps also returns data in kiloBytes, let's make it megaBytes
# I'm also confused with these megabytes and mebibytes...
pss = int(smaps_output[0]) // 1000
stepCmd = stepProc.cmdline()
stepMemInfo = stepProc.memory_full_info()
stepCpuInfo = stepProc.cpu_times()
# NOTE: All values returned by psutil's memory_*info() methods are in bytes,
# so we need to convert them to megabytes
pss = int(stepMemInfo.pss) // (1000**2)

logging.info("PSS: %s; RSS: %s; PCPU: %s; PMEM: %s", smaps_output[0], ps_output[2], ps_output[3], ps_output[4])
logging.info("PID: %s; VirtMEM: %s, PSS: %s; RSS: %s; SystemTime: %s;", stepPID, stepMemInfo.vms, stepMemInfo.pss, stepMemInfo.rss, stepCpuInfo.system)

msg = 'Error in CMSSW step %s\n' % self.currentStepName
msg += 'Number of Cores: %s\n' % self.numOfCores
Expand Down

0 comments on commit 9a47116

Please sign in to comment.