Created
December 17, 2022 14:47
-
-
Save wasdee/4dbb693b7c0476d176dfc53a2711be28 to your computer and use it in GitHub Desktop.
plot top
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Capture batch-mode `top` output into a timestamped log file.
#
# Usage: <this script> [iterations]      (default: 120)
#
# The iteration count is embedded in the output file name
# ("top-<N>iters-<timestamp>.txt") so that a parser can check that the log
# really contains N iterations.
set -euo pipefail

iterations="${1:-120}"
if ! [[ "$iterations" =~ ^[1-9][0-9]*$ ]]; then
    echo "iterations must be a positive integer, got: $iterations" >&2
    exit 2
fi

now=$(date +"%Y_%m_%d-%H_%M_%S")

# Assuming top's default refresh delay of 3 seconds, the default of 120
# iterations takes about 120 * 3 seconds = 6 minutes.
top -b -n "$iterations" > "top-${iterations}iters-$now.txt"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
parse top output and plot the resource usage | |
""" | |
import re | |
from dataclasses import dataclass | |
from datetime import datetime | |
from io import StringIO | |
from pathlib import Path | |
from typing import Optional | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import pandas as pd | |
@dataclass
class Position:
    """Character offsets of one ``top`` iteration inside the full log text."""

    # Offset at which the iteration's "top - ..." summary block begins.
    start: int
    # Offset just past the column-header line ("PID ... COMMAND").
    end_header: int
    # Offset at which the iteration's process rows end; left as None until
    # the start of the following iteration (or the end of the log) is known.
    end: Optional[int] = None
@dataclass
class TopIter:
    """One iteration (snapshot) of batch-mode ``top`` output."""

    # Summary block of the iteration; must begin with "top - HH:MM:SS".
    system_wide: str
    # Column-header line ("PID USER ... COMMAND"), original spacing intact.
    header: str
    # Offsets of this iteration inside the log text.
    positions: "Position"
    # Raw fixed-width process rows that follow the header.
    data: str = ""
    # Parsed table; populated by generate_dataframe().
    df: Optional[pd.DataFrame] = None

    @property
    def time(self):
        """Return the wall-clock time of this iteration as a ``datetime``.

        Only "HH:MM:SS" is available in the summary line, so the date part
        is ``strptime``'s default (1900-01-01).

        Raises:
            ValueError: if ``system_wide`` does not start with
                "top - HH:MM:SS".
        """
        found = re.match(r"top - (\d+:\d+:\d+)", self.system_wide)
        if found is None:
            raise ValueError(
                "cannot find a 'top - HH:MM:SS' timestamp in summary: "
                f"{self.system_wide[:40]!r}"
            )
        return datetime.strptime(found[1], "%H:%M:%S")

    def generate_dataframe(self):
        """Parse ``data`` as a fixed-width table and return it as a DataFrame.

        Column boundaries are derived from where each column name sits in
        ``header``. A "Time" column holding ``self.time`` is appended, and
        the result is also stored in ``self.df``.

        Raises:
            ValueError: if ``data`` is empty, if a column name cannot be
                located in ``header``, or if the timestamp cannot be parsed.
        """
        if self.data == "":
            raise ValueError("data is empty")

        colnames = self.header.split()

        # Locate each column name in the header. Names are searched with
        # surrounding spaces so that e.g. "S" does not match inside "SHR";
        # COMMAND is the last column and has no trailing space.
        starts = [
            self.header.index(f" {name}" if name == "COMMAND" else f" {name} ") + 1
            for name in colnames
        ]
        colspecs = [[start, start + len(name)] for start, name in zip(starts, colnames)]
        colspecs[0][0] = 0  # first column begins at the start of the line
        colspecs[-1][1] = 999  # last column runs to the end of the line

        # Widen every interior column up to its neighbour, leaving a one
        # character border. "R" marks right-aligned columns (grow to the
        # left), "L" marks left-aligned columns (grow to the right).
        # The string assumes top's 12 default columns.
        col_text_side = "RLRRRRRRRRRL"
        for prev, cur, nxt, side in zip(
            colspecs[:-2], colspecs[1:-1], colspecs[2:], col_text_side[1:-1]
        ):
            if side == "R":
                cur[0] = prev[1] + 1
            elif side == "L":
                cur[1] = nxt[0] - 1

        # Columns that are still narrower than 3 characters are
        # right-aligned, so only their left edge needs to move.
        for spec in colspecs:
            if spec[1] - spec[0] < 3:
                spec[0] -= 1

        df = pd.read_fwf(
            StringIO(self.data),
            colspecs=[tuple(spec) for spec in colspecs],
            names=colnames,
        )
        df["Time"] = self.time
        self.df = df
        return df
def parse_log(filepath):
    """Parse a batch-mode ``top`` log into a single DataFrame.

    Args:
        filepath: path (``str`` or ``Path``) of the captured log. When the
            file name starts with "top-<N>iter", N is taken as the expected
            number of iterations and checked against the file content.

    Returns:
        A DataFrame with the process rows of every iteration concatenated,
        each row tagged with its iteration's "Time".

    Raises:
        ValueError: if the file contains no ``top`` iteration, or if the
            number of iterations differs from the count in the file name.
    """
    filepath = Path(filepath)
    with filepath.open() as f:
        output = f.read()

    # Match the system-wide summary and the column header of each iteration;
    # the process rows are sliced out afterwards by position.
    pattern = re.compile(r"(top - (.*\n){6})( +PID.*COMMAND)\n")
    top_iters: list[TopIter] = [
        TopIter(
            match[1].strip(),
            match[3],
            positions=Position(match.start(0), match.end(3)),
        )
        for match in pattern.finditer(output)
    ]
    if not top_iters:
        raise ValueError(f"no top iterations found in {filepath}")

    # The count is only verifiable when the file name carries it; files
    # named differently are parsed without this check.
    name_match = re.match(r"top-(\d+)iter.*", filepath.stem)
    if name_match is not None:
        expect_n_iter = int(name_match[1])
        if len(top_iters) != expect_n_iter:
            raise ValueError(
                f"{filepath}: expected {expect_n_iter} top iterations, "
                f"found {len(top_iters)}"
            )

    # Each iteration's rows end where the next iteration starts; the last
    # one runs to the end of the file.
    for current, following in zip(top_iters[:-1], top_iters[1:]):
        current.positions.end = following.positions.start
    top_iters[-1].positions.end = len(output)

    for iter_ in top_iters:
        iter_.data = output[iter_.positions.end_header : iter_.positions.end]

    dfs = [iter_.generate_dataframe() for iter_ in top_iters]
    return pd.concat(dfs, ignore_index=True)
def plot(df, top_n=7):
    """Plot %CPU and %MEM over time for the heaviest CPU consumers.

    Commands are ranked by ``sum(log(1 + %CPU))`` over all of their rows.
    This gives the same ordering as the product of ``(1 + %CPU)`` but is
    computed in log space, so it cannot overflow on long captures.

    Args:
        df: DataFrame with at least the columns "COMMAND", "%CPU", "%MEM"
            and "Time".
        top_n: number of top-ranked commands to plot (default 7).
    """
    # Rank commands by their accumulated log-scaled CPU usage.
    scores = (
        np.log1p(df["%CPU"])
        .groupby(df["COMMAND"])
        .sum()
        .sort_values(ascending=False)
    )
    top_commands = scores.index[:top_n]

    # Lines are drawn in row order, so rows must be chronological.
    df = df.sort_values(["Time"])

    _, axs = plt.subplots(2, 1, sharex=True, figsize=(20, 10))
    for command in top_commands:
        df_sub = df[df["COMMAND"] == command]
        axs[0].plot(df_sub["Time"], df_sub["%CPU"], label=command)
        axs[1].plot(df_sub["Time"], df_sub["%MEM"], label=command)
    axs[0].legend()
    axs[1].legend()
    axs[0].set_ylabel("CPU(%)")
    axs[1].set_ylabel("MEM(%)")
    axs[1].set_xlabel("Time")
    plt.show()
if __name__ == "__main__":
    # Parse every top capture in the current directory and plot them together.
    # Only files named like "top-<N>iters-<timestamp>.txt" are considered, so
    # unrelated .txt files are ignored; sorting keeps the order deterministic.
    log_paths = sorted(Path(".").glob("top-*iter*.txt"))
    if not log_paths:
        raise SystemExit("no top-*iter*.txt log files found in the current directory")
    dfs = [parse_log(filepath) for filepath in log_paths]
    df = pd.concat(dfs, ignore_index=True)
    plot(df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment