feat(tui/device): add bar charts for memory bandwidth and power usage (#190)

This commit is contained in:
Xuehai Pan 2025-11-09 23:52:22 +08:00
parent a1e15da2bf
commit 932257a5f9
5 changed files with 219 additions and 55 deletions

View file

@ -29,7 +29,7 @@ repos:
args: [--ignore-case]
files: ^docs/source/spelling_wordlist\.txt$
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.14.1
rev: v0.14.4
hooks:
- id: ruff-check
args: [--fix, --exit-non-zero-on-fix]

View file

@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Add `nvidia-ml-py` 13.580.82 to support list for NVIDIA Spark/Thor by [@johnnynunez](https://github.com/johnnynunez) in [#186](https://github.com/XuehaiPan/nvitop/pull/186).
- Add bar charts for memory bandwidth and power usage in the main screen by [@XuehaiPan](https://github.com/XuehaiPan) in [#190](https://github.com/XuehaiPan/nvitop/pull/190).
### Changed

View file

@ -8,7 +8,7 @@ from __future__ import annotations
import enum
from typing import Any, ClassVar, Literal
from nvitop.api import NA, Snapshot, libnvml, ttl_cache, utilization2string
from nvitop.api import NA, NaType, Snapshot, libnvml, ttl_cache, utilization2string
from nvitop.api import MigDevice as MigDeviceBase
from nvitop.api import PhysicalDevice as DeviceBase
from nvitop.tui.library.process import GpuProcess, GpuProcessBase
@ -30,7 +30,7 @@ class LoadingIntensity(enum.IntEnum):
return 'red'
class Device(DeviceBase):
class Device(DeviceBase): # pylint: disable=too-many-public-methods
GPU_PROCESS_CLASS: ClassVar[type[GpuProcessBase]] = GpuProcess
MEMORY_UTILIZATION_THRESHOLDS: ClassVar[tuple[int, int]] = (10, 80)
@ -62,6 +62,7 @@ class Device(DeviceBase):
'compute_mode',
'mig_mode',
'is_mig_device',
'power_utilization',
'memory_percent_string',
'memory_utilization_string',
'gpu_utilization_string',
@ -69,8 +70,12 @@ class Device(DeviceBase):
'temperature_string',
'memory_loading_intensity',
'memory_display_color',
'bandwidth_loading_intensity',
'bandwidth_display_color',
'gpu_loading_intensity',
'gpu_display_color',
'power_loading_intensity',
'power_display_color',
'loading_intensity',
'display_color',
]
@ -112,7 +117,6 @@ class Device(DeviceBase):
fan_speed = ttl_cache(ttl=5.0)(DeviceBase.fan_speed)
temperature = ttl_cache(ttl=5.0)(DeviceBase.temperature)
power_usage = ttl_cache(ttl=5.0)(DeviceBase.power_usage)
display_active = ttl_cache(ttl=5.0)(DeviceBase.display_active)
display_mode = ttl_cache(ttl=5.0)(DeviceBase.display_mode)
current_driver_model = ttl_cache(ttl=5.0)(DeviceBase.current_driver_model)
@ -124,6 +128,15 @@ class Device(DeviceBase):
compute_mode = ttl_cache(ttl=5.0)(DeviceBase.compute_mode)
mig_mode = ttl_cache(ttl=5.0)(DeviceBase.mig_mode)
def power_utilization(self) -> float | NaType:  # in percentage
    """Return the power usage as a percentage of the power limit.

    The result is ``100 * power_usage() / power_limit()`` rounded to one
    decimal place. ``NA`` is returned when the power limit fails the
    ``libnvml.nvmlCheckReturn(..., int)`` check or equals zero, or when
    the power usage fails the same check.
    """
    limit = self.power_limit()
    # A zero limit is rejected up front so the division below is always defined.
    limit_is_usable = libnvml.nvmlCheckReturn(limit, int) and limit != 0
    if not limit_is_usable:
        return NA

    usage = self.power_usage()
    if libnvml.nvmlCheckReturn(usage, int):
        return round(100.0 * usage / limit, 1)
    return NA
def memory_percent_string(self) -> str:  # in percentage
    """Return ``memory_percent()`` formatted by ``utilization2string``."""
    percent = self.memory_percent()
    return utilization2string(percent)
@ -143,9 +156,15 @@ class Device(DeviceBase):
def memory_loading_intensity(self) -> LoadingIntensity:
    """Classify the current ``memory_percent()`` reading with ``type='memory'``."""
    percent = self.memory_percent()
    return self.loading_intensity_of(percent, type='memory')
def bandwidth_loading_intensity(self) -> LoadingIntensity:
    """Classify the current ``memory_utilization()`` reading with ``type='memory'``."""
    bandwidth = self.memory_utilization()
    return self.loading_intensity_of(bandwidth, type='memory')
def gpu_loading_intensity(self) -> LoadingIntensity:
    """Classify the current ``gpu_utilization()`` reading with ``type='gpu'``."""
    utilization = self.gpu_utilization()
    return self.loading_intensity_of(utilization, type='gpu')
def power_loading_intensity(self) -> LoadingIntensity:
    """Classify the current ``power_utilization()`` reading with ``type='gpu'``."""
    utilization = self.power_utilization()
    return self.loading_intensity_of(utilization, type='gpu')
def loading_intensity(self) -> LoadingIntensity:
    """Return the greater of the memory and GPU loading intensities."""
    memory_level = self.memory_loading_intensity()
    gpu_level = self.gpu_loading_intensity()
    return max(memory_level, gpu_level)
@ -159,11 +178,21 @@ class Device(DeviceBase):
return 'red'
return self.memory_loading_intensity().color()
def bandwidth_display_color(self) -> str:
    """Return the color name for the memory-bandwidth indicator.

    A device whose name starts with ``'ERROR:'`` is always ``'red'``;
    otherwise the color comes from ``bandwidth_loading_intensity()``.
    """
    has_error = self.name().startswith('ERROR:')
    return 'red' if has_error else self.bandwidth_loading_intensity().color()
def gpu_display_color(self) -> str:
    """Return the color name for the GPU-utilization indicator.

    A device whose name starts with ``'ERROR:'`` is always ``'red'``;
    otherwise the color comes from ``gpu_loading_intensity()``.
    """
    has_error = self.name().startswith('ERROR:')
    return 'red' if has_error else self.gpu_loading_intensity().color()
def power_display_color(self) -> str:
    """Return the color name for the power-usage indicator.

    A device whose name starts with ``'ERROR:'`` is always ``'red'``;
    otherwise the color comes from ``power_loading_intensity()``.
    """
    has_error = self.name().startswith('ERROR:')
    return 'red' if has_error else self.power_loading_intensity().color()
@staticmethod
def loading_intensity_of(
utilization: float | str,

View file

@ -189,7 +189,7 @@ class MainScreen(BaseSelectableScreen): # pylint: disable=too-many-instance-att
def print(self) -> None:
if self.device_count > 0:
print_width = min(panel.print_width() for panel in self.container)
self.width = max(print_width, min(self.width, 100))
self.width = max(print_width, min(self.width, 128))
else:
self.width = 79
for panel in self.container:

View file

@ -351,28 +351,24 @@ class DevicePanel(BasePanel): # pylint: disable=too-many-instance-attributes
self.color_at(y, self.x + 56, width=22, fg=device.display_color, attr=attr)
if draw_bars:
matrix = [
(
self.x + 80,
y_start,
remaining_width - 3,
'MEM',
device.memory_percent,
device.memory_display_color,
),
(
self.x + 80,
y_start + 1,
remaining_width - 3,
'UTL',
device.gpu_utilization,
device.gpu_display_color,
),
]
if self.compact:
if remaining_width >= 44 and not device.is_mig_device:
left_width = (remaining_width - 6 + 1) // 2 - 1
right_width = (remaining_width - 6) // 2 + 1
left_width = (remaining_width - 6 + 1) // 2 - 1
right_width = (remaining_width - 6) // 2 + 1
matrix: list[tuple[int, int, int, str, float, str]] = []
if device.is_mig_device:
matrix = [
(
self.x + 80,
y_start,
remaining_width - 3,
'MEM',
device.memory_percent,
device.memory_display_color,
),
]
if remaining_width >= 44 and len(prev_device_index) == 1:
self.addstr(y_start - 1, self.x + 80 + left_width + 1, '')
elif self.compact:
if remaining_width >= 44:
matrix = [
(
self.x + 80,
@ -395,19 +391,85 @@ class DevicePanel(BasePanel): # pylint: disable=too-many-instance-attributes
if len(prev_device_index) == 2:
separator = ''
self.addstr(y_start - 1, self.x + 80 + left_width + 1, separator)
self.addstr(y_start, self.x + 80 + left_width + 1, '')
if index == len(self.snapshots) - 1:
self.addstr(y_start + 1, self.x + 80 + left_width + 1, '')
else:
if remaining_width >= 44 and len(prev_device_index) == 1:
self.addstr(y_start - 1, self.x + 80 + left_width + 1, '')
matrix.pop()
elif device.is_mig_device:
matrix.pop()
matrix = [
(
self.x + 80,
y_start,
remaining_width - 3,
'MEM',
device.memory_percent,
device.memory_display_color,
),
]
else:
if remaining_width >= 44:
matrix = [
(
self.x + 80,
y_start,
left_width,
'MEM',
device.memory_percent,
device.memory_display_color,
),
(
self.x + 80,
y_start + 1,
left_width,
'UTL',
device.gpu_utilization,
device.gpu_display_color,
),
(
self.x + 80 + left_width + 3,
y_start,
right_width,
'MBW',
device.memory_utilization,
device.bandwidth_display_color,
),
(
self.x + 80 + left_width + 3,
y_start + 1,
right_width,
'PWR',
device.power_utilization,
device.power_display_color,
),
]
separator = '' if index > 0 else ''
if len(prev_device_index) == 2:
separator = ''
self.addstr(y_start - 1, self.x + 80 + left_width + 1, separator)
if index == len(self.snapshots) - 1:
self.addstr(y_start + 2, self.x + 80 + left_width + 1, '')
else:
matrix = [
(
self.x + 80,
y_start,
remaining_width - 3,
'MEM',
device.memory_percent,
device.memory_display_color,
),
(
self.x + 80,
y_start + 1,
remaining_width - 3,
'UTL',
device.gpu_utilization,
device.gpu_display_color,
),
]
for x_offset, y, width, prefix, utilization, color in matrix:
# pylint: disable-next=disallowed-name
bar = make_bar(prefix, utilization, width=width)
self.addstr(y, x_offset, bar)
self.addstr(y, x_offset - 2, f'{bar}')
if self.TERM_256COLOR:
parts = bar.rstrip().split(' ')
prefix_len = len(parts[0])
@ -433,7 +495,7 @@ class DevicePanel(BasePanel): # pylint: disable=too-many-instance-attributes
return self.width
return 79
def print(self) -> None: # pylint: disable=too-many-locals,too-many-branches
def print(self) -> None: # pylint: disable=too-many-locals,too-many-branches,too-many-statements
lines = [time.strftime('%a %b %d %H:%M:%S %Y'), *self.header_lines(compact=False)]
if self.device_count > 0:
@ -482,27 +544,93 @@ class DevicePanel(BasePanel): # pylint: disable=too-many-instance-attributes
lines[y_start - 1][:-1] + '' + '' * (remaining_width - 1) + ''
)
matrix = [
(
'MEM',
device.memory_percent,
device.memory_display_color,
),
(
'UTL',
device.gpu_utilization,
device.gpu_display_color,
),
]
left_width = (remaining_width - 6 + 1) // 2 - 1
right_width = (remaining_width - 6) // 2 + 1
matrix: list[list[tuple[str, float, str, int]]] = []
if device.is_mig_device:
matrix.pop()
for y, (prefix, utilization, color) in enumerate(matrix, start=y_start):
bar = make_bar( # pylint: disable=disallowed-name
prefix,
utilization,
width=remaining_width - 3,
)
lines[y] += f' {colored(bar, color)}'
matrix = [
[
(
'MEM',
device.memory_percent,
device.memory_display_color,
remaining_width - 3,
),
],
]
if remaining_width >= 44 and len(prev_device_index) == 1:
lines[y_start - 1] = (
lines[y_start - 1][: -right_width - 4]
+ ''
+ lines[y_start - 1][-right_width - 3 :]
)
else:
if remaining_width >= 44:
matrix = [
[
(
'MEM',
device.memory_percent,
device.memory_display_color,
left_width,
),
(
'MBW',
device.memory_utilization,
device.bandwidth_display_color,
right_width,
),
],
[
(
'UTL',
device.gpu_utilization,
device.gpu_display_color,
left_width,
),
(
'PWR',
device.power_utilization,
device.power_display_color,
right_width,
),
],
]
separator = '' if index > 0 else ''
if len(prev_device_index) == 2:
separator = ''
lines[y_start - 1] = (
lines[y_start - 1][: -right_width - 4]
+ separator
+ lines[y_start - 1][-right_width - 3 :]
)
else:
matrix = [
[
(
'MEM',
device.memory_percent,
device.memory_display_color,
remaining_width - 3,
),
],
[
(
'UTL',
device.gpu_utilization,
device.gpu_display_color,
remaining_width - 3,
),
],
]
for y, row in enumerate(matrix, start=y_start):
for prefix, utilization, color, width in row:
bar = make_bar( # pylint: disable=disallowed-name
prefix,
utilization,
width=width,
)
lines[y] += f' {colored(bar, color)}' # type: ignore[arg-type]
if index == len(self.snapshots) - 1:
lines[y_start + len(matrix)] = (
@ -511,6 +639,12 @@ class DevicePanel(BasePanel): # pylint: disable=too-many-instance-attributes
+ '' * (remaining_width - 1)
+ ''
)
if remaining_width >= 44 and len(matrix[0]) > 1:
lines[y_start + len(matrix)] = (
lines[y_start + len(matrix)][: -right_width - 4]
+ ''
+ lines[y_start + len(matrix)][-right_width - 3 :]
)
y_start += len(matrix)
prev_device_index = device.tuple_index