diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 06fd32b..ccc04be 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: args: [--ignore-case] files: ^docs/source/spelling_wordlist\.txt$ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.14.1 + rev: v0.14.4 hooks: - id: ruff-check args: [--fix, --exit-non-zero-on-fix] diff --git a/CHANGELOG.md b/CHANGELOG.md index b2982ef..ab20369 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Add `nvidia-ml-py` 13.580.82 to support list for NVIDIA Spark/Thor by [@johnnynunez](https://github.com/johnnynunez) in [#186](https://github.com/XuehaiPan/nvitop/pull/186). +- Add bar charts for memory bandwidth and power usage in the main screen by [@XuehaiPan](https://github.com/XuehaiPan) in [#190](https://github.com/XuehaiPan/nvitop/pull/190). ### Changed diff --git a/nvitop/tui/library/device.py b/nvitop/tui/library/device.py index 8973384..8276161 100644 --- a/nvitop/tui/library/device.py +++ b/nvitop/tui/library/device.py @@ -8,7 +8,7 @@ from __future__ import annotations import enum from typing import Any, ClassVar, Literal -from nvitop.api import NA, Snapshot, libnvml, ttl_cache, utilization2string +from nvitop.api import NA, NaType, Snapshot, libnvml, ttl_cache, utilization2string from nvitop.api import MigDevice as MigDeviceBase from nvitop.api import PhysicalDevice as DeviceBase from nvitop.tui.library.process import GpuProcess, GpuProcessBase @@ -30,7 +30,7 @@ class LoadingIntensity(enum.IntEnum): return 'red' -class Device(DeviceBase): +class Device(DeviceBase): # pylint: disable=too-many-public-methods GPU_PROCESS_CLASS: ClassVar[type[GpuProcessBase]] = GpuProcess MEMORY_UTILIZATION_THRESHOLDS: ClassVar[tuple[int, int]] = (10, 80) @@ -62,6 +62,7 @@ class Device(DeviceBase): 'compute_mode', 'mig_mode', 'is_mig_device', + 'power_utilization', 'memory_percent_string', 'memory_utilization_string', 'gpu_utilization_string', @@ -69,8 +70,12 @@ class Device(DeviceBase): 'temperature_string', 'memory_loading_intensity', 'memory_display_color', + 'bandwidth_loading_intensity', + 'bandwidth_display_color', 'gpu_loading_intensity', 'gpu_display_color', + 'power_loading_intensity', + 'power_display_color', 'loading_intensity', 'display_color', ] @@ -112,7 +117,6 @@ class Device(DeviceBase): fan_speed = ttl_cache(ttl=5.0)(DeviceBase.fan_speed) temperature = ttl_cache(ttl=5.0)(DeviceBase.temperature) - power_usage = ttl_cache(ttl=5.0)(DeviceBase.power_usage) display_active = ttl_cache(ttl=5.0)(DeviceBase.display_active) display_mode = ttl_cache(ttl=5.0)(DeviceBase.display_mode) current_driver_model = ttl_cache(ttl=5.0)(DeviceBase.current_driver_model) @@ -124,6 +128,15 @@ class Device(DeviceBase): compute_mode = ttl_cache(ttl=5.0)(DeviceBase.compute_mode) mig_mode = ttl_cache(ttl=5.0)(DeviceBase.mig_mode) + def power_utilization(self) -> float | NaType: # in percentage + power_limit = self.power_limit() + if not libnvml.nvmlCheckReturn(power_limit, int) or power_limit == 0: + return NA + power_usage = self.power_usage() + if not libnvml.nvmlCheckReturn(power_usage, int): + return NA + return round(100.0 * power_usage / power_limit, 1) + def memory_percent_string(self) -> str: # in percentage return utilization2string(self.memory_percent()) @@ -143,9 +156,15 @@ class Device(DeviceBase): def memory_loading_intensity(self) -> LoadingIntensity: return self.loading_intensity_of(self.memory_percent(), type='memory') + def bandwidth_loading_intensity(self) -> LoadingIntensity: + return self.loading_intensity_of(self.memory_utilization(), type='memory') + def gpu_loading_intensity(self) -> LoadingIntensity: return self.loading_intensity_of(self.gpu_utilization(), type='gpu') + def power_loading_intensity(self) -> LoadingIntensity: + return self.loading_intensity_of(self.power_utilization(), type='gpu') + def loading_intensity(self) -> LoadingIntensity: return max(self.memory_loading_intensity(), self.gpu_loading_intensity()) @@ -159,11 +178,21 @@ class Device(DeviceBase): return 'red' return self.memory_loading_intensity().color() + def bandwidth_display_color(self) -> str: + if self.name().startswith('ERROR:'): + return 'red' + return self.bandwidth_loading_intensity().color() + def gpu_display_color(self) -> str: if self.name().startswith('ERROR:'): return 'red' return self.gpu_loading_intensity().color() + def power_display_color(self) -> str: + if self.name().startswith('ERROR:'): + return 'red' + return self.power_loading_intensity().color() + @staticmethod def loading_intensity_of( utilization: float | str, diff --git a/nvitop/tui/screens/main/__init__.py b/nvitop/tui/screens/main/__init__.py index 929b6c7..1d8bc7f 100644 --- a/nvitop/tui/screens/main/__init__.py +++ b/nvitop/tui/screens/main/__init__.py @@ -189,7 +189,7 @@ class MainScreen(BaseSelectableScreen): # pylint: disable=too-many-instance-att def print(self) -> None: if self.device_count > 0: print_width = min(panel.print_width() for panel in self.container) - self.width = max(print_width, min(self.width, 100)) + self.width = max(print_width, min(self.width, 128)) else: self.width = 79 for panel in self.container: diff --git a/nvitop/tui/screens/main/panels/device.py b/nvitop/tui/screens/main/panels/device.py index d2db967..bb38171 100644 --- a/nvitop/tui/screens/main/panels/device.py +++ b/nvitop/tui/screens/main/panels/device.py @@ -351,28 +351,24 @@ class DevicePanel(BasePanel): # pylint: disable=too-many-instance-attributes self.color_at(y, self.x + 56, width=22, fg=device.display_color, attr=attr) if draw_bars: - matrix = [ - ( - self.x + 80, - y_start, - remaining_width - 3, - 'MEM', - device.memory_percent, - device.memory_display_color, - ), - ( - self.x + 80, - y_start + 1, - remaining_width - 3, - 'UTL', - device.gpu_utilization, - device.gpu_display_color, - ), - ] - if self.compact: - if remaining_width >= 44 and not device.is_mig_device: - left_width = (remaining_width - 6 + 1) // 2 - 1 - right_width = (remaining_width - 6) // 2 + 1 + left_width = (remaining_width - 6 + 1) // 2 - 1 + right_width = (remaining_width - 6) // 2 + 1 + matrix: list[tuple[int, int, int, str, float, str]] = [] + if device.is_mig_device: + matrix = [ + ( + self.x + 80, + y_start, + remaining_width - 3, + 'MEM', + device.memory_percent, + device.memory_display_color, + ), + ] + if remaining_width >= 44 and len(prev_device_index) == 1: + self.addstr(y_start - 1, self.x + 80 + left_width + 1, '┴') + elif self.compact: + if remaining_width >= 44: matrix = [ ( self.x + 80, @@ -395,19 +391,85 @@ class DevicePanel(BasePanel): # pylint: disable=too-many-instance-attributes if len(prev_device_index) == 2: separator = '┬' self.addstr(y_start - 1, self.x + 80 + left_width + 1, separator) - self.addstr(y_start, self.x + 80 + left_width + 1, '│') if index == len(self.snapshots) - 1: self.addstr(y_start + 1, self.x + 80 + left_width + 1, '╧') else: - if remaining_width >= 44 and len(prev_device_index) == 1: - self.addstr(y_start - 1, self.x + 80 + left_width + 1, '┴') - matrix.pop() - elif device.is_mig_device: - matrix.pop() + matrix = [ + ( + self.x + 80, + y_start, + remaining_width - 3, + 'MEM', + device.memory_percent, + device.memory_display_color, + ), + ] + else: + if remaining_width >= 44: + matrix = [ + ( + self.x + 80, + y_start, + left_width, + 'MEM', + device.memory_percent, + device.memory_display_color, + ), + ( + self.x + 80, + y_start + 1, + left_width, + 'UTL', + device.gpu_utilization, + device.gpu_display_color, + ), + ( + self.x + 80 + left_width + 3, + y_start, + right_width, + 'MBW', + device.memory_utilization, + device.bandwidth_display_color, + ), + ( + self.x + 80 + left_width + 3, + y_start + 1, + right_width, + 'PWR', + device.power_utilization, + device.power_display_color, + ), + ] + separator = '┼' if index > 0 else '╤' + if len(prev_device_index) == 2: + separator = '┬' + self.addstr(y_start - 1, self.x + 80 + left_width + 1, separator) + if index == len(self.snapshots) - 1: + self.addstr(y_start + 2, self.x + 80 + left_width + 1, '╧') + else: + matrix = [ + ( + self.x + 80, + y_start, + remaining_width - 3, + 'MEM', + device.memory_percent, + device.memory_display_color, + ), + ( + self.x + 80, + y_start + 1, + remaining_width - 3, + 'UTL', + device.gpu_utilization, + device.gpu_display_color, + ), + ] + for x_offset, y, width, prefix, utilization, color in matrix: # pylint: disable-next=disallowed-name bar = make_bar(prefix, utilization, width=width) - self.addstr(y, x_offset, bar) + self.addstr(y, x_offset - 2, f'│ {bar}') if self.TERM_256COLOR: parts = bar.rstrip().split(' ') prefix_len = len(parts[0]) @@ -433,7 +495,7 @@ class DevicePanel(BasePanel): # pylint: disable=too-many-instance-attributes return self.width return 79 - def print(self) -> None: # pylint: disable=too-many-locals,too-many-branches + def print(self) -> None: # pylint: disable=too-many-locals,too-many-branches,too-many-statements lines = [time.strftime('%a %b %d %H:%M:%S %Y'), *self.header_lines(compact=False)] if self.device_count > 0: @@ -482,27 +544,93 @@ class DevicePanel(BasePanel): # pylint: disable=too-many-instance-attributes lines[y_start - 1][:-1] + '╪' + '═' * (remaining_width - 1) + '╕' ) - matrix = [ - ( - 'MEM', - device.memory_percent, - device.memory_display_color, - ), - ( - 'UTL', - device.gpu_utilization, - device.gpu_display_color, - ), - ] + left_width = (remaining_width - 6 + 1) // 2 - 1 + right_width = (remaining_width - 6) // 2 + 1 + matrix: list[list[tuple[str, float, str, int]]] = [] if device.is_mig_device: - matrix.pop() - for y, (prefix, utilization, color) in enumerate(matrix, start=y_start): - bar = make_bar( # pylint: disable=disallowed-name - prefix, - utilization, - width=remaining_width - 3, - ) - lines[y] += f' {colored(bar, color)} │' + matrix = [ + [ + ( + 'MEM', + device.memory_percent, + device.memory_display_color, + remaining_width - 3, + ), + ], + ] + if remaining_width >= 44 and len(prev_device_index) == 1: + lines[y_start - 1] = ( + lines[y_start - 1][: -right_width - 4] + + '┴' + + lines[y_start - 1][-right_width - 3 :] + ) + else: + if remaining_width >= 44: + matrix = [ + [ + ( + 'MEM', + device.memory_percent, + device.memory_display_color, + left_width, + ), + ( + 'MBW', + device.memory_utilization, + device.bandwidth_display_color, + right_width, + ), + ], + [ + ( + 'UTL', + device.gpu_utilization, + device.gpu_display_color, + left_width, + ), + ( + 'PWR', + device.power_utilization, + device.power_display_color, + right_width, + ), + ], + ] + separator = '┼' if index > 0 else '╤' + if len(prev_device_index) == 2: + separator = '┬' + lines[y_start - 1] = ( + lines[y_start - 1][: -right_width - 4] + + separator + + lines[y_start - 1][-right_width - 3 :] + ) + else: + matrix = [ + [ + ( + 'MEM', + device.memory_percent, + device.memory_display_color, + remaining_width - 3, + ), + ], + [ + ( + 'UTL', + device.gpu_utilization, + device.gpu_display_color, + remaining_width - 3, + ), + ], + ] + for y, row in enumerate(matrix, start=y_start): + for prefix, utilization, color, width in row: + bar = make_bar( # pylint: disable=disallowed-name + prefix, + utilization, + width=width, + ) + lines[y] += f' {colored(bar, color)} │' # type: ignore[arg-type] if index == len(self.snapshots) - 1: lines[y_start + len(matrix)] = ( @@ -511,6 +639,12 @@ class DevicePanel(BasePanel): # pylint: disable=too-many-instance-attributes + '═' * (remaining_width - 1) + '╛' ) + if remaining_width >= 44 and len(matrix[0]) > 1: + lines[y_start + len(matrix)] = ( + lines[y_start + len(matrix)][: -right_width - 4] + + '╧' + + lines[y_start + len(matrix)][-right_width - 3 :] + ) y_start += len(matrix) prev_device_index = device.tuple_index