diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5e65b03..5717705 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: args: [--ignore-case] files: ^docs/source/spelling_wordlist\.txt$ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.14.6 + rev: v0.14.8 hooks: - id: ruff-check args: [--fix, --exit-non-zero-on-fix] @@ -41,7 +41,7 @@ repos: - id: codespell additional_dependencies: [".[toml]"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.18.2 + rev: v1.19.0 hooks: - id: mypy exclude: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bbe28f..a53e4e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- +- Support devices with unified memory (e.g., NVIDIA Spark) by [@XuehaiPan](https://github.com/XuehaiPan) in [#195](https://github.com/XuehaiPan/nvitop/pull/195). Issued by [@FlorinAndrei](https://github.com/FlorinAndrei). ### Changed diff --git a/nvitop/api/device.py b/nvitop/api/device.py index 8fa4bbb..da0225a 100644 --- a/nvitop/api/device.py +++ b/nvitop/api/device.py @@ -117,7 +117,7 @@ import time from collections import OrderedDict from typing import TYPE_CHECKING, Any, ClassVar, Literal, NamedTuple, overload -from nvitop.api import libcuda, libcudart, libnvml +from nvitop.api import host, libcuda, libcudart, libnvml from nvitop.api.process import GpuProcess from nvitop.api.utils import ( NA, @@ -148,32 +148,74 @@ __all__ = [ # Class definitions ################################################################################ -class MemoryInfo(NamedTuple): # in bytes # pylint: disable=missing-class-docstring +class MemoryInfo(NamedTuple): # in bytes + """Device memory information in bytes. + + Attributes: + total: Total device memory. + free: Unallocated device memory. + used: Allocated device memory. + reserved: Memory reserved for system use (default: NA). + """ + total: int | NaType free: int | NaType used: int | NaType + reserved: int | NaType = NA -class ClockInfos(NamedTuple): # in MHz # pylint: disable=missing-class-docstring +class ClockInfos(NamedTuple): # in MHz + """Clock speeds information in MHz. + + Attributes: + graphics: Graphics clock speed. + sm: SM (streaming multiprocessor) clock speed. + memory: Memory clock speed. + video: Video encoder/decoder clock speed. + """ + graphics: int | NaType sm: int | NaType memory: int | NaType video: int | NaType -class ClockSpeedInfos(NamedTuple): # pylint: disable=missing-class-docstring +class ClockSpeedInfos(NamedTuple): + """Clock speeds information in MHz. + + Attributes: + current: Current clock speeds. + max: Maximum clock speeds. + """ + current: ClockInfos max: ClockInfos -class UtilizationRates(NamedTuple): # in percentage # pylint: disable=missing-class-docstring +class UtilizationRates(NamedTuple): # in percentage + """Utilization rates in percentage. + + Attributes: + gpu: Percent of time over the past sample period during which one or more kernels was executing on the GPU. + memory: Percent of time over the past sample period during which global (device) memory was being read or written. + encoder: Video encoder utilization rate. + decoder: Video decoder utilization rate. + """ # pylint: disable=line-too-long + gpu: int | NaType memory: int | NaType encoder: int | NaType decoder: int | NaType -class ThroughputInfo(NamedTuple): # in KiB/s # pylint: disable=missing-class-docstring +class ThroughputInfo(NamedTuple): # in KiB/s + """Throughput information in KiB/s. + + Attributes: + tx: Transmit throughput in KiB/s. + rx: Receive throughput in KiB/s. + """ + tx: int | NaType rx: int | NaType @@ -925,18 +967,37 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me def memory_info(self) -> MemoryInfo: # in bytes """Return a named tuple with memory information (in bytes) for the device. - Returns: MemoryInfo(total, free, used) + Returns: MemoryInfo(total, free, used, reserved) A named tuple with memory information, the item could be :const:`nvitop.NA` when not applicable. """ if self._handle is not None: - memory_info = libnvml.nvmlQuery('nvmlDeviceGetMemoryInfo', self._handle) - if libnvml.nvmlCheckReturn(memory_info): - return MemoryInfo( - total=memory_info.total, - free=memory_info.free, - used=memory_info.used, + has_unified_memory = False + try: + memory_info = libnvml.nvmlQuery( + 'nvmlDeviceGetMemoryInfo', + self._handle, + ignore_errors=False, ) - return MemoryInfo(total=NA, free=NA, used=NA) + except libnvml.NVMLError_NotSupported: + has_unified_memory = True + memory_info = NA + except libnvml.NVMLError: + memory_info = NA + if libnvml.nvmlCheckReturn(memory_info): + if memory_info.total > 0: + return MemoryInfo( + total=memory_info.total, + free=memory_info.free, + used=memory_info.used, + reserved=getattr(memory_info, 'reserved', NA), + ) + has_unified_memory = True + if has_unified_memory: + # Device with unified memory + # Use system virtual memory as these devices share host memory + vm = host.virtual_memory() + return MemoryInfo(total=vm.total, free=vm.free, used=vm.used, reserved=NA) + return MemoryInfo(total=NA, free=NA, used=NA, reserved=NA) def memory_total(self) -> int | NaType: # in bytes """Total installed GPU memory in bytes. @@ -1014,8 +1075,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me Returns: Union[float, NaType] The percentage of used memory over total memory, or :const:`nvitop.NA` when not applicable. """ - total, _, used = self.memory_info() - if libnvml.nvmlCheckReturn(used, int) and libnvml.nvmlCheckReturn(total, int): + total, _, used, _ = self.memory_info() + if libnvml.nvmlCheckReturn(used, int) and libnvml.nvmlCheckReturn(total, int) and total > 0: return round(100.0 * used / total, 1) return NA @@ -1098,8 +1159,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me Returns: Union[float, NaType] The percentage of used BAR1 memory over total BAR1 memory, or :const:`nvitop.NA` when not applicable. """ # pylint: disable=line-too-long - total, _, used = self.bar1_memory_info() - if libnvml.nvmlCheckReturn(used, int) and libnvml.nvmlCheckReturn(total, int): + total, _, used, _ = self.bar1_memory_info() + if libnvml.nvmlCheckReturn(used, int) and libnvml.nvmlCheckReturn(total, int) and total > 0: return round(100.0 * used / total, 1) return NA diff --git a/nvitop/api/process.py b/nvitop/api/process.py index c8d7a76..a490902 100644 --- a/nvitop/api/process.py +++ b/nvitop/api/process.py @@ -639,7 +639,11 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi self._gpu_memory_human = bytes2human(self.gpu_memory()) memory_total = self.device.memory_total() gpu_memory_percent = NA - if libnvml.nvmlCheckReturn(memory_used, int) and libnvml.nvmlCheckReturn(memory_total, int): + if ( + libnvml.nvmlCheckReturn(memory_used, int) + and libnvml.nvmlCheckReturn(memory_total, int) + and memory_total > 0 + ): gpu_memory_percent = round(100.0 * memory_used / memory_total, 1) # type: ignore[assignment] self._gpu_memory_percent = gpu_memory_percent diff --git a/nvitop/tui/library/utils.py b/nvitop/tui/library/utils.py index 33b8e96..32308e2 100644 --- a/nvitop/tui/library/utils.py +++ b/nvitop/tui/library/utils.py @@ -125,8 +125,15 @@ def make_bar_chart( else: text = f'{min(round(percent), 100):d}%'.replace('100%', 'MAX') # type: ignore[arg-type] else: - bar_chart += '░' * (width - len(bar_chart) - 4) text = 'N/A' + if ( + extra_text + and 'N/A' not in extra_text.upper() + and swap_text + and len(bar_chart) + len(extra_text) + 2 <= width + ): + text, extra_text = extra_text, '' + bar_chart += '░' * (width - len(bar_chart) - len(text) - 1) if extra_text: if len(f'{bar_chart} {text} {extra_blank}{extra_text}') <= width: if swap_text: diff --git a/nvitop/tui/screens/main/panels/device.py b/nvitop/tui/screens/main/panels/device.py index 9042b75..707b96b 100644 --- a/nvitop/tui/screens/main/panels/device.py +++ b/nvitop/tui/screens/main/panels/device.py @@ -486,7 +486,7 @@ class DevicePanel(BasePanel): # pylint: disable=too-many-instance-attributes prefix, utilization, width=width, - extra_text=extra_text, + extra_text=extra_text if 'N/A' not in extra_text else '', swap_text=not extra_text.endswith('MHz'), extra_blank=' ', ) @@ -665,7 +665,7 @@ class DevicePanel(BasePanel): # pylint: disable=too-many-instance-attributes prefix, utilization, width=width, - extra_text=extra_text, + extra_text=extra_text if 'N/A' not in extra_text else '', swap_text=not extra_text.endswith('MHz'), extra_blank=' ', )