diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index e39846a..5f713fc 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -57,6 +57,16 @@ jobs: python -m nvitop.select --version python -m nvitop.select --help + - name: Import tests (Python 3.7) + run: | + "${{ steps.py37.outputs.python-path }}" -m pip install --upgrade pip setuptools + "${{ steps.py37.outputs.python-path }}" -m pip install -r requirements.txt + "${{ steps.py37.outputs.python-path }}" -c 'import nvitop' + "${{ steps.py37.outputs.python-path }}" -m nvitop --version + "${{ steps.py37.outputs.python-path }}" -m nvitop --help + "${{ steps.py37.outputs.python-path }}" -m nvitop.select --version + "${{ steps.py37.outputs.python-path }}" -m nvitop.select --help + - name: Install linters run: | python -m pip install --upgrade pre-commit pylint[spelling] diff --git a/CHANGELOG.md b/CHANGELOG.md index 50e8d6e..24ddd2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed -- +- Move `TTLCache` usage to CLI-only by [@XuehaiPan](https://github.com/XuehaiPan) in [#66](https://github.com/XuehaiPan/nvitop/pull/66). ### Fixed diff --git a/nvitop/api/device.py b/nvitop/api/device.py index 20b55e6..d137cc9 100644 --- a/nvitop/api/device.py +++ b/nvitop/api/device.py @@ -104,6 +104,7 @@ Examples: from __future__ import annotations import contextlib +import functools import multiprocessing as mp import os import re @@ -111,8 +112,6 @@ import threading from collections import OrderedDict from typing import Any, Callable, Iterable, NamedTuple -from cachetools.func import ttl_cache - from nvitop.api import libcuda, libcudart, libnvml from nvitop.api.process import GpuProcess from nvitop.api.utils import NA, NaType, Snapshot, boolify, bytes2human, memoize_when_activated @@ -730,7 +729,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me ) func = getattr(libnvml, 'nvmlDeviceGet' + pascal_case + suffix) - @ttl_cache(ttl=1.0) def attribute(*args: Any, **kwargs: Any) -> Any: try: return libnvml.nvmlQuery( @@ -877,7 +875,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me return libnvml.nvmlQuery('nvmlDeviceGetSerial', self.handle) @memoize_when_activated - @ttl_cache(ttl=1.0) def memory_info(self) -> MemoryInfo: # in bytes """Return a named tuple with memory information (in bytes) for the device. @@ -982,7 +979,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me return f'{self.memory_used_human()} / {self.memory_total_human()}' @memoize_when_activated - @ttl_cache(ttl=1.0) def bar1_memory_info(self) -> MemoryInfo: # in bytes """Return a named tuple with BAR1 memory information (in bytes) for the device. @@ -1069,7 +1065,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me return f'{self.bar1_memory_used_human()} / {self.bar1_memory_total_human()}' @memoize_when_activated - @ttl_cache(ttl=1.0) def utilization_rates(self) -> UtilizationRates: # in percentage """Return a named tuple with GPU utilization rates (in percentage) for the device. @@ -1143,7 +1138,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me return self.utilization_rates().decoder @memoize_when_activated - @ttl_cache(ttl=5.0) def clock_infos(self) -> ClockInfos: # in MHz """Return a named tuple with current clock speeds (in MHz) for the device. @@ -1168,7 +1162,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me clocks = clock_infos @memoize_when_activated - @ttl_cache(ttl=5.0) def max_clock_infos(self) -> ClockInfos: # in MHz """Return a named tuple with maximum clock speeds (in MHz) for the device. @@ -1309,7 +1302,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me """ # pylint: disable=line-too-long return self.max_clock_infos().video - @ttl_cache(ttl=5.0) def fan_speed(self) -> int | NaType: # in percentage """The fan speed value is the percent of the product's maximum noise tolerance fan speed that the device's fan is currently intended to run at. @@ -1329,7 +1321,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me """ # pylint: disable=line-too-long return libnvml.nvmlQuery('nvmlDeviceGetFanSpeed', self.handle) - @ttl_cache(ttl=5.0) def temperature(self) -> int | NaType: # in Celsius """Core GPU temperature in degrees C. @@ -1349,7 +1340,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me ) @memoize_when_activated - @ttl_cache(ttl=5.0) def power_usage(self) -> int | NaType: # in milliwatts (mW) """The last measured power draw for the entire board in milliwatts. @@ -1367,7 +1357,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me power_draw = power_usage # in milliwatts (mW) @memoize_when_activated - @ttl_cache(ttl=60.0) def power_limit(self) -> int | NaType: # in milliwatts (mW) """The software power limit in milliwatts. @@ -1398,7 +1387,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me power_limit = f'{round(power_limit / 1000.0)}W' return f'{power_usage} / {power_limit}' - @ttl_cache(ttl=60.0) def display_active(self) -> str | NaType: """A flag that indicates whether a display is initialized on the GPU's (e.g. memory is allocated on the device for display). @@ -1421,7 +1409,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me NA, ) - @ttl_cache(ttl=60.0) def display_mode(self) -> str | NaType: """A flag that indicates whether a physical display (e.g. monitor) is currently connected to any of the GPU's connectors. @@ -1443,7 +1430,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me NA, ) - @ttl_cache(ttl=60.0) def current_driver_model(self) -> str | NaType: """The driver model currently in use. @@ -1471,7 +1457,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me driver_model = current_driver_model - @ttl_cache(ttl=60.0) def persistence_mode(self) -> str | NaType: """A flag that indicates whether persistence mode is enabled for the GPU. Value is either "Enabled" or "Disabled". @@ -1495,7 +1480,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me NA, ) - @ttl_cache(ttl=5.0) def performance_state(self) -> str | NaType: """The current performance state for the GPU. States range from P0 (maximum performance) to P12 (minimum performance). @@ -1513,7 +1497,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me performance_state = 'P' + str(performance_state) return performance_state - @ttl_cache(ttl=5.0) def total_volatile_uncorrected_ecc_errors(self) -> int | NaType: """Total errors detected across entire chip. @@ -1533,7 +1516,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me libnvml.NVML_VOLATILE_ECC, ) - @ttl_cache(ttl=60.0) def compute_mode(self) -> str | NaType: """The compute mode flag indicates whether individual or multiple compute applications may run on the GPU. @@ -1588,7 +1570,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me self._is_mig_device = bool(is_mig_device) # nvmlDeviceIsMigDeviceHandle returns c_uint return self._is_mig_device - @ttl_cache(ttl=60.0) def mig_mode(self) -> str | NaType: """The MIG mode that the GPU is currently operating under. @@ -1653,7 +1634,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me return [self] return self.mig_devices() - @ttl_cache(ttl=2.0) def processes(self) -> dict[int, GpuProcess]: """Return a dictionary of processes running on the GPU. @@ -1834,7 +1814,6 @@ class PhysicalDevice(Device): """ return self._nvml_index - @ttl_cache(ttl=60.0) def max_mig_device_count(self) -> int: """Return the maximum number of MIG instances the device supports. @@ -1847,7 +1826,6 @@ class PhysicalDevice(Device): ignore_function_not_found=True, ) - @ttl_cache(ttl=60.0) def mig_device(self, mig_index: int) -> MigDevice: """Return a child MIG device of the given index. @@ -1858,7 +1836,6 @@ class PhysicalDevice(Device): with _global_physical_device(self): return MigDevice(index=(self.index, mig_index)) - @ttl_cache(ttl=60.0) def mig_devices(self) -> list[MigDevice]: """Return a list of children MIG devices of the current device. @@ -2496,7 +2473,7 @@ def _get_global_physical_device() -> PhysicalDevice: return _GLOBAL_PHYSICAL_DEVICE -@ttl_cache(ttl=300.0) +@functools.lru_cache() def _parse_cuda_visible_devices( # pylint: disable=too-many-branches,too-many-statements cuda_visible_devices: str | None = None, format: str = 'index', # pylint: disable=redefined-builtin diff --git a/nvitop/api/host.py b/nvitop/api/host.py index 814f698..591925a 100644 --- a/nvitop/api/host.py +++ b/nvitop/api/host.py @@ -27,7 +27,6 @@ import time as _time from typing import Callable as _Callable import psutil as _psutil -from cachetools.func import ttl_cache as _ttl_cache from psutil import * # noqa: F403 # pylint: disable=wildcard-import,unused-wildcard-import,redefined-builtin @@ -48,17 +47,18 @@ PsutilError = Error # make alias # noqa: F405 del Error # noqa: F821 # pylint: disable=undefined-variable -cpu_percent = _ttl_cache(ttl=0.25)(_psutil.cpu_percent) -virtual_memory = _ttl_cache(ttl=0.25)(_psutil.virtual_memory) -swap_memory = _ttl_cache(ttl=0.25)(_psutil.swap_memory) +cpu_percent = _psutil.cpu_percent +virtual_memory = _psutil.virtual_memory +swap_memory = _psutil.swap_memory -try: - load_average: _Callable[[], tuple[float, float, float]] = _ttl_cache(ttl=2.0)( - _psutil.getloadavg, - ) - load_average.__doc__ = """Get the system load average.""" -except AttributeError: +if hasattr(_psutil, 'getloadavg'): + + def load_average() -> tuple[float, float, float]: + """Get the system load average.""" + return _psutil.getloadavg() + +else: def load_average() -> None: """Get the system load average.""" diff --git a/nvitop/api/process.py b/nvitop/api/process.py index 5837834..0e0dd3e 100644 --- a/nvitop/api/process.py +++ b/nvitop/api/process.py @@ -652,7 +652,6 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi """Update the GPU consumption status from a new NVML query.""" self.set_gpu_memory(NA) self.set_gpu_utilization(NA, NA, NA, NA) - self.device.processes.cache_clear() self.device.processes() return self.gpu_memory() diff --git a/nvitop/gui/library/device.py b/nvitop/gui/library/device.py index 2bd538f..3602ff6 100644 --- a/nvitop/gui/library/device.py +++ b/nvitop/gui/library/device.py @@ -3,6 +3,8 @@ # pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring +from cachetools.func import ttl_cache + from nvitop.api import NA from nvitop.api import MigDevice as MigDeviceBase from nvitop.api import PhysicalDevice as DeviceBase @@ -78,19 +80,19 @@ class Device(DeviceBase): self.as_snapshot() return self._snapshot - def mig_devices(self): - mig_devices = [] - - if self.is_mig_mode_enabled(): - for mig_index in range(self.max_mig_device_count()): - try: - mig_device = MigDevice(index=(self.index, mig_index)) - except libnvml.NVMLError: - break - else: - mig_devices.append(mig_device) - - return mig_devices + fan_speed = ttl_cache(ttl=5.0)(DeviceBase.fan_speed) + temperature = ttl_cache(ttl=5.0)(DeviceBase.temperature) + power_usage = ttl_cache(ttl=5.0)(DeviceBase.power_usage) + display_active = ttl_cache(ttl=5.0)(DeviceBase.display_active) + display_mode = ttl_cache(ttl=5.0)(DeviceBase.display_mode) + current_driver_model = ttl_cache(ttl=5.0)(DeviceBase.current_driver_model) + persistence_mode = ttl_cache(ttl=5.0)(DeviceBase.persistence_mode) + performance_state = ttl_cache(ttl=5.0)(DeviceBase.performance_state) + total_volatile_uncorrected_ecc_errors = ttl_cache(ttl=5.0)( + DeviceBase.total_volatile_uncorrected_ecc_errors, + ) + compute_mode = ttl_cache(ttl=5.0)(DeviceBase.compute_mode) + mig_mode = ttl_cache(ttl=5.0)(DeviceBase.mig_mode) def memory_percent_string(self): # in percentage return utilization2string(self.memory_percent())