refactor(api): move TTLCache usage to CLI-only (#66)

2026-05-15 14:15:55 -06:00 · 2023-04-07 16:51:07 +08:00 · 2023-04-07 16:51:07 +08:00 · c883884073
commit c883884073
parent df42d0c0f0
6 changed files with 38 additions and 50 deletions
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@ -57,6 +57,16 @@ jobs:
          python -m nvitop.select --version
          python -m nvitop.select --help

+      - name: Import tests (Python 3.7)
+        run: |
+          "${{ steps.py37.outputs.python-path }}" -m pip install --upgrade pip setuptools
+          "${{ steps.py37.outputs.python-path }}" -m pip install -r requirements.txt
+          "${{ steps.py37.outputs.python-path }}" -c 'import nvitop'
+          "${{ steps.py37.outputs.python-path }}" -m nvitop --version
+          "${{ steps.py37.outputs.python-path }}" -m nvitop --help
+          "${{ steps.py37.outputs.python-path }}" -m nvitop.select --version
+          "${{ steps.py37.outputs.python-path }}" -m nvitop.select --help
+
      - name: Install linters
        run: |
          python -m pip install --upgrade pre-commit pylint[spelling]
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -17,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Changed

-
+- Move `TTLCache` usage to CLI-only by [@XuehaiPan](https://github.com/XuehaiPan) in [#66](https://github.com/XuehaiPan/nvitop/pull/66).

 ### Fixed

--- a/nvitop/api/device.py
+++ b/nvitop/api/device.py
@ -104,6 +104,7 @@ Examples:
 from __future__ import annotations

 import contextlib
+import functools
 import multiprocessing as mp
 import os
 import re
@ -111,8 +112,6 @@ import threading
 from collections import OrderedDict
 from typing import Any, Callable, Iterable, NamedTuple

-from cachetools.func import ttl_cache
-
 from nvitop.api import libcuda, libcudart, libnvml
 from nvitop.api.process import GpuProcess
 from nvitop.api.utils import NA, NaType, Snapshot, boolify, bytes2human, memoize_when_activated
@ -730,7 +729,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
                )
                func = getattr(libnvml, 'nvmlDeviceGet' + pascal_case + suffix)

-            @ttl_cache(ttl=1.0)
            def attribute(*args: Any, **kwargs: Any) -> Any:
                try:
                    return libnvml.nvmlQuery(
@ -877,7 +875,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        return libnvml.nvmlQuery('nvmlDeviceGetSerial', self.handle)

    @memoize_when_activated
-    @ttl_cache(ttl=1.0)
    def memory_info(self) -> MemoryInfo:  # in bytes
        """Return a named tuple with memory information (in bytes) for the device.

@ -982,7 +979,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        return f'{self.memory_used_human()} / {self.memory_total_human()}'

    @memoize_when_activated
-    @ttl_cache(ttl=1.0)
    def bar1_memory_info(self) -> MemoryInfo:  # in bytes
        """Return a named tuple with BAR1 memory information (in bytes) for the device.

@ -1069,7 +1065,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        return f'{self.bar1_memory_used_human()} / {self.bar1_memory_total_human()}'

    @memoize_when_activated
-    @ttl_cache(ttl=1.0)
    def utilization_rates(self) -> UtilizationRates:  # in percentage
        """Return a named tuple with GPU utilization rates (in percentage) for the device.

@ -1143,7 +1138,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        return self.utilization_rates().decoder

    @memoize_when_activated
-    @ttl_cache(ttl=5.0)
    def clock_infos(self) -> ClockInfos:  # in MHz
        """Return a named tuple with current clock speeds (in MHz) for the device.

@ -1168,7 +1162,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
    clocks = clock_infos

    @memoize_when_activated
-    @ttl_cache(ttl=5.0)
    def max_clock_infos(self) -> ClockInfos:  # in MHz
        """Return a named tuple with maximum clock speeds (in MHz) for the device.

@ -1309,7 +1302,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        """  # pylint: disable=line-too-long
        return self.max_clock_infos().video

-    @ttl_cache(ttl=5.0)
    def fan_speed(self) -> int | NaType:  # in percentage
        """The fan speed value is the percent of the product's maximum noise tolerance fan speed that the device's fan is currently intended to run at.

@ -1329,7 +1321,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        """  # pylint: disable=line-too-long
        return libnvml.nvmlQuery('nvmlDeviceGetFanSpeed', self.handle)

-    @ttl_cache(ttl=5.0)
    def temperature(self) -> int | NaType:  # in Celsius
        """Core GPU temperature in degrees C.

@ -1349,7 +1340,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        )

    @memoize_when_activated
-    @ttl_cache(ttl=5.0)
    def power_usage(self) -> int | NaType:  # in milliwatts (mW)
        """The last measured power draw for the entire board in milliwatts.

@ -1367,7 +1357,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
    power_draw = power_usage  # in milliwatts (mW)

    @memoize_when_activated
-    @ttl_cache(ttl=60.0)
    def power_limit(self) -> int | NaType:  # in milliwatts (mW)
        """The software power limit in milliwatts.

@ -1398,7 +1387,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
            power_limit = f'{round(power_limit / 1000.0)}W'
        return f'{power_usage} / {power_limit}'

-    @ttl_cache(ttl=60.0)
    def display_active(self) -> str | NaType:
        """A flag that indicates whether a display is initialized on the GPU's (e.g. memory is allocated on the device for display).

@ -1421,7 +1409,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
            NA,
        )

-    @ttl_cache(ttl=60.0)
    def display_mode(self) -> str | NaType:
        """A flag that indicates whether a physical display (e.g. monitor) is currently connected to any of the GPU's connectors.

@ -1443,7 +1430,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
            NA,
        )

-    @ttl_cache(ttl=60.0)
    def current_driver_model(self) -> str | NaType:
        """The driver model currently in use.

@ -1471,7 +1457,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me

    driver_model = current_driver_model

-    @ttl_cache(ttl=60.0)
    def persistence_mode(self) -> str | NaType:
        """A flag that indicates whether persistence mode is enabled for the GPU. Value is either "Enabled" or "Disabled".

@ -1495,7 +1480,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
            NA,
        )

-    @ttl_cache(ttl=5.0)
    def performance_state(self) -> str | NaType:
        """The current performance state for the GPU. States range from P0 (maximum performance) to P12 (minimum performance).

@ -1513,7 +1497,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
            performance_state = 'P' + str(performance_state)
        return performance_state

-    @ttl_cache(ttl=5.0)
    def total_volatile_uncorrected_ecc_errors(self) -> int | NaType:
        """Total errors detected across entire chip.

@ -1533,7 +1516,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
            libnvml.NVML_VOLATILE_ECC,
        )

-    @ttl_cache(ttl=60.0)
    def compute_mode(self) -> str | NaType:
        """The compute mode flag indicates whether individual or multiple compute applications may run on the GPU.

@ -1588,7 +1570,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
            self._is_mig_device = bool(is_mig_device)  # nvmlDeviceIsMigDeviceHandle returns c_uint
        return self._is_mig_device

-    @ttl_cache(ttl=60.0)
    def mig_mode(self) -> str | NaType:
        """The MIG mode that the GPU is currently operating under.

@ -1653,7 +1634,6 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
            return [self]
        return self.mig_devices()

-    @ttl_cache(ttl=2.0)
    def processes(self) -> dict[int, GpuProcess]:
        """Return a dictionary of processes running on the GPU.

@ -1834,7 +1814,6 @@ class PhysicalDevice(Device):
        """
        return self._nvml_index

-    @ttl_cache(ttl=60.0)
    def max_mig_device_count(self) -> int:
        """Return the maximum number of MIG instances the device supports.

@ -1847,7 +1826,6 @@ class PhysicalDevice(Device):
            ignore_function_not_found=True,
        )

-    @ttl_cache(ttl=60.0)
    def mig_device(self, mig_index: int) -> MigDevice:
        """Return a child MIG device of the given index.

@ -1858,7 +1836,6 @@ class PhysicalDevice(Device):
        with _global_physical_device(self):
            return MigDevice(index=(self.index, mig_index))

-    @ttl_cache(ttl=60.0)
    def mig_devices(self) -> list[MigDevice]:
        """Return a list of children MIG devices of the current device.

@ -2496,7 +2473,7 @@ def _get_global_physical_device() -> PhysicalDevice:
        return _GLOBAL_PHYSICAL_DEVICE


-@ttl_cache(ttl=300.0)
+@functools.lru_cache()
 def _parse_cuda_visible_devices(  # pylint: disable=too-many-branches,too-many-statements
    cuda_visible_devices: str | None = None,
    format: str = 'index',  # pylint: disable=redefined-builtin
--- a/nvitop/api/host.py
+++ b/nvitop/api/host.py
@ -27,7 +27,6 @@ import time as _time
 from typing import Callable as _Callable

 import psutil as _psutil
-from cachetools.func import ttl_cache as _ttl_cache
 from psutil import *  # noqa: F403 # pylint: disable=wildcard-import,unused-wildcard-import,redefined-builtin


@ -48,17 +47,18 @@ PsutilError = Error  # make alias # noqa: F405
 del Error  # noqa: F821 # pylint: disable=undefined-variable


-cpu_percent = _ttl_cache(ttl=0.25)(_psutil.cpu_percent)
-virtual_memory = _ttl_cache(ttl=0.25)(_psutil.virtual_memory)
-swap_memory = _ttl_cache(ttl=0.25)(_psutil.swap_memory)
+cpu_percent = _psutil.cpu_percent
+virtual_memory = _psutil.virtual_memory
+swap_memory = _psutil.swap_memory


-try:
-    load_average: _Callable[[], tuple[float, float, float]] = _ttl_cache(ttl=2.0)(
-        _psutil.getloadavg,
-    )
-    load_average.__doc__ = """Get the system load average."""
-except AttributeError:
+if hasattr(_psutil, 'getloadavg'):
+
+    def load_average() -> tuple[float, float, float]:
+        """Get the system load average."""
+        return _psutil.getloadavg()
+
+else:

    def load_average() -> None:
        """Get the system load average."""
--- a/nvitop/api/process.py
+++ b/nvitop/api/process.py
@ -652,7 +652,6 @@ class GpuProcess:  # pylint: disable=too-many-instance-attributes,too-many-publi
        """Update the GPU consumption status from a new NVML query."""
        self.set_gpu_memory(NA)
        self.set_gpu_utilization(NA, NA, NA, NA)
-        self.device.processes.cache_clear()
        self.device.processes()
        return self.gpu_memory()

--- a/nvitop/gui/library/device.py
+++ b/nvitop/gui/library/device.py
@ -3,6 +3,8 @@

 # pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring

+from cachetools.func import ttl_cache
+
 from nvitop.api import NA
 from nvitop.api import MigDevice as MigDeviceBase
 from nvitop.api import PhysicalDevice as DeviceBase
@ -78,19 +80,19 @@ class Device(DeviceBase):
            self.as_snapshot()
        return self._snapshot

-    def mig_devices(self):
-        mig_devices = []
-
-        if self.is_mig_mode_enabled():
-            for mig_index in range(self.max_mig_device_count()):
-                try:
-                    mig_device = MigDevice(index=(self.index, mig_index))
-                except libnvml.NVMLError:
-                    break
-                else:
-                    mig_devices.append(mig_device)
-
-        return mig_devices
+    fan_speed = ttl_cache(ttl=5.0)(DeviceBase.fan_speed)
+    temperature = ttl_cache(ttl=5.0)(DeviceBase.temperature)
+    power_usage = ttl_cache(ttl=5.0)(DeviceBase.power_usage)
+    display_active = ttl_cache(ttl=5.0)(DeviceBase.display_active)
+    display_mode = ttl_cache(ttl=5.0)(DeviceBase.display_mode)
+    current_driver_model = ttl_cache(ttl=5.0)(DeviceBase.current_driver_model)
+    persistence_mode = ttl_cache(ttl=5.0)(DeviceBase.persistence_mode)
+    performance_state = ttl_cache(ttl=5.0)(DeviceBase.performance_state)
+    total_volatile_uncorrected_ecc_errors = ttl_cache(ttl=5.0)(
+        DeviceBase.total_volatile_uncorrected_ecc_errors,
+    )
+    compute_mode = ttl_cache(ttl=5.0)(DeviceBase.compute_mode)
+    mig_mode = ttl_cache(ttl=5.0)(DeviceBase.mig_mode)

    def memory_percent_string(self):  # in percentage
        return utilization2string(self.memory_percent())