mirror of
https://github.com/XuehaiPan/nvitop.git
synced 2026-05-15 14:15:55 -06:00
refactor(api): move TTLCache usage to CLI-only (#66)
This commit is contained in:
parent
df42d0c0f0
commit
c883884073
6 changed files with 38 additions and 50 deletions
10
.github/workflows/lint.yaml
vendored
10
.github/workflows/lint.yaml
vendored
|
|
@ -57,6 +57,16 @@ jobs:
|
|||
python -m nvitop.select --version
|
||||
python -m nvitop.select --help
|
||||
|
||||
- name: Import tests (Python 3.7)
|
||||
run: |
|
||||
"${{ steps.py37.outputs.python-path }}" -m pip install --upgrade pip setuptools
|
||||
"${{ steps.py37.outputs.python-path }}" -m pip install -r requirements.txt
|
||||
"${{ steps.py37.outputs.python-path }}" -c 'import nvitop'
|
||||
"${{ steps.py37.outputs.python-path }}" -m nvitop --version
|
||||
"${{ steps.py37.outputs.python-path }}" -m nvitop --help
|
||||
"${{ steps.py37.outputs.python-path }}" -m nvitop.select --version
|
||||
"${{ steps.py37.outputs.python-path }}" -m nvitop.select --help
|
||||
|
||||
- name: Install linters
|
||||
run: |
|
||||
python -m pip install --upgrade pre-commit pylint[spelling]
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
### Changed
|
||||
|
||||
-
|
||||
- Move `TTLCache` usage to CLI-only by [@XuehaiPan](https://github.com/XuehaiPan) in [#66](https://github.com/XuehaiPan/nvitop/pull/66).
|
||||
|
||||
### Fixed
|
||||
|
||||
|
|
|
|||
|
|
@ -104,6 +104,7 @@ Examples:
|
|||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import functools
|
||||
import multiprocessing as mp
|
||||
import os
|
||||
import re
|
||||
|
|
@ -111,8 +112,6 @@ import threading
|
|||
from collections import OrderedDict
|
||||
from typing import Any, Callable, Iterable, NamedTuple
|
||||
|
||||
from cachetools.func import ttl_cache
|
||||
|
||||
from nvitop.api import libcuda, libcudart, libnvml
|
||||
from nvitop.api.process import GpuProcess
|
||||
from nvitop.api.utils import NA, NaType, Snapshot, boolify, bytes2human, memoize_when_activated
|
||||
|
|
@ -730,7 +729,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
)
|
||||
func = getattr(libnvml, 'nvmlDeviceGet' + pascal_case + suffix)
|
||||
|
||||
@ttl_cache(ttl=1.0)
|
||||
def attribute(*args: Any, **kwargs: Any) -> Any:
|
||||
try:
|
||||
return libnvml.nvmlQuery(
|
||||
|
|
@ -877,7 +875,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
return libnvml.nvmlQuery('nvmlDeviceGetSerial', self.handle)
|
||||
|
||||
@memoize_when_activated
|
||||
@ttl_cache(ttl=1.0)
|
||||
def memory_info(self) -> MemoryInfo: # in bytes
|
||||
"""Return a named tuple with memory information (in bytes) for the device.
|
||||
|
||||
|
|
@ -982,7 +979,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
return f'{self.memory_used_human()} / {self.memory_total_human()}'
|
||||
|
||||
@memoize_when_activated
|
||||
@ttl_cache(ttl=1.0)
|
||||
def bar1_memory_info(self) -> MemoryInfo: # in bytes
|
||||
"""Return a named tuple with BAR1 memory information (in bytes) for the device.
|
||||
|
||||
|
|
@ -1069,7 +1065,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
return f'{self.bar1_memory_used_human()} / {self.bar1_memory_total_human()}'
|
||||
|
||||
@memoize_when_activated
|
||||
@ttl_cache(ttl=1.0)
|
||||
def utilization_rates(self) -> UtilizationRates: # in percentage
|
||||
"""Return a named tuple with GPU utilization rates (in percentage) for the device.
|
||||
|
||||
|
|
@ -1143,7 +1138,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
return self.utilization_rates().decoder
|
||||
|
||||
@memoize_when_activated
|
||||
@ttl_cache(ttl=5.0)
|
||||
def clock_infos(self) -> ClockInfos: # in MHz
|
||||
"""Return a named tuple with current clock speeds (in MHz) for the device.
|
||||
|
||||
|
|
@ -1168,7 +1162,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
clocks = clock_infos
|
||||
|
||||
@memoize_when_activated
|
||||
@ttl_cache(ttl=5.0)
|
||||
def max_clock_infos(self) -> ClockInfos: # in MHz
|
||||
"""Return a named tuple with maximum clock speeds (in MHz) for the device.
|
||||
|
||||
|
|
@ -1309,7 +1302,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
""" # pylint: disable=line-too-long
|
||||
return self.max_clock_infos().video
|
||||
|
||||
@ttl_cache(ttl=5.0)
|
||||
def fan_speed(self) -> int | NaType: # in percentage
|
||||
"""The fan speed value is the percent of the product's maximum noise tolerance fan speed that the device's fan is currently intended to run at.
|
||||
|
||||
|
|
@ -1329,7 +1321,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
""" # pylint: disable=line-too-long
|
||||
return libnvml.nvmlQuery('nvmlDeviceGetFanSpeed', self.handle)
|
||||
|
||||
@ttl_cache(ttl=5.0)
|
||||
def temperature(self) -> int | NaType: # in Celsius
|
||||
"""Core GPU temperature in degrees C.
|
||||
|
||||
|
|
@ -1349,7 +1340,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
)
|
||||
|
||||
@memoize_when_activated
|
||||
@ttl_cache(ttl=5.0)
|
||||
def power_usage(self) -> int | NaType: # in milliwatts (mW)
|
||||
"""The last measured power draw for the entire board in milliwatts.
|
||||
|
||||
|
|
@ -1367,7 +1357,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
power_draw = power_usage # in milliwatts (mW)
|
||||
|
||||
@memoize_when_activated
|
||||
@ttl_cache(ttl=60.0)
|
||||
def power_limit(self) -> int | NaType: # in milliwatts (mW)
|
||||
"""The software power limit in milliwatts.
|
||||
|
||||
|
|
@ -1398,7 +1387,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
power_limit = f'{round(power_limit / 1000.0)}W'
|
||||
return f'{power_usage} / {power_limit}'
|
||||
|
||||
@ttl_cache(ttl=60.0)
|
||||
def display_active(self) -> str | NaType:
|
||||
"""A flag that indicates whether a display is initialized on the GPU (e.g. memory is allocated on the device for display).
|
||||
|
||||
|
|
@ -1421,7 +1409,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
NA,
|
||||
)
|
||||
|
||||
@ttl_cache(ttl=60.0)
|
||||
def display_mode(self) -> str | NaType:
|
||||
"""A flag that indicates whether a physical display (e.g. monitor) is currently connected to any of the GPU's connectors.
|
||||
|
||||
|
|
@ -1443,7 +1430,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
NA,
|
||||
)
|
||||
|
||||
@ttl_cache(ttl=60.0)
|
||||
def current_driver_model(self) -> str | NaType:
|
||||
"""The driver model currently in use.
|
||||
|
||||
|
|
@ -1471,7 +1457,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
|
||||
driver_model = current_driver_model
|
||||
|
||||
@ttl_cache(ttl=60.0)
|
||||
def persistence_mode(self) -> str | NaType:
|
||||
"""A flag that indicates whether persistence mode is enabled for the GPU. Value is either "Enabled" or "Disabled".
|
||||
|
||||
|
|
@ -1495,7 +1480,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
NA,
|
||||
)
|
||||
|
||||
@ttl_cache(ttl=5.0)
|
||||
def performance_state(self) -> str | NaType:
|
||||
"""The current performance state for the GPU. States range from P0 (maximum performance) to P12 (minimum performance).
|
||||
|
||||
|
|
@ -1513,7 +1497,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
performance_state = 'P' + str(performance_state)
|
||||
return performance_state
|
||||
|
||||
@ttl_cache(ttl=5.0)
|
||||
def total_volatile_uncorrected_ecc_errors(self) -> int | NaType:
|
||||
"""Total errors detected across entire chip.
|
||||
|
||||
|
|
@ -1533,7 +1516,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
libnvml.NVML_VOLATILE_ECC,
|
||||
)
|
||||
|
||||
@ttl_cache(ttl=60.0)
|
||||
def compute_mode(self) -> str | NaType:
|
||||
"""The compute mode flag indicates whether individual or multiple compute applications may run on the GPU.
|
||||
|
||||
|
|
@ -1588,7 +1570,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
self._is_mig_device = bool(is_mig_device) # nvmlDeviceIsMigDeviceHandle returns c_uint
|
||||
return self._is_mig_device
|
||||
|
||||
@ttl_cache(ttl=60.0)
|
||||
def mig_mode(self) -> str | NaType:
|
||||
"""The MIG mode that the GPU is currently operating under.
|
||||
|
||||
|
|
@ -1653,7 +1634,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
return [self]
|
||||
return self.mig_devices()
|
||||
|
||||
@ttl_cache(ttl=2.0)
|
||||
def processes(self) -> dict[int, GpuProcess]:
|
||||
"""Return a dictionary of processes running on the GPU.
|
||||
|
||||
|
|
@ -1834,7 +1814,6 @@ class PhysicalDevice(Device):
|
|||
"""
|
||||
return self._nvml_index
|
||||
|
||||
@ttl_cache(ttl=60.0)
|
||||
def max_mig_device_count(self) -> int:
|
||||
"""Return the maximum number of MIG instances the device supports.
|
||||
|
||||
|
|
@ -1847,7 +1826,6 @@ class PhysicalDevice(Device):
|
|||
ignore_function_not_found=True,
|
||||
)
|
||||
|
||||
@ttl_cache(ttl=60.0)
|
||||
def mig_device(self, mig_index: int) -> MigDevice:
|
||||
"""Return a child MIG device of the given index.
|
||||
|
||||
|
|
@ -1858,7 +1836,6 @@ class PhysicalDevice(Device):
|
|||
with _global_physical_device(self):
|
||||
return MigDevice(index=(self.index, mig_index))
|
||||
|
||||
@ttl_cache(ttl=60.0)
|
||||
def mig_devices(self) -> list[MigDevice]:
|
||||
"""Return a list of children MIG devices of the current device.
|
||||
|
||||
|
|
@ -2496,7 +2473,7 @@ def _get_global_physical_device() -> PhysicalDevice:
|
|||
return _GLOBAL_PHYSICAL_DEVICE
|
||||
|
||||
|
||||
@ttl_cache(ttl=300.0)
|
||||
@functools.lru_cache()
|
||||
def _parse_cuda_visible_devices( # pylint: disable=too-many-branches,too-many-statements
|
||||
cuda_visible_devices: str | None = None,
|
||||
format: str = 'index', # pylint: disable=redefined-builtin
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ import time as _time
|
|||
from typing import Callable as _Callable
|
||||
|
||||
import psutil as _psutil
|
||||
from cachetools.func import ttl_cache as _ttl_cache
|
||||
from psutil import * # noqa: F403 # pylint: disable=wildcard-import,unused-wildcard-import,redefined-builtin
|
||||
|
||||
|
||||
|
|
@ -48,17 +47,18 @@ PsutilError = Error # make alias # noqa: F405
|
|||
del Error # noqa: F821 # pylint: disable=undefined-variable
|
||||
|
||||
|
||||
cpu_percent = _ttl_cache(ttl=0.25)(_psutil.cpu_percent)
|
||||
virtual_memory = _ttl_cache(ttl=0.25)(_psutil.virtual_memory)
|
||||
swap_memory = _ttl_cache(ttl=0.25)(_psutil.swap_memory)
|
||||
cpu_percent = _psutil.cpu_percent
|
||||
virtual_memory = _psutil.virtual_memory
|
||||
swap_memory = _psutil.swap_memory
|
||||
|
||||
|
||||
try:
|
||||
load_average: _Callable[[], tuple[float, float, float]] = _ttl_cache(ttl=2.0)(
|
||||
_psutil.getloadavg,
|
||||
)
|
||||
load_average.__doc__ = """Get the system load average."""
|
||||
except AttributeError:
|
||||
if hasattr(_psutil, 'getloadavg'):
|
||||
|
||||
def load_average() -> tuple[float, float, float]:
|
||||
"""Get the system load average."""
|
||||
return _psutil.getloadavg()
|
||||
|
||||
else:
|
||||
|
||||
def load_average() -> None:
|
||||
"""Get the system load average."""
|
||||
|
|
|
|||
|
|
@ -652,7 +652,6 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
|
|||
"""Update the GPU consumption status from a new NVML query."""
|
||||
self.set_gpu_memory(NA)
|
||||
self.set_gpu_utilization(NA, NA, NA, NA)
|
||||
self.device.processes.cache_clear()
|
||||
self.device.processes()
|
||||
return self.gpu_memory()
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring
|
||||
|
||||
from cachetools.func import ttl_cache
|
||||
|
||||
from nvitop.api import NA
|
||||
from nvitop.api import MigDevice as MigDeviceBase
|
||||
from nvitop.api import PhysicalDevice as DeviceBase
|
||||
|
|
@ -78,19 +80,19 @@ class Device(DeviceBase):
|
|||
self.as_snapshot()
|
||||
return self._snapshot
|
||||
|
||||
def mig_devices(self):
|
||||
mig_devices = []
|
||||
|
||||
if self.is_mig_mode_enabled():
|
||||
for mig_index in range(self.max_mig_device_count()):
|
||||
try:
|
||||
mig_device = MigDevice(index=(self.index, mig_index))
|
||||
except libnvml.NVMLError:
|
||||
break
|
||||
else:
|
||||
mig_devices.append(mig_device)
|
||||
|
||||
return mig_devices
|
||||
fan_speed = ttl_cache(ttl=5.0)(DeviceBase.fan_speed)
|
||||
temperature = ttl_cache(ttl=5.0)(DeviceBase.temperature)
|
||||
power_usage = ttl_cache(ttl=5.0)(DeviceBase.power_usage)
|
||||
display_active = ttl_cache(ttl=5.0)(DeviceBase.display_active)
|
||||
display_mode = ttl_cache(ttl=5.0)(DeviceBase.display_mode)
|
||||
current_driver_model = ttl_cache(ttl=5.0)(DeviceBase.current_driver_model)
|
||||
persistence_mode = ttl_cache(ttl=5.0)(DeviceBase.persistence_mode)
|
||||
performance_state = ttl_cache(ttl=5.0)(DeviceBase.performance_state)
|
||||
total_volatile_uncorrected_ecc_errors = ttl_cache(ttl=5.0)(
|
||||
DeviceBase.total_volatile_uncorrected_ecc_errors,
|
||||
)
|
||||
compute_mode = ttl_cache(ttl=5.0)(DeviceBase.compute_mode)
|
||||
mig_mode = ttl_cache(ttl=5.0)(DeviceBase.mig_mode)
|
||||
|
||||
def memory_percent_string(self): # in percentage
|
||||
return utilization2string(self.memory_percent())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue