refactor(api): move TTLCache usage to CLI-only (#66)

This commit is contained in:
Xuehai Pan 2023-04-07 16:51:07 +08:00 committed by GitHub
parent df42d0c0f0
commit c883884073
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 38 additions and 50 deletions

View file

@ -57,6 +57,16 @@ jobs:
python -m nvitop.select --version
python -m nvitop.select --help
- name: Import tests (Python 3.7)
run: |
"${{ steps.py37.outputs.python-path }}" -m pip install --upgrade pip setuptools
"${{ steps.py37.outputs.python-path }}" -m pip install -r requirements.txt
"${{ steps.py37.outputs.python-path }}" -c 'import nvitop'
"${{ steps.py37.outputs.python-path }}" -m nvitop --version
"${{ steps.py37.outputs.python-path }}" -m nvitop --help
"${{ steps.py37.outputs.python-path }}" -m nvitop.select --version
"${{ steps.py37.outputs.python-path }}" -m nvitop.select --help
- name: Install linters
run: |
python -m pip install --upgrade pre-commit pylint[spelling]

View file

@ -17,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
-
- Move `TTLCache` usage to CLI-only by [@XuehaiPan](https://github.com/XuehaiPan) in [#66](https://github.com/XuehaiPan/nvitop/pull/66).
### Fixed

View file

@ -104,6 +104,7 @@ Examples:
from __future__ import annotations
import contextlib
import functools
import multiprocessing as mp
import os
import re
@ -111,8 +112,6 @@ import threading
from collections import OrderedDict
from typing import Any, Callable, Iterable, NamedTuple
from cachetools.func import ttl_cache
from nvitop.api import libcuda, libcudart, libnvml
from nvitop.api.process import GpuProcess
from nvitop.api.utils import NA, NaType, Snapshot, boolify, bytes2human, memoize_when_activated
@ -730,7 +729,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
)
func = getattr(libnvml, 'nvmlDeviceGet' + pascal_case + suffix)
@ttl_cache(ttl=1.0)
def attribute(*args: Any, **kwargs: Any) -> Any:
try:
return libnvml.nvmlQuery(
@ -877,7 +875,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
return libnvml.nvmlQuery('nvmlDeviceGetSerial', self.handle)
@memoize_when_activated
@ttl_cache(ttl=1.0)
def memory_info(self) -> MemoryInfo: # in bytes
"""Return a named tuple with memory information (in bytes) for the device.
@ -982,7 +979,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
return f'{self.memory_used_human()} / {self.memory_total_human()}'
@memoize_when_activated
@ttl_cache(ttl=1.0)
def bar1_memory_info(self) -> MemoryInfo: # in bytes
"""Return a named tuple with BAR1 memory information (in bytes) for the device.
@ -1069,7 +1065,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
return f'{self.bar1_memory_used_human()} / {self.bar1_memory_total_human()}'
@memoize_when_activated
@ttl_cache(ttl=1.0)
def utilization_rates(self) -> UtilizationRates: # in percentage
"""Return a named tuple with GPU utilization rates (in percentage) for the device.
@ -1143,7 +1138,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
return self.utilization_rates().decoder
@memoize_when_activated
@ttl_cache(ttl=5.0)
def clock_infos(self) -> ClockInfos: # in MHz
"""Return a named tuple with current clock speeds (in MHz) for the device.
@ -1168,7 +1162,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
clocks = clock_infos
@memoize_when_activated
@ttl_cache(ttl=5.0)
def max_clock_infos(self) -> ClockInfos: # in MHz
"""Return a named tuple with maximum clock speeds (in MHz) for the device.
@ -1309,7 +1302,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
""" # pylint: disable=line-too-long
return self.max_clock_infos().video
@ttl_cache(ttl=5.0)
def fan_speed(self) -> int | NaType: # in percentage
"""The fan speed value is the percent of the product's maximum noise tolerance fan speed that the device's fan is currently intended to run at.
@ -1329,7 +1321,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
""" # pylint: disable=line-too-long
return libnvml.nvmlQuery('nvmlDeviceGetFanSpeed', self.handle)
@ttl_cache(ttl=5.0)
def temperature(self) -> int | NaType: # in Celsius
"""Core GPU temperature in degrees C.
@ -1349,7 +1340,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
)
@memoize_when_activated
@ttl_cache(ttl=5.0)
def power_usage(self) -> int | NaType: # in milliwatts (mW)
"""The last measured power draw for the entire board in milliwatts.
@ -1367,7 +1357,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
power_draw = power_usage # in milliwatts (mW)
@memoize_when_activated
@ttl_cache(ttl=60.0)
def power_limit(self) -> int | NaType: # in milliwatts (mW)
"""The software power limit in milliwatts.
@ -1398,7 +1387,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
power_limit = f'{round(power_limit / 1000.0)}W'
return f'{power_usage} / {power_limit}'
@ttl_cache(ttl=60.0)
def display_active(self) -> str | NaType:
"""A flag that indicates whether a display is initialized on the GPU's (e.g. memory is allocated on the device for display).
@ -1421,7 +1409,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
NA,
)
@ttl_cache(ttl=60.0)
def display_mode(self) -> str | NaType:
"""A flag that indicates whether a physical display (e.g. monitor) is currently connected to any of the GPU's connectors.
@ -1443,7 +1430,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
NA,
)
@ttl_cache(ttl=60.0)
def current_driver_model(self) -> str | NaType:
"""The driver model currently in use.
@ -1471,7 +1457,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
driver_model = current_driver_model
@ttl_cache(ttl=60.0)
def persistence_mode(self) -> str | NaType:
"""A flag that indicates whether persistence mode is enabled for the GPU. Value is either "Enabled" or "Disabled".
@ -1495,7 +1480,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
NA,
)
@ttl_cache(ttl=5.0)
def performance_state(self) -> str | NaType:
"""The current performance state for the GPU. States range from P0 (maximum performance) to P12 (minimum performance).
@ -1513,7 +1497,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
performance_state = 'P' + str(performance_state)
return performance_state
@ttl_cache(ttl=5.0)
def total_volatile_uncorrected_ecc_errors(self) -> int | NaType:
"""Total errors detected across entire chip.
@ -1533,7 +1516,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
libnvml.NVML_VOLATILE_ECC,
)
@ttl_cache(ttl=60.0)
def compute_mode(self) -> str | NaType:
"""The compute mode flag indicates whether individual or multiple compute applications may run on the GPU.
@ -1588,7 +1570,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
self._is_mig_device = bool(is_mig_device) # nvmlDeviceIsMigDeviceHandle returns c_uint
return self._is_mig_device
@ttl_cache(ttl=60.0)
def mig_mode(self) -> str | NaType:
"""The MIG mode that the GPU is currently operating under.
@ -1653,7 +1634,6 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
return [self]
return self.mig_devices()
@ttl_cache(ttl=2.0)
def processes(self) -> dict[int, GpuProcess]:
"""Return a dictionary of processes running on the GPU.
@ -1834,7 +1814,6 @@ class PhysicalDevice(Device):
"""
return self._nvml_index
@ttl_cache(ttl=60.0)
def max_mig_device_count(self) -> int:
"""Return the maximum number of MIG instances the device supports.
@ -1847,7 +1826,6 @@ class PhysicalDevice(Device):
ignore_function_not_found=True,
)
@ttl_cache(ttl=60.0)
def mig_device(self, mig_index: int) -> MigDevice:
"""Return a child MIG device of the given index.
@ -1858,7 +1836,6 @@ class PhysicalDevice(Device):
with _global_physical_device(self):
return MigDevice(index=(self.index, mig_index))
@ttl_cache(ttl=60.0)
def mig_devices(self) -> list[MigDevice]:
"""Return a list of children MIG devices of the current device.
@ -2496,7 +2473,7 @@ def _get_global_physical_device() -> PhysicalDevice:
return _GLOBAL_PHYSICAL_DEVICE
@ttl_cache(ttl=300.0)
@functools.lru_cache()
def _parse_cuda_visible_devices( # pylint: disable=too-many-branches,too-many-statements
cuda_visible_devices: str | None = None,
format: str = 'index', # pylint: disable=redefined-builtin

View file

@ -27,7 +27,6 @@ import time as _time
from typing import Callable as _Callable
import psutil as _psutil
from cachetools.func import ttl_cache as _ttl_cache
from psutil import * # noqa: F403 # pylint: disable=wildcard-import,unused-wildcard-import,redefined-builtin
@ -48,17 +47,18 @@ PsutilError = Error # make alias # noqa: F405
del Error # noqa: F821 # pylint: disable=undefined-variable
cpu_percent = _ttl_cache(ttl=0.25)(_psutil.cpu_percent)
virtual_memory = _ttl_cache(ttl=0.25)(_psutil.virtual_memory)
swap_memory = _ttl_cache(ttl=0.25)(_psutil.swap_memory)
cpu_percent = _psutil.cpu_percent
virtual_memory = _psutil.virtual_memory
swap_memory = _psutil.swap_memory
try:
load_average: _Callable[[], tuple[float, float, float]] = _ttl_cache(ttl=2.0)(
_psutil.getloadavg,
)
load_average.__doc__ = """Get the system load average."""
except AttributeError:
if hasattr(_psutil, 'getloadavg'):
def load_average() -> tuple[float, float, float]:
"""Get the system load average."""
return _psutil.getloadavg()
else:
def load_average() -> None:
"""Get the system load average."""

View file

@ -652,7 +652,6 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
"""Update the GPU consumption status from a new NVML query."""
self.set_gpu_memory(NA)
self.set_gpu_utilization(NA, NA, NA, NA)
self.device.processes.cache_clear()
self.device.processes()
return self.gpu_memory()

View file

@ -3,6 +3,8 @@
# pylint: disable=missing-module-docstring,missing-class-docstring,missing-function-docstring
from cachetools.func import ttl_cache
from nvitop.api import NA
from nvitop.api import MigDevice as MigDeviceBase
from nvitop.api import PhysicalDevice as DeviceBase
@ -78,19 +80,19 @@ class Device(DeviceBase):
self.as_snapshot()
return self._snapshot
def mig_devices(self):
    """Return the child MIG devices of this device as a list.

    When MIG mode is not enabled the result is an empty list. Otherwise
    MIG indices ``0 .. max_mig_device_count() - 1`` are probed in order,
    constructing one ``MigDevice`` per index; probing stops at the first
    index whose construction raises ``libnvml.NVMLError``.
    """
    if not self.is_mig_mode_enabled():
        return []

    children = []
    for slot in range(self.max_mig_device_count()):
        try:
            child = MigDevice(index=(self.index, slot))
        except libnvml.NVMLError:
            # The first failing index ends the scan; later indices are not tried.
            break
        children.append(child)
    return children
# CLI-side caching: re-bind selected query methods inherited from DeviceBase,
# each wrapped with cachetools' ``ttl_cache`` using a 5-second time-to-live,
# so repeated calls within that window reuse the previous result instead of
# invoking the underlying DeviceBase method again.
# NOTE(review): each wrapper is created once at class-definition time, so its
# cache is presumably shared by all instances and keyed on ``self`` — confirm
# that instances are hashable and that the default cache size is adequate.
fan_speed = ttl_cache(ttl=5.0)(DeviceBase.fan_speed)
temperature = ttl_cache(ttl=5.0)(DeviceBase.temperature)
power_usage = ttl_cache(ttl=5.0)(DeviceBase.power_usage)
display_active = ttl_cache(ttl=5.0)(DeviceBase.display_active)
display_mode = ttl_cache(ttl=5.0)(DeviceBase.display_mode)
current_driver_model = ttl_cache(ttl=5.0)(DeviceBase.current_driver_model)
persistence_mode = ttl_cache(ttl=5.0)(DeviceBase.persistence_mode)
performance_state = ttl_cache(ttl=5.0)(DeviceBase.performance_state)
total_volatile_uncorrected_ecc_errors = ttl_cache(ttl=5.0)(
DeviceBase.total_volatile_uncorrected_ecc_errors,
)
compute_mode = ttl_cache(ttl=5.0)(DeviceBase.compute_mode)
mig_mode = ttl_cache(ttl=5.0)(DeviceBase.mig_mode)
def memory_percent_string(self): # in percentage
return utilization2string(self.memory_percent())