mirror of
https://github.com/XuehaiPan/nvitop.git
synced 2026-05-15 14:15:55 -06:00
libmxsmi: cache mx-smi -L inventory separately with 60s TTL
Agent-Logs-Url: https://github.com/mhson-kyle/nvitop/sessions/9e63a25a-5033-4588-bfdd-3fb0d64c9d9f Co-authored-by: mhson-kyle <72399227+mhson-kyle@users.noreply.github.com>
This commit is contained in:
parent
a306d69a36
commit
dd9aeb7bca
1 changed files with 35 additions and 2 deletions
|
|
@ -109,6 +109,15 @@ _CACHE_LOCK = threading.RLock()
|
|||
_CACHE: MxSmiSnapshot | None = None
|
||||
_CACHE_EXPIRES_AT = 0.0
|
||||
|
||||
# Inventory data (UUID / name / bus_id) from ``mx-smi -L`` changes very rarely,
|
||||
# so we keep it in a separate cache with a much longer TTL to avoid spawning an
|
||||
# extra subprocess on every 0.25 s snapshot refresh.
|
||||
_LIST_CACHE_TTL = 60.0
|
||||
_LIST_CACHE_LOCK = threading.RLock()
|
||||
_LIST_CACHE: dict[int, DeviceInfo] | None = None
|
||||
_LIST_CACHE_VERSION: str | NaType = NA
|
||||
_LIST_CACHE_EXPIRES_AT = 0.0
|
||||
|
||||
_LIST_RE = re.compile(
|
||||
r'^GPU#(?P<index>\d+)\s+'
|
||||
r'(?P<name>.+?)\s+'
|
||||
|
|
@ -220,16 +229,40 @@ def snapshot(*, ttl: float = _CACHE_TTL) -> MxSmiSnapshot:
|
|||
|
||||
|
||||
def clear_cache() -> None:
|
||||
"""Clear the cached ``mx-smi`` snapshot."""
|
||||
"""Clear the cached ``mx-smi`` snapshot and device inventory."""
|
||||
global _CACHE, _CACHE_EXPIRES_AT # pylint: disable=global-statement
|
||||
global _LIST_CACHE, _LIST_CACHE_VERSION, _LIST_CACHE_EXPIRES_AT # pylint: disable=global-statement
|
||||
|
||||
with _CACHE_LOCK:
|
||||
_CACHE = None
|
||||
_CACHE_EXPIRES_AT = 0.0
|
||||
|
||||
with _LIST_CACHE_LOCK:
|
||||
_LIST_CACHE = None
|
||||
_LIST_CACHE_VERSION = NA
|
||||
_LIST_CACHE_EXPIRES_AT = 0.0
|
||||
|
||||
|
||||
def _get_inventory_cache() -> tuple[dict[int, DeviceInfo], str | NaType]:
|
||||
"""Return the cached ``mx-smi -L`` inventory, refreshing when stale."""
|
||||
global _LIST_CACHE, _LIST_CACHE_VERSION, _LIST_CACHE_EXPIRES_AT # pylint: disable=global-statement
|
||||
|
||||
now = time.monotonic()
|
||||
with _LIST_CACHE_LOCK:
|
||||
if _LIST_CACHE is not None and now < _LIST_CACHE_EXPIRES_AT:
|
||||
return _LIST_CACHE, _LIST_CACHE_VERSION
|
||||
|
||||
listed_devices, mxsmi_version = _parse_list_output(_run_mxsmi('-L'))
|
||||
|
||||
with _LIST_CACHE_LOCK:
|
||||
_LIST_CACHE = listed_devices
|
||||
_LIST_CACHE_VERSION = mxsmi_version
|
||||
_LIST_CACHE_EXPIRES_AT = time.monotonic() + _LIST_CACHE_TTL
|
||||
return _LIST_CACHE, _LIST_CACHE_VERSION
|
||||
|
||||
|
||||
def _take_snapshot() -> MxSmiSnapshot:
|
||||
listed_devices, listed_mxsmi_version = _parse_list_output(_run_mxsmi('-L'))
|
||||
listed_devices, listed_mxsmi_version = _get_inventory_cache()
|
||||
summary = _parse_summary_output(_run_mxsmi())
|
||||
|
||||
devices = listed_devices.copy()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue