mirror of
https://github.com/XuehaiPan/nvitop.git
synced 2026-05-15 14:15:55 -06:00
Fix incorrect memory reporting on coherent UMA platforms (GB10 / DGX Spark)
On GB10 / DGX Spark, nvmlDeviceGetMemoryInfo returns NVML_SUCCESS with total == system MemTotal (~121GB). As a result, nvitop displays the full system RAM as GPU memory instead of the memory that is actually allocatable. Fix: detect UMA by comparing the NVML-reported total against the system virtual memory total. If the NVML total is >= 90% of system RAM, treat the device as unified memory and use system virtual memory (MemAvailable) for display instead. Existing behavior for discrete GPUs is preserved. Note: this fix has not been independently validated on a coherent UMA system and still requires validation on GB10 / DGX Spark hardware.
This commit is contained in:
parent
a6761eb5c4
commit
de09aeb9f0
1 changed file with 15 additions and 7 deletions
|
|
@ -985,13 +985,21 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
memory_info = NA
|
||||
if libnvml.nvmlCheckReturn(memory_info):
|
||||
if memory_info.total > 0:
|
||||
return MemoryInfo(
|
||||
total=memory_info.total,
|
||||
free=memory_info.free,
|
||||
used=memory_info.used,
|
||||
reserved=getattr(memory_info, 'reserved', NA),
|
||||
)
|
||||
has_unified_memory = True
|
||||
# Detect coherent UMA platforms (e.g. GB10 Grace Blackwell):
|
||||
# nvmlDeviceGetMemoryInfo returns NVML_SUCCESS with total == system MemTotal (~121GB).
|
||||
# If total >= 90% of system RAM, treat as unified memory and use MemAvailable instead.
|
||||
vm = host.virtual_memory()
|
||||
if vm.total > 0 and memory_info.total >= vm.total * 9 // 10:
|
||||
has_unified_memory = True
|
||||
else:
|
||||
return MemoryInfo(
|
||||
total=memory_info.total,
|
||||
free=memory_info.free,
|
||||
used=memory_info.used,
|
||||
reserved=getattr(memory_info, 'reserved', NA),
|
||||
)
|
||||
else:
|
||||
has_unified_memory = True
|
||||
if has_unified_memory:
|
||||
# Device with unified memory
|
||||
# Use system virtual memory as these devices share host memory
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue