mirror of
https://github.com/XuehaiPan/nvitop.git
synced 2026-05-22 06:05:47 -06:00
chore(core/libnvml): add notes for incompatible nvidia-ml-py package for memory info version 2 APIs
Signed-off-by: Xuehai Pan <XuehaiPan@pku.edu.cn>
This commit is contained in:
parent
34253e521e
commit
c1039222df
2 changed files with 92 additions and 30 deletions
|
|
@ -353,9 +353,8 @@ def main(): # pylint: disable=too-many-branches,too-many-statements,too-many-lo
|
|||
parent = HostProcess().parent()
|
||||
grandparent = parent.parent() if parent is not None else None
|
||||
if grandparent is not None and parent.name() == 'sh' and grandparent.name() == 'watch':
|
||||
print(
|
||||
'HINT: You are running `nvitop` under `watch` command. Please try `nvitop -m` directly.',
|
||||
file=sys.stderr,
|
||||
messages.append(
|
||||
'HINT: You are running `nvitop` under `watch` command. Please try `nvitop -m` directly.'
|
||||
)
|
||||
|
||||
ui.print()
|
||||
|
|
@ -397,12 +396,32 @@ def main(): # pylint: disable=too-many-branches,too-many-statements,too-many-lo
|
|||
).replace('@VERSION@', Device.driver_version())
|
||||
messages.append(message)
|
||||
|
||||
# pylint: disable-next=protected-access
|
||||
if libnvml._driver_get_memory_info_v2_available and not libnvml._pynvml_memory_v2_available:
|
||||
messages.append(
|
||||
(
|
||||
'WARNING: The `{0}` package does not support the NVML memory info version 2 APIs, which would\n'
|
||||
'get inaccurate results. Please upgrade `{0}` via `{1}`.'
|
||||
).format(
|
||||
colored('nvidia-ml-py', attrs=('bold',)),
|
||||
colored('pip3 install --upgrade nvitop nvidia-ml-py', attrs=('bold',)),
|
||||
)
|
||||
)
|
||||
|
||||
if len(messages) > 0:
|
||||
for message in messages:
|
||||
if message.startswith('ERROR:'):
|
||||
message = message.replace(
|
||||
'ERROR:', colored('ERROR:', color='red', attrs=('bold',)), 1
|
||||
)
|
||||
elif message.startswith('WARNING:'):
|
||||
message = message.replace(
|
||||
'WARNING:', colored('WARNING:', color='yellow', attrs=('bold',)), 1
|
||||
)
|
||||
elif message.startswith('HINT:'):
|
||||
message = message.replace(
|
||||
'HINT:', colored('HINT:', color='green', attrs=('bold',)), 1
|
||||
)
|
||||
print(message, file=sys.stderr)
|
||||
return 1
|
||||
return 0
|
||||
|
|
|
|||
|
|
@ -541,10 +541,12 @@ __patch_backward_compatibility_layers()
|
|||
del __patch_backward_compatibility_layers
|
||||
|
||||
|
||||
__memory_info_v2_available = None
|
||||
_pynvml_memory_v2_available = hasattr(_pynvml, 'nvmlMemory_v2')
|
||||
_pynvml_get_memory_info_v2_available = _pynvml_memory_v2_available
|
||||
_driver_get_memory_info_v2_available = None
|
||||
|
||||
|
||||
def nvmlDeviceGetMemoryInfo(handle): # pylint: disable=function-redefined
|
||||
def nvmlDeviceGetMemoryInfo(handle): # pylint: disable=function-redefined,too-many-branches
|
||||
"""Retrieves the amount of used, free, reserved and total memory available on the device, in bytes.
|
||||
|
||||
Note:
|
||||
|
|
@ -567,36 +569,77 @@ def nvmlDeviceGetMemoryInfo(handle): # pylint: disable=function-redefined
|
|||
On any unexpected error.
|
||||
"""
|
||||
|
||||
global __memory_info_v2_available # pylint: disable=global-statement
|
||||
global _pynvml_get_memory_info_v2_available, _driver_get_memory_info_v2_available # pylint: disable=global-statement
|
||||
|
||||
if __memory_info_v2_available is None:
|
||||
if not hasattr(_pynvml, 'nvmlMemory_v2'):
|
||||
_lazy_init()
|
||||
|
||||
if _driver_get_memory_info_v2_available is None:
|
||||
try:
|
||||
# pylint: disable-next=protected-access
|
||||
_pynvml._nvmlGetFunctionPointer('nvmlDeviceGetMemoryInfo_v2')
|
||||
except NVMLError_FunctionNotFound:
|
||||
with __lock:
|
||||
__memory_info_v2_available = False
|
||||
_driver_get_memory_info_v2_available = False
|
||||
_pynvml_get_memory_info_v2_available = False
|
||||
else:
|
||||
with __lock:
|
||||
_driver_get_memory_info_v2_available = True
|
||||
|
||||
if _driver_get_memory_info_v2_available:
|
||||
if _pynvml_memory_v2_available:
|
||||
# driver ✔ pynvml ?
|
||||
try:
|
||||
# pylint: disable-next=unexpected-keyword-arg,no-member
|
||||
retval = _pynvml.nvmlDeviceGetMemoryInfo(handle, version=_pynvml.nvmlMemory_v2)
|
||||
except TypeError as ex:
|
||||
if 'unexpected keyword argument' in str(ex).lower():
|
||||
# driver ✔ pynvml ✘
|
||||
with __lock:
|
||||
_pynvml_get_memory_info_v2_available = False
|
||||
LOGGER.debug(
|
||||
'NVML memory info version 2 is not available due to incompatible `nvidia-ml-py` package.'
|
||||
)
|
||||
else:
|
||||
# driver ✔ pynvml ? user ✘
|
||||
with __lock:
|
||||
_driver_get_memory_info_v2_available = (
|
||||
None # unset the flag for user exceptions
|
||||
)
|
||||
raise
|
||||
except (NVMLError_FunctionNotFound, NVMLError_Unknown):
|
||||
# driver ✔ pynvml ✘
|
||||
with __lock:
|
||||
_pynvml_get_memory_info_v2_available = False
|
||||
LOGGER.debug(
|
||||
'NVML memory info version 2 is not available due to incompatible NVIDIA driver.'
|
||||
)
|
||||
else:
|
||||
# driver ✔ pynvml ✔
|
||||
LOGGER.debug('NVML memory info version 2 is available.')
|
||||
return retval
|
||||
else:
|
||||
# driver ✔ pynvml ✘
|
||||
LOGGER.debug(
|
||||
'NVML constant `nvmlMemory_v2` not found in package `nvidia-ml-py`, but '
|
||||
'your NVIDIA driver does support the NVML memory info version 2 APIs. NVML '
|
||||
'memory info version 2 is not available due to the legacy dependencies. '
|
||||
'Please consider upgrading your `nvidia-ml-py` package by running '
|
||||
'`pip3 install --upgrade nvitop nvidia-ml-py`.'
|
||||
)
|
||||
elif _pynvml_memory_v2_available:
|
||||
# driver ✘ pynvml ?
|
||||
LOGGER.debug(
|
||||
'NVML constant `nvmlMemory_v2` not found. NVML memory info version 2 is not available.'
|
||||
'NVML memory info version 2 is not available due to incompatible NVIDIA driver.'
|
||||
)
|
||||
else:
|
||||
try:
|
||||
# pylint: disable-next=unexpected-keyword-arg,no-member
|
||||
retval = _pynvml.nvmlDeviceGetMemoryInfo(handle, version=_pynvml.nvmlMemory_v2)
|
||||
except TypeError as ex:
|
||||
if 'unexpected keyword argument' in str(ex).lower():
|
||||
with __lock:
|
||||
__memory_info_v2_available = False
|
||||
LOGGER.debug('NVML memory info version 2 is not available.')
|
||||
else:
|
||||
raise
|
||||
except (NVMLError_FunctionNotFound, NVMLError_Unknown):
|
||||
with __lock:
|
||||
__memory_info_v2_available = False
|
||||
LOGGER.debug('NVML memory info version 2 is not available.')
|
||||
else:
|
||||
with __lock:
|
||||
__memory_info_v2_available = True
|
||||
LOGGER.debug('NVML memory info version 2 is available.')
|
||||
return retval
|
||||
elif __memory_info_v2_available:
|
||||
# driver ✘ pynvml ✘
|
||||
LOGGER.debug(
|
||||
'NVML constant `nvmlMemory_v2` not found in package `nvidia-ml-py`, and '
|
||||
'your NVIDIA driver does not support the NVML memory info version 2 APIs. '
|
||||
'NVML memory info version 2 is not available.'
|
||||
)
|
||||
|
||||
elif _pynvml_get_memory_info_v2_available:
|
||||
# pylint: disable-next=unexpected-keyword-arg
|
||||
return _pynvml.nvmlDeviceGetMemoryInfo(handle, version=_pynvml.nvmlMemory_v2)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue