chore(cli): add message for missing functions on CUDA 10.x driver

Signed-off-by: Xuehai Pan <XuehaiPan@pku.edu.cn>
This commit is contained in:
Xuehai Pan 2022-07-05 00:06:13 +08:00
parent 9406192302
commit b214e0a713
2 changed files with 23 additions and 7 deletions

View file

@ -208,10 +208,12 @@ def main(): # pylint: disable=too-many-branches,too-many-statements,too-many-lo
top.destroy()
if len(nvml.UNKNOWN_FUNCTIONS) > 0:
messages.append('ERROR: A FunctionNotFound error occurred while calling:')
if len(nvml.UNKNOWN_FUNCTIONS) > 1:
messages[-1] = messages[-1].replace('A FunctionNotFound error', 'Some FunctionNotFound errors')
messages.extend([
unknown_function_messages = [
'ERROR: Some FunctionNotFound errors occurred while calling:'
if len(nvml.UNKNOWN_FUNCTIONS) > 1
else 'ERROR: A FunctionNotFound error occurred while calling:'
]
unknown_function_messages.extend([
*list(map(' nvmlQuery({.__name__!r}, *args, **kwargs)'.format, nvml.UNKNOWN_FUNCTIONS)),
('Please verify whether the `{0}` package is compatible with your NVIDIA driver version.\n'
'You can check the release history of `{0}` and install the compatible version manually.\n'
@ -220,6 +222,20 @@ def main(): # pylint: disable=too-many-branches,too-many-statements,too-many-lo
colored('https://github.com/XuehaiPan/nvitop#installation', attrs=('underline',))
)
])
# Collapse the collected FunctionNotFound report lines into one message.
message = '\n'.join(unknown_function_messages)
# `and` binds tighter than `or`, so the two membership tests must be grouped
# explicitly. Without the grouping, a missing
# `nvmlDeviceGetComputeRunningProcesses` alone would trigger the CUDA 10.x hint
# on any driver, regardless of the reported CUDA version.
missing_process_query = (
    'nvmlDeviceGetComputeRunningProcesses' in message
    or 'nvmlDeviceGetGraphicsRunningProcesses' in message
)
# NOTE(review): assumes `Device.cuda_version()` returns a str (the original
# code already calls `.startswith` on it) -- confirm against `Device`.
if missing_process_query and Device.cuda_version().startswith('10.'):
    # Append the upgrade/reinstall hint, substituting the actual driver version.
    message = '\n'.join((
        message, '',
        'You are using CUDA 10.x driver (yours is: @VERSION@) which is too old. Please contact',
        'your system admin to update the NVIDIA driver, or reinstall `nvitop` using:',
        ' pip3 install "nvitop[cuda10]"'
    )).replace('@VERSION@', Device.driver_version())
messages.append(message)
if len(messages) > 0:
for message in messages:
if message.startswith('ERROR:'):

View file

@ -28,7 +28,7 @@ class libnvml:
"""Base exception class for NVML query errors."""
LOGGER = logging.getLogger('NVML')
UNKNOWN_FUNCTIONS = set()
UNKNOWN_FUNCTIONS = {}
VERSIONED_PATTERN = re.compile(r'^(?P<name>\w+)(?P<suffix>_v(\d)+)$')
c_nvmlDevice_t = pynvml.c_nvmlDevice_t
@ -226,11 +226,11 @@ class libnvml:
raise nvml.NVMLError_FunctionNotFound from e
retval = func(*args, **kwargs)
except nvml.NVMLError_FunctionNotFound: # pylint: disable=no-member
except nvml.NVMLError_FunctionNotFound as e: # pylint: disable=no-member
if not ignore_function_not_found:
with self._lock:
if func not in self.UNKNOWN_FUNCTIONS:
self.UNKNOWN_FUNCTIONS.add(func)
self.UNKNOWN_FUNCTIONS[func] = e
self.LOGGER.error(
'ERROR: A FunctionNotFound error occurred while calling %s.\n'
'Please verify whether the `nvidia-ml-py` package is '