From b214e0a713697c0b2c9eb7a23bf1e2694d0da17b Mon Sep 17 00:00:00 2001
From: Xuehai Pan
Date: Tue, 5 Jul 2022 00:06:13 +0800
Subject: [PATCH] chore(cli): add message for missing functions on CUDA 10.x driver

Signed-off-by: Xuehai Pan
---
 nvitop/cli.py          | 25 +++++++++++++++++++++----
 nvitop/core/libnvml.py |  6 +++---
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/nvitop/cli.py b/nvitop/cli.py
index 9459d9b..1833f22 100644
--- a/nvitop/cli.py
+++ b/nvitop/cli.py
@@ -208,10 +208,12 @@ def main():  # pylint: disable=too-many-branches,too-many-statements,too-many-lo
     top.destroy()
 
     if len(nvml.UNKNOWN_FUNCTIONS) > 0:
-        messages.append('ERROR: A FunctionNotFound error occurred while calling:')
-        if len(nvml.UNKNOWN_FUNCTIONS) > 1:
-            messages[-1] = messages[-1].replace('A FunctionNotFound error', 'Some FunctionNotFound errors')
-        messages.extend([
+        unknown_function_messages = [
+            'ERROR: Some FunctionNotFound errors occurred while calling:'
+            if len(nvml.UNKNOWN_FUNCTIONS) > 1
+            else 'ERROR: A FunctionNotFound error occurred while calling:'
+        ]
+        unknown_function_messages.extend([
             *list(map(' nvmlQuery({.__name__!r}, *args, **kwargs)'.format, nvml.UNKNOWN_FUNCTIONS)),
             ('Please verify whether the `{0}` package is compatible with your NVIDIA driver version.\n'
              'You can check the release history of `{0}` and install the compatible version manually.\n'
@@ -220,6 +222,21 @@ def main():  # pylint: disable=too-many-branches,too-many-statements,too-many-lo
                 colored('https://github.com/XuehaiPan/nvitop#installation', attrs=('underline',))
             )
         ])
+        message = '\n'.join(unknown_function_messages)
+        # Group the two name checks so the CUDA 10.x test applies to both (`and` binds tighter than `or`).
+        missing_process_query = (
+            'nvmlDeviceGetComputeRunningProcesses' in message
+            or 'nvmlDeviceGetGraphicsRunningProcesses' in message
+        )
+        if missing_process_query and Device.cuda_version().startswith('10.'):
+            message = '\n'.join((
+                message, '',
+                'You are using CUDA 10.x driver (yours is: @VERSION@) which is too old. Please contact',
+                'your system admin to update the NVIDIA driver, or reinstall `nvitop` using:',
+                ' pip3 install "nvitop[cuda10]"'
+            )).replace('@VERSION@', Device.driver_version())
+        messages.append(message)
+
     if len(messages) > 0:
         for message in messages:
             if message.startswith('ERROR:'):
diff --git a/nvitop/core/libnvml.py b/nvitop/core/libnvml.py
index 36daa13..243b3f6 100644
--- a/nvitop/core/libnvml.py
+++ b/nvitop/core/libnvml.py
@@ -28,7 +28,7 @@ class libnvml:
         """Base exception class for NVML query errors."""
 
     LOGGER = logging.getLogger('NVML')
-    UNKNOWN_FUNCTIONS = set()
+    UNKNOWN_FUNCTIONS = {}
     VERSIONED_PATTERN = re.compile(r'^(?P\w+)(?P_v(\d)+)$')
 
     c_nvmlDevice_t = pynvml.c_nvmlDevice_t
@@ -226,11 +226,11 @@ class libnvml:
                     raise nvml.NVMLError_FunctionNotFound from e
 
             retval = func(*args, **kwargs)
-        except nvml.NVMLError_FunctionNotFound:  # pylint: disable=no-member
+        except nvml.NVMLError_FunctionNotFound as e:  # pylint: disable=no-member
             if not ignore_function_not_found:
                 with self._lock:
                     if func not in self.UNKNOWN_FUNCTIONS:
-                        self.UNKNOWN_FUNCTIONS.add(func)
+                        self.UNKNOWN_FUNCTIONS[func] = e
                         self.LOGGER.error(
                             'ERROR: A FunctionNotFound error occurred while calling %s.\n'
                             'Please verify whether the `nvidia-ml-py` package is '