docs: add Sphinx-based documents

Signed-off-by: Xuehai Pan <XuehaiPan@pku.edu.cn>
This commit is contained in:
Xuehai Pan 2022-07-01 12:02:09 +08:00
parent 3bb17f6cc9
commit 102ee45960
24 changed files with 635 additions and 172 deletions

20
docs/Makefile Normal file
View file

@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# Every target is forwarded to `sphinx-build -M <target>` ("make mode"), so
# the set of valid targets (html, latexpdf, clean, ...) is whatever the
# installed Sphinx supports; run `make help` to list them.

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "Makefile" is declared phony on purpose: a phony prerequisite is always
# considered out of date, so the catch-all recipe below runs on every
# invocation, and make never tries to remake this file through the
# match-anything rule.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
# NOTE: recipe lines must begin with a hard TAB, not spaces.
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

35
docs/make.bat Normal file
View file

@ -0,0 +1,35 @@
@ECHO OFF

REM Command file for Sphinx documentation
REM
REM Usage: make.bat [target]
REM Forwards the target to "sphinx-build -M"; with no target, prints the
REM Sphinx help text. SPHINXBUILD, SPHINXOPTS and O are read from the
REM environment when set.

REM Work from the directory that contains this script so the relative
REM SOURCEDIR and BUILDDIR resolve. The path is quoted so that spaces or
REM special characters in it do not break pushd.
pushd "%~dp0"

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

REM Probe for the executable: cmd.exe reports errorlevel 9009 when the
REM command cannot be found.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	REM Undo the pushd above before bailing out, otherwise the caller is
	REM left in the docs directory.
	popd
	exit /b 1
)

if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd

6
docs/requirements.txt Normal file
View file

@ -0,0 +1,6 @@
sphinx
sphinx-rtd-theme
sphinx-copybutton
pytorch-lightning >= 1.5.0
tensorflow >= 2.0

View file

View file

@ -0,0 +1,3 @@
/* Remove the width cap on the .wy-nav-content container so the page body can
   use the full available width. NOTE(review): this selector presumably
   targets the sphinx_rtd_theme content area; confirm against the theme. */
.wy-nav-content {
max-width: none;
}

View file

View file

@ -0,0 +1,45 @@
nvitop.callbacks package
========================

Submodules
----------

nvitop.callbacks.keras module
-----------------------------

.. automodule:: nvitop.callbacks.keras
   :members:
   :undoc-members:
   :show-inheritance:

nvitop.callbacks.pytorch\_lightning module
------------------------------------------

.. automodule:: nvitop.callbacks.pytorch_lightning
   :members:
   :undoc-members:
   :show-inheritance:

nvitop.callbacks.tensorboard module
-----------------------------------

.. automodule:: nvitop.callbacks.tensorboard
   :members:
   :undoc-members:
   :show-inheritance:

nvitop.callbacks.utils module
-----------------------------

.. automodule:: nvitop.callbacks.utils
   :members:
   :undoc-members:
   :show-inheritance:

Module contents
---------------

.. automodule:: nvitop.callbacks
   :members:
   :undoc-members:
   :show-inheritance:

View file

@ -0,0 +1,8 @@
nvitop.core.collector module
----------------------------

.. automodule:: nvitop.core.collector
   :members:
   :undoc-members:
   :show-inheritance:
   :member-order: bysource

View file

@ -0,0 +1,8 @@
nvitop.core.device module
-------------------------

.. automodule:: nvitop.core.device
   :members:
   :undoc-members:
   :show-inheritance:
   :member-order: bysource

View file

@ -0,0 +1,8 @@
nvitop.core.host module
-----------------------

.. automodule:: nvitop.core.host
   :members:
   :undoc-members:
   :show-inheritance:
   :member-order: bysource

View file

@ -0,0 +1,8 @@
nvitop.core.libnvml module
--------------------------

.. automodule:: nvitop.core.libnvml
   :members:
   :undoc-members:
   :show-inheritance:
   :member-order: bysource

View file

@ -0,0 +1,8 @@
nvitop.core.process module
--------------------------

.. automodule:: nvitop.core.process
   :members:
   :undoc-members:
   :show-inheritance:
   :member-order: bysource

View file

@ -0,0 +1,8 @@
nvitop.core.utils module
------------------------

.. automodule:: nvitop.core.utils
   :members:
   :undoc-members:
   :show-inheritance:
   :member-order: bysource

View file

@ -0,0 +1,24 @@
nvitop package
==============

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   core/device
   core/process
   core/host
   core/collector
   core/libnvml
   core/utils
   callbacks

Module contents
---------------

.. automodule:: nvitop
   :members:
   :undoc-members:
   :show-inheritance:

136
docs/source/conf.py Normal file
View file

@ -0,0 +1,136 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys

# This file lives at docs/source/conf.py, so three ``dirname`` hops from its
# absolute path give the repository root. It is inserted at the front of
# sys.path, presumably so autodoc imports ``nvitop`` from the source tree
# rather than from an installed copy.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))


# -- Project information -----------------------------------------------------

project = 'nvitop: the one-stop solution for GPU process management.'
copyright = '2022, Xuehai Pan'
author = 'Xuehai Pan'


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.doctest',
    'sphinx.ext.intersphinx',
    'sphinx.ext.todo',
    'sphinx.ext.coverage',
    'sphinx.ext.napoleon',
    'sphinx.ext.mathjax',
    'sphinx.ext.ifconfig',
    'sphinx.ext.viewcode',
    'sphinx.ext.githubpages',
    'sphinx.ext.extlinks',
    'sphinx_copybutton',
    'sphinx_rtd_theme',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'default'


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Extra style sheets, given relative to the entries of ``html_static_path``.
html_css_files = [
    'style.css',
]

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# ``:gitcode:`path/to/file``` links to that path at HEAD on GitHub.
# Both tuple members are format strings: the first is the URL template, the
# second the link caption. The caption must contain exactly one ``%s``; an
# empty caption is deprecated by Sphinx 4/5 and rejected by Sphinx 6.
# ``'%s'`` keeps the link text equal to the role's argument.
extlinks = {
    'gitcode': ('https://github.com/XuehaiPan/nvitop/blob/HEAD/%s', '%s'),
}


# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'nvitop', 'An interactive NVIDIA-GPU process viewer.',
     [author], 1)
]


# -- Extension configuration -------------------------------------------------

# -- Options for napoleon extension ------------------------------------------

napoleon_include_init_with_doc = True
napoleon_include_private_with_doc = False
napoleon_include_special_with_doc = True

# -- Options for intersphinx extension ---------------------------------------

# Refer to the Python standard library. Uses the named
# ``{name: (target URL, inventory)}`` form; the legacy unnamed
# ``{url: inventory}`` form is no longer accepted by recent Sphinx releases.
# ``None`` means "fetch objects.inv from the target URL".
intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}

# -- Options for todo extension ----------------------------------------------

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True

18
docs/source/index.rst Normal file
View file

@ -0,0 +1,18 @@
Welcome to nvitop's documentation!
==================================

An interactive NVIDIA-GPU process viewer, the one-stop solution for GPU process management.

.. toctree::
   :maxdepth: 4
   :caption: Contents:

   apis/index

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View file

@ -22,15 +22,21 @@ class GpuStatsLogger(Callback): # pylint: disable=too-many-instance-attributes
a CSVLogger callback to the model. a CSVLogger callback to the model.
Args: Args:
memory_utilization: Set to ``True`` to log used, free and the percentage of memory memory_utilization (bool):
Set to ``True`` to log used, free and the percentage of memory
utilization at the start and end of each step. Default: ``True``. utilization at the start and end of each step. Default: ``True``.
gpu_utilization: Set to ``True`` to log the percentage of GPU utilization gpu_utilization (bool):
Set to ``True`` to log the percentage of GPU utilization
at the start and end of each step. Default: ``True``. at the start and end of each step. Default: ``True``.
intra_step_time: Set to ``True`` to log the time of each step. Default: ``False``. intra_step_time (bool):
inter_step_time: Set to ``True`` to log the time between the end of one step Set to ``True`` to log the time of each step. Default: ``False``.
inter_step_time (bool):
Set to ``True`` to log the time between the end of one step
and the start of the next step. Default: ``False``. and the start of the next step. Default: ``False``.
fan_speed: Set to ``True`` to log percentage of fan speed. Default: ``False``. fan_speed (bool):
temperature: Set to ``True`` to log the gpu temperature in degree Celsius. Set to ``True`` to log percentage of fan speed. Default: ``False``.
temperature (bool):
Set to ``True`` to log the gpu temperature in degree Celsius.
Default: ``False``. Default: ``False``.
Raises: Raises:

View file

@ -22,15 +22,21 @@ class GpuStatsLogger(Callback): # pylint: disable=too-many-instance-attributes
callback and in order to use it you need to assign a logger in the ``Trainer``. callback and in order to use it you need to assign a logger in the ``Trainer``.
Args: Args:
memory_utilization: Set to ``True`` to log used, free and the percentage of memory memory_utilization (bool):
Set to ``True`` to log used, free and the percentage of memory
utilization at the start and end of each step. Default: ``True``. utilization at the start and end of each step. Default: ``True``.
gpu_utilization: Set to ``True`` to log the percentage of GPU utilization gpu_utilization (bool):
Set to ``True`` to log the percentage of GPU utilization
at the start and end of each step. Default: ``True``. at the start and end of each step. Default: ``True``.
intra_step_time: Set to ``True`` to log the time of each step. Default: ``False``. intra_step_time (bool):
inter_step_time: Set to ``True`` to log the time between the end of one step Set to ``True`` to log the time of each step. Default: ``False``.
inter_step_time (bool):
Set to ``True`` to log the time between the end of one step
and the start of the next step. Default: ``False``. and the start of the next step. Default: ``False``.
fan_speed: Set to ``True`` to log percentage of fan speed. Default: ``False``. fan_speed (bool):
temperature: Set to ``True`` to log the gpu temperature in degree Celsius. Set to ``True`` to log percentage of fan speed. Default: ``False``.
temperature (bool):
Set to ``True`` to log the gpu temperature in degree Celsius.
Default: ``False``. Default: ``False``.
Raises: Raises:

View file

@ -159,10 +159,10 @@ class ResourceMetricCollector: # pylint: disable=too-many-instance-attributes
"""A class for collecting resource metrics. """A class for collecting resource metrics.
Args: Args:
devices (iterable of Device): devices (Iterable[Device]):
Set of Device instances for logging. If not given, all physical Set of Device instances for logging. If not given, all physical
devices on board will be used. devices on board will be used.
root_pids (set of int): root_pids (Set[int]):
A set of PIDs, only the status of the children processes on the GPUs A set of PIDs, only the status of the children processes on the GPUs
will be collected. If not given, the PID of the current process will will be collected. If not given, the PID of the current process will
be used. be used.
@ -422,7 +422,7 @@ class ResourceMetricCollector: # pylint: disable=too-many-instance-attributes
the sub-collections will be reset as well. the sub-collections will be reset as well.
Args: Args:
tag (str or None): tag (Optional[str]):
The tag to reset. If None, the current active collection The tag to reset. If None, the current active collection
will be reset. will be reset.

View file

@ -3,18 +3,24 @@
"""The live classes for GPU devices. """The live classes for GPU devices.
The core classes are `Device` and `CudaDevice`. The type of returned instance created by `Class(args)` The core classes are ``Device`` and ``CudaDevice``. The type of returned instance created by ``Class(args)``
is depending on the given arguments. is depending on the given arguments.
`Device()` returns: ``Device()`` returns:
.. code-block:: python
- (index: int) -> PhysicalDevice - (index: int) -> PhysicalDevice
- (index: (int, int)) -> MigDevice - (index: (int, int)) -> MigDevice
- (uuid: str) -> Union[PhysicalDevice, MigDevice] # depending on the UUID value - (uuid: str) -> Union[PhysicalDevice, MigDevice] # depending on the UUID value
- (bus_id: str) -> PhysicalDevice - (bus_id: str) -> PhysicalDevice
`CudaDevice()` returns: ``CudaDevice()`` returns:
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES` .. code-block:: python
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
- (nvml_index: int) -> CudaDevice - (nvml_index: int) -> CudaDevice
- (nvml_index: (int, int)) -> CudaMigDevice - (nvml_index: (int, int)) -> CudaMigDevice
@ -127,7 +133,7 @@ def _does_any_device_support_mig_mode() -> bool:
def is_mig_device_uuid(uuid: Optional[str]) -> bool: def is_mig_device_uuid(uuid: Optional[str]) -> bool:
"""Returns `True` if the argument is a MIG device UUID, otherwise, returns `False`.""" """Returns ``True`` if the argument is a MIG device UUID, otherwise, returns ``False``."""
if isinstance(uuid, str): if isinstance(uuid, str):
match = Device.UUID_PATTERN.match(uuid) match = Device.UUID_PATTERN.match(uuid)
@ -139,7 +145,10 @@ def is_mig_device_uuid(uuid: Optional[str]) -> bool:
class Device: # pylint: disable=too-many-instance-attributes,too-many-public-methods class Device: # pylint: disable=too-many-instance-attributes,too-many-public-methods
"""Live class of the GPU devices, different from the device snapshots. """Live class of the GPU devices, different from the device snapshots.
`Device.__new__()` returns different types depending on the given arguments. ``Device.__new__()`` returns different types depending on the given arguments.
.. code-block:: python
- (index: int) -> PhysicalDevice - (index: int) -> PhysicalDevice
- (index: (int, int)) -> MigDevice - (index: (int, int)) -> MigDevice
- (uuid: str) -> Union[PhysicalDevice, MigDevice] # depending on the UUID value - (uuid: str) -> Union[PhysicalDevice, MigDevice] # depending on the UUID value
@ -206,6 +215,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=0 --format=csv,noheader,nounits --query-gpu=driver_version nvidia-smi --id=0 --format=csv,noheader,nounits --query-gpu=driver_version
""" """
@ -233,6 +244,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=0 --format=csv,noheader,nounits --query-gpu=count nvidia-smi --id=0 --format=csv,noheader,nounits --query-gpu=count
""" """
@ -251,14 +264,14 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Returns a list of devices of the given indices. """Returns a list of devices of the given indices.
Args: Args:
indices (list of int or tuple of two ints): indices (Iterable[Union[int, Tuple[int, int]]]):
Indices of the devices. For each index, get `PhysicalDevice` for single int Indices of the devices. For each index, get ``PhysicalDevice`` for single int
and `MigDevice` for tuple (int, int). That is: and ``MigDevice`` for tuple (int, int). That is:
- (int) -> PhysicalDevice - (int) -> PhysicalDevice
- ((int, int)) -> MigDevice - ((int, int)) -> MigDevice
Returns: List[Union[PhysicalDevice, MigDevice]] Returns: List[Union[PhysicalDevice, MigDevice]]
A list of `PhysicalDevice` and/or `MigDevice` instances of the given indices. A list of ``PhysicalDevice`` and/or ``MigDevice`` instances of the given indices.
""" """
if indices is None: if indices is None:
@ -271,19 +284,19 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
@staticmethod @staticmethod
def from_cuda_visible_devices() -> List['CudaDevice']: def from_cuda_visible_devices() -> List['CudaDevice']:
"""Returns a list of CUDA devices of the given CUDA indices. """Returns a list of all CUDA visible devices.
The CUDA ordinal will be enumerated from the `CUDA_VISIBLE_DEVICES` environment variable. The CUDA ordinal will be enumerated from the environment variable ``CUDA_VISIBLE_DEVICES``.
See also for CUDA Device Enumeration: See also for CUDA Device Enumeration:
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars - https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices - https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
Returns: List[CudaDevice] Returns: List[CudaDevice]
A list of `CudaDevice` instances. A list of ``CudaDevice`` instances.
Raises: Raises:
RuntimeError: RuntimeError:
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries). If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
""" """
visible_device_indices = Device.parse_cuda_visible_devices() visible_device_indices = Device.parse_cuda_visible_devices()
@ -294,28 +307,30 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
return cuda_devices return cuda_devices
cuda_all = from_cuda_visible_devices
@staticmethod @staticmethod
def from_cuda_indices(cuda_indices: Optional[Union[int, Iterable[int]]] = None) -> List['CudaDevice']: def from_cuda_indices(cuda_indices: Optional[Union[int, Iterable[int]]] = None) -> List['CudaDevice']:
"""Returns a list of CUDA devices of the given CUDA indices. """Returns a list of CUDA devices of the given CUDA indices.
The CUDA ordinal will be enumerated from the `CUDA_VISIBLE_DEVICES` environment variable. The CUDA ordinal will be enumerated from the environment variable ``CUDA_VISIBLE_DEVICES``.
See also for CUDA Device Enumeration: See also for CUDA Device Enumeration:
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars - https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices - https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
Args: Args:
cuda_indices (list of int): cuda_indices (Iterable[int]):
The value of `CUDA_VISIBLE_DEVICES`, if not given, the value from the environment The value of ``CUDA_VISIBLE_DEVICES``, if not given, the value from the environment
will be used. will be used.
Returns: List[CudaDevice] Returns: List[CudaDevice]
A list of `CudaDevice` of the given CUDA indices. A list of ``CudaDevice`` of the given CUDA indices.
Raises: Raises:
RuntimeError: RuntimeError:
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries). If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
RuntimeError: RuntimeError:
If the index is out of range for the given `CUDA_VISIBLE_DEVICES` environment variable. If the index is out of range for the given environment variable ``CUDA_VISIBLE_DEVICES``.
""" """
cuda_devices = Device.from_cuda_visible_devices() cuda_devices = Device.from_cuda_visible_devices()
@ -340,15 +355,15 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
@staticmethod @staticmethod
def parse_cuda_visible_devices(cuda_visible_devices: Optional[str] = None) -> Union[List[int], def parse_cuda_visible_devices(cuda_visible_devices: Optional[str] = None) -> Union[List[int],
List[Tuple[int, int]]]: List[Tuple[int, int]]]:
"""Parses the given `CUDA_VISIBLE_DEVICES` value into NVML device indices. """Parses the given ``CUDA_VISIBLE_DEVICES`` value into NVML device indices.
See also for CUDA Device Enumeration: See also for CUDA Device Enumeration:
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars - https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices - https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
Args: Args:
cuda_visible_devices (str or None): cuda_visible_devices (Optional[str]):
The value of the `CUDA_VISIBLE_DEVICES` variable. If not given, the value from the The value of the ``CUDA_VISIBLE_DEVICES`` variable. If not given, the value from the
environment will be used. environment will be used.
Returns: Union[List[int], List[Tuple[int, int]]] Returns: Union[List[int], List[Tuple[int, int]]]
@ -357,7 +372,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Raises: Raises:
RuntimeError: RuntimeError:
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries). If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
""" """
if cuda_visible_devices is None: if cuda_visible_devices is None:
@ -375,7 +390,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
@ttl_cache(ttl=300.0) @ttl_cache(ttl=300.0)
def _parse_cuda_visible_devices(cuda_visible_devices: str) -> Union[List[int], def _parse_cuda_visible_devices(cuda_visible_devices: str) -> Union[List[int],
List[Tuple[int, int]]]: List[Tuple[int, int]]]:
"""The underlining implementation for `parse_cuda_visible_devices`. The result will be cached.""" """The underlining implementation for ``parse_cuda_visible_devices``. The result will be cached."""
def from_index_or_uuid(index_or_uuid: Union[int, str]) -> 'Device': def from_index_or_uuid(index_or_uuid: Union[int, str]) -> 'Device':
nonlocal use_integer_identifiers nonlocal use_integer_identifiers
@ -430,15 +445,17 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
bus_id: Optional[str] = None) -> 'Device': bus_id: Optional[str] = None) -> 'Device':
"""Creates a new instance of Device. The type of the result is determined by the given argument. """Creates a new instance of Device. The type of the result is determined by the given argument.
- (index: int) -> PhysicalDevice .. code-block:: python
- (index: (int, int)) -> MigDevice
- (uuid: str) -> Union[PhysicalDevice, MigDevice] # depending on the UUID value - (index: int) -> PhysicalDevice
- (bus_id: str) -> PhysicalDevice - (index: (int, int)) -> MigDevice
- (uuid: str) -> Union[PhysicalDevice, MigDevice] # depending on the UUID value
- (bus_id: str) -> PhysicalDevice
Note: This method takes exactly one non-None argument. Note: This method takes exactly one non-None argument.
Returns: Union[PhysicalDevice, MigDevice] Returns: Union[PhysicalDevice, MigDevice]
A `PhysicalDevice` instance or a `MigDevice` instance. A ``PhysicalDevice`` instance or a ``MigDevice`` instance.
Raises: Raises:
TypeError: TypeError:
@ -480,7 +497,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
def __init__(self, index: Optional[Union[int, str]] = None, *, def __init__(self, index: Optional[Union[int, str]] = None, *,
uuid: Optional[str] = None, uuid: Optional[str] = None,
bus_id: Optional[str] = None) -> None: bus_id: Optional[str] = None) -> None:
"""Initializes the instance created by `__new__()`.""" """Initializes the instance created by ``__new__()``."""
if isinstance(index, str) and self.UUID_PATTERN.match(index) is not None: # passed by UUID if isinstance(index, str) and self.UUID_PATTERN.match(index) is not None: # passed by UUID
index, uuid = None, index index, uuid = None, index
@ -545,12 +562,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
def __getattr__(self, name: str) -> Union[Any, Callable[..., Any]]: def __getattr__(self, name: str) -> Union[Any, Callable[..., Any]]:
"""Get the object attribute. """Get the object attribute.
If the attribute is not defined, make a method from `pynvml.nvmlDeviceGet<AttributeName>(handle)`. If the attribute is not defined, make a method from ``pynvml.nvmlDeviceGet<AttributeName>(handle)``.
The attribute name will be converted to PascalCase string. The attribute name will be converted to PascalCase string.
Raises: Raises:
AttributeError: AttributeError:
If the attribute is not defined in `pynvml.py`. If the attribute is not defined in ``pynvml.py``.
Examples: Examples:
@ -646,9 +663,9 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Raises: Raises:
RuntimeError: RuntimeError:
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries). If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
RuntimeError: RuntimeError:
If the current device is not visible to CUDA applications (i.e. not listed in `CUDA_VISIBLE_DEVICES`). If the current device is not visible to CUDA applications (i.e. not listed in ``CUDA_VISIBLE_DEVICES``).
""" """
if self._cuda_index is None: if self._cuda_index is None:
@ -668,10 +685,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""The official product name of the GPU. This is an alphanumeric string. For all products. """The official product name of the GPU. This is an alphanumeric string. For all products.
Returns: Union[str, NaType] Returns: Union[str, NaType]
The official product name, or `nvitop.NA` (str: 'N/A') when not available. The official product name, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=name nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=name
""" """
@ -684,10 +703,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
not correspond to any physical label on the board. not correspond to any physical label on the board.
Returns: Union[str, NaType] Returns: Union[str, NaType]
The UUID of the device, or `nvitop.NA` (str: 'N/A') when not available. The UUID of the device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=uuid nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=uuid
""" """
@ -699,10 +720,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""PCI bus ID as "domain:bus:device.function", in hex. """PCI bus ID as "domain:bus:device.function", in hex.
Returns: Union[str, NaType] Returns: Union[str, NaType]
The PCI bus ID of the device, or `nvitop.NA` (str: 'N/A') when not available. The PCI bus ID of the device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=pci.bus_id nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=pci.bus_id
""" """
@ -715,10 +738,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
unique immutable alphanumeric value. unique immutable alphanumeric value.
Returns: Union[str, NaType] Returns: Union[str, NaType]
The serial number of the device, or `nvitop.NA` (str: 'N/A') when not available. The serial number of the device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=serial nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=serial
""" """
@ -730,7 +755,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Returns a named tuple with memory information (in bytes) for the device. """Returns a named tuple with memory information (in bytes) for the device.
Returns: MemoryInfo(total, free, used) Returns: MemoryInfo(total, free, used)
A named tuple with memory information, the item could be `nvitop.NA` (str: 'N/A') when not available. A named tuple with memory information, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
memory_info = nvml.nvmlQuery('nvmlDeviceGetMemoryInfo', self.handle) memory_info = nvml.nvmlQuery('nvmlDeviceGetMemoryInfo', self.handle)
@ -742,10 +767,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total installed GPU memory in bytes. """Total installed GPU memory in bytes.
Returns: Union[int, NaType] Returns: Union[int, NaType]
Total installed GPU memory in bytes, or `nvitop.NA` (str: 'N/A') when not available. Total installed GPU memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=memory.total nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=memory.total
""" """
@ -757,10 +784,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total memory allocated by active contexts in bytes. """Total memory allocated by active contexts in bytes.
Returns: Union[int, NaType] Returns: Union[int, NaType]
Total memory allocated by active contexts in bytes, or `nvitop.NA` (str: 'N/A') when not available. Total memory allocated by active contexts in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=memory.used nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=memory.used
""" """
@ -770,10 +799,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total free memory in bytes. """Total free memory in bytes.
Returns: Union[int, NaType] Returns: Union[int, NaType]
Total free memory in bytes, or `nvitop.NA` (str: 'N/A') when not available. Total free memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=memory.free nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=memory.free
""" """
@ -783,7 +814,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total installed GPU memory in human readable format. """Total installed GPU memory in human readable format.
Returns: Union[str, NaType] Returns: Union[str, NaType]
Total installed GPU memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available. Total installed GPU memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
if self._memory_total_human is NA: if self._memory_total_human is NA:
@ -794,7 +825,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total memory allocated by active contexts in human readable format. """Total memory allocated by active contexts in human readable format.
Returns: Union[str, NaType] Returns: Union[str, NaType]
Total memory allocated by active contexts in human readable format, or `nvitop.NA` (str: 'N/A') when not available. Total memory allocated by active contexts in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" # pylint: disable=line-too-long """ # pylint: disable=line-too-long
return bytes2human(self.memory_used()) return bytes2human(self.memory_used())
@ -803,7 +834,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total free memory in human readable format. """Total free memory in human readable format.
Returns: Union[str, NaType] Returns: Union[str, NaType]
Total free memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available. Total free memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
return bytes2human(self.memory_free()) return bytes2human(self.memory_free())
@ -812,7 +843,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""The percentage of used memory over total memory (0 <= p <= 100). """The percentage of used memory over total memory (0 <= p <= 100).
Returns: Union[float, NaType] Returns: Union[float, NaType]
The percentage of used memory over total memory, or `nvitop.NA` (str: 'N/A') when not available. The percentage of used memory over total memory, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
memory_info = self.memory_info() memory_info = self.memory_info()
@ -835,7 +866,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Returns a named tuple with BAR1 memory information (in bytes) for the device. """Returns a named tuple with BAR1 memory information (in bytes) for the device.
Returns: MemoryInfo(total, free, used) Returns: MemoryInfo(total, free, used)
A named tuple with BAR1 memory information, the item could be `nvitop.NA` (str: 'N/A') when not available. A named tuple with BAR1 memory information, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
memory_info = nvml.nvmlQuery('nvmlDeviceGetBAR1MemoryInfo', self.handle) memory_info = nvml.nvmlQuery('nvmlDeviceGetBAR1MemoryInfo', self.handle)
@ -847,7 +878,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total BAR1 memory in bytes. """Total BAR1 memory in bytes.
Returns: Union[int, NaType] Returns: Union[int, NaType]
Total BAR1 memory in bytes, or `nvitop.NA` (str: 'N/A') when not available. Total BAR1 memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
return self.bar1_memory_info().total return self.bar1_memory_info().total
@ -856,7 +887,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total used BAR1 memory in bytes. """Total used BAR1 memory in bytes.
Returns: Union[int, NaType] Returns: Union[int, NaType]
Total used BAR1 memory in bytes, or `nvitop.NA` (str: 'N/A') when not available. Total used BAR1 memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
return self.bar1_memory_info().used return self.bar1_memory_info().used
@ -865,7 +896,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total free BAR1 memory in bytes. """Total free BAR1 memory in bytes.
Returns: Union[int, NaType] Returns: Union[int, NaType]
Total free BAR1 memory in bytes, or `nvitop.NA` (str: 'N/A') when not available. Total free BAR1 memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
return self.bar1_memory_info().free return self.bar1_memory_info().free
@ -874,7 +905,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total BAR1 memory in human readable format. """Total BAR1 memory in human readable format.
Returns: Union[str, NaType] Returns: Union[str, NaType]
Total BAR1 memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available. Total BAR1 memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
return bytes2human(self.bar1_memory_total()) return bytes2human(self.bar1_memory_total())
@ -883,7 +914,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total used BAR1 memory in human readable format. """Total used BAR1 memory in human readable format.
Returns: Union[str, NaType] Returns: Union[str, NaType]
Total used BAR1 memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available. Total used BAR1 memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
return bytes2human(self.bar1_memory_used()) return bytes2human(self.bar1_memory_used())
@ -892,7 +923,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total free BAR1 memory in human readable format. """Total free BAR1 memory in human readable format.
Returns: Union[str, NaType] Returns: Union[str, NaType]
Total free BAR1 memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available. Total free BAR1 memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
return bytes2human(self.bar1_memory_free()) return bytes2human(self.bar1_memory_free())
@ -901,7 +932,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""The percentage of used BAR1 memory over total BAR1 memory (0 <= p <= 100). """The percentage of used BAR1 memory over total BAR1 memory (0 <= p <= 100).
Returns: Union[float, NaType] Returns: Union[float, NaType]
The percentage of used BAR1 memory over total BAR1 memory, or `nvitop.NA` (str: 'N/A') when not available. The percentage of used BAR1 memory over total BAR1 memory, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
memory_info = self.bar1_memory_info() memory_info = self.bar1_memory_info()
@ -924,7 +955,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Returns a named tuple with GPU utilization rates (in percentage) for the device. """Returns a named tuple with GPU utilization rates (in percentage) for the device.
Returns: UtilizationRates(gpu, memory, encoder, decoder) Returns: UtilizationRates(gpu, memory, encoder, decoder)
A named tuple with GPU utilization rates (in percentage) for the device, the item could be `nvitop.NA` (str: 'N/A') when not available. A named tuple with GPU utilization rates (in percentage) for the device, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" # pylint: disable=line-too-long """ # pylint: disable=line-too-long
gpu, memory, encoder, decoder = NA, NA, NA, NA gpu, memory, encoder, decoder = NA, NA, NA, NA
@ -948,10 +979,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
The sample period may be between 1 second and 1/6 second depending on the product. The sample period may be between 1 second and 1/6 second depending on the product.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The GPU utilization rate in percentage, or `nvitop.NA` (str: 'N/A') when not available. The GPU utilization rate in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=utilization.gpu nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=utilization.gpu
""" """
@ -964,10 +997,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
The sample period may be between 1 second and 1/6 second depending on the product. The sample period may be between 1 second and 1/6 second depending on the product.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The memory bandwidth utilization rate of the GPU in percentage, or `nvitop.NA` (str: 'N/A') when not available. The memory bandwidth utilization rate of the GPU in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=utilization.memory nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=utilization.memory
""" # pylint: disable=line-too-long """ # pylint: disable=line-too-long
@ -977,7 +1012,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""The encoder utilization rate in percentage. """The encoder utilization rate in percentage.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The encoder utilization rate in percentage, or `nvitop.NA` (str: 'N/A') when not available. The encoder utilization rate in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
return self.utilization_rates().encoder return self.utilization_rates().encoder
@ -986,7 +1021,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""The decoder utilization rate in percentage. """The decoder utilization rate in percentage.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The decoder utilization rate in percentage, or `nvitop.NA` (str: 'N/A') when not available. The decoder utilization rate in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
return self.utilization_rates().decoder return self.utilization_rates().decoder
@ -997,7 +1032,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Returns a named tuple with current clock speeds (in MHz) for the device. """Returns a named tuple with current clock speeds (in MHz) for the device.
Returns: ClockInfos(graphics, sm, memory, video) Returns: ClockInfos(graphics, sm, memory, video)
A named tuple with current clock speeds (in MHz) for the device, the item could be `nvitop.NA` (str: 'N/A') when not available. A named tuple with current clock speeds (in MHz) for the device, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" # pylint: disable=line-too-long """ # pylint: disable=line-too-long
return ClockInfos( return ClockInfos(
@ -1015,7 +1050,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Returns a named tuple with maximum clock speeds (in MHz) for the device. """Returns a named tuple with maximum clock speeds (in MHz) for the device.
Returns: ClockInfos(graphics, sm, memory, video) Returns: ClockInfos(graphics, sm, memory, video)
A named tuple with maximum clock speeds (in MHz) for the device, the item could be `nvitop.NA` (str: 'N/A') when not available. A named tuple with maximum clock speeds (in MHz) for the device, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" # pylint: disable=line-too-long """ # pylint: disable=line-too-long
clock_infos = self._max_clock_infos._asdict() clock_infos = self._max_clock_infos._asdict()
@ -1042,10 +1077,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Current frequency of graphics (shader) clock in MHz. """Current frequency of graphics (shader) clock in MHz.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The current frequency of graphics (shader) clock in MHz, or `nvitop.NA` (str: 'N/A') when not available. The current frequency of graphics (shader) clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.graphics nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.graphics
""" """
@ -1055,10 +1092,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Current frequency of SM (Streaming Multiprocessor) clock in MHz. """Current frequency of SM (Streaming Multiprocessor) clock in MHz.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The current frequency of SM (Streaming Multiprocessor) clock in MHz, or `nvitop.NA` (str: 'N/A') when not available. The current frequency of SM (Streaming Multiprocessor) clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.sm nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.sm
""" # pylint: disable=line-too-long """ # pylint: disable=line-too-long
@ -1068,10 +1107,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Current frequency of memory clock in MHz. """Current frequency of memory clock in MHz.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The current frequency of memory clock in MHz, or `nvitop.NA` (str: 'N/A') when not available. The current frequency of memory clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.memory nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.memory
""" """
@ -1081,10 +1122,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Current frequency of video encoder/decoder clock in MHz. """Current frequency of video encoder/decoder clock in MHz.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The current frequency of video encoder/decoder clock in MHz, or `nvitop.NA` (str: 'N/A') when not available. The current frequency of video encoder/decoder clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.video nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.video
""" """
@ -1094,10 +1137,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Maximum frequency of graphics (shader) clock in MHz. """Maximum frequency of graphics (shader) clock in MHz.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The maximum frequency of graphics (shader) clock in MHz, or `nvitop.NA` (str: 'N/A') when not available. The maximum frequency of graphics (shader) clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.graphics nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.graphics
""" """
@ -1107,10 +1152,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Maximum frequency of SM (Streaming Multiprocessor) clock in MHz. """Maximum frequency of SM (Streaming Multiprocessor) clock in MHz.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The maximum frequency of SM (Streaming Multiprocessor) clock in MHz, or `nvitop.NA` (str: 'N/A') when not available. The maximum frequency of SM (Streaming Multiprocessor) clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.sm nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.sm
""" # pylint: disable=line-too-long """ # pylint: disable=line-too-long
@ -1120,10 +1167,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Maximum frequency of memory clock in MHz. """Maximum frequency of memory clock in MHz.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The maximum frequency of memory clock in MHz, or `nvitop.NA` (str: 'N/A') when not available. The maximum frequency of memory clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.memory nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.memory
""" """
@ -1133,10 +1182,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Maximum frequency of video encoder/decoder clock in MHz. """Maximum frequency of video encoder/decoder clock in MHz.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The maximum frequency of video encoder/decoder clock in MHz, or `nvitop.NA` (str: 'N/A') when not available. The maximum frequency of video encoder/decoder clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.video nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.video
""" """
@ -1151,10 +1202,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
because they rely on cooling via fans in the surrounding enclosure. because they rely on cooling via fans in the surrounding enclosure.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The fan speed value in percentage, or `nvitop.NA` (str: 'N/A') when not available. The fan speed value in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=fan.speed nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=fan.speed
""" """
@ -1165,10 +1218,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Core GPU temperature in degrees C. """Core GPU temperature in degrees C.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The core GPU temperature in Celsius degrees, or `nvitop.NA` (str: 'N/A') when not available. The core GPU temperature in Celsius degrees, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=temperature.gpu nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=temperature.gpu
""" """
@ -1180,10 +1235,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""The last measured power draw for the entire board in milliwatts. """The last measured power draw for the entire board in milliwatts.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The power draw for the entire board in milliwatts, or `nvitop.NA` (str: 'N/A') when not available. The power draw for the entire board in milliwatts, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
$(( "$(nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=power.draw)" * 1000 )) $(( "$(nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=power.draw)" * 1000 ))
""" """
@ -1197,10 +1254,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""The software power limit in milliwatts. Set by software like nvidia-smi. """The software power limit in milliwatts. Set by software like nvidia-smi.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The software power limit in milliwatts, or `nvitop.NA` (str: 'N/A') when not available. The software power limit in milliwatts, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
$(( "$(nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=power.limit)" * 1000 )) $(( "$(nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=power.limit)" * 1000 ))
""" """
@ -1230,10 +1289,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: Union[str, NaType] Returns: Union[str, NaType]
- 'Disabled': if not an active display device. - 'Disabled': if not an active display device.
- 'Enabled': if an active display device. - 'Enabled': if an active display device.
- `nvitop.NA` (str: 'N/A'): if not available. - ``nvitop.NA`` (str: ``'N/A'``): if not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=display_active nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=display_active
""" """
@ -1248,10 +1309,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: Union[str, NaType] Returns: Union[str, NaType]
- 'Disabled': if the display mode is disabled. - 'Disabled': if the display mode is disabled.
- 'Enabled': if the display mode is enabled. - 'Enabled': if the display mode is enabled.
- `nvitop.NA` (str: 'N/A'): if not available. - ``nvitop.NA`` (str: ``'N/A'``): if not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=display_mode nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=display_mode
""" """
@ -1268,10 +1331,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: Union[str, NaType] Returns: Union[str, NaType]
- 'WDDM': for WDDM driver model on Windows. - 'WDDM': for WDDM driver model on Windows.
- 'WDM': for TCC (WDM) driver model on Windows. - 'WDM': for TCC (WDM) driver model on Windows.
- `nvitop.NA` (str: 'N/A'): if not available, e.g. on Linux. - ``nvitop.NA`` (str: ``'N/A'``): if not available, e.g. on Linux.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=driver_model.current nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=driver_model.current
""" """
@ -1292,10 +1357,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: Union[str, NaType] Returns: Union[str, NaType]
- 'Disabled': if the persistence mode is disabled. - 'Disabled': if the persistence mode is disabled.
- 'Enabled': if the persistence mode is enabled. - 'Enabled': if the persistence mode is enabled.
- `nvitop.NA` (str: 'N/A'): if not available. - ``nvitop.NA`` (str: ``'N/A'``): if not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=persistence_mode nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=persistence_mode
""" """
@ -1307,10 +1374,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
P12 (minimum performance). P12 (minimum performance).
Returns: Union[str, NaType] Returns: Union[str, NaType]
The current performance state in format `P<int>`, or `nvitop.NA` (str: 'N/A') when not available. The current performance state in format ``P<int>``, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=pstate nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=pstate
""" """
@ -1324,10 +1393,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total errors detected across entire chip. """Total errors detected across entire chip.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The total number of uncorrected errors in volatile ECC memory, or `nvitop.NA` (str: 'N/A') when not available. The total number of uncorrected errors in volatile ECC memory, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=ecc.errors.uncorrected.volatile.total nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=ecc.errors.uncorrected.volatile.total
""" # pylint: disable=line-too-long """ # pylint: disable=line-too-long
@ -1345,10 +1416,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
- 'Exclusive Thread': deprecated, use Exclusive Process instead - 'Exclusive Thread': deprecated, use Exclusive Process instead
- 'Prohibited': means no contexts are allowed per device (no compute apps). - 'Prohibited': means no contexts are allowed per device (no compute apps).
- 'Exclusive Process': means only one context is allowed per device, usable from multiple threads at a time. - 'Exclusive Process': means only one context is allowed per device, usable from multiple threads at a time.
- `nvitop.NA` (str: 'N/A'): if not available. - ``nvitop.NA`` (str: ``'N/A'``): if not available.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=compute_mode nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=compute_mode
""" """
@ -1375,10 +1448,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: Union[str, NaType] Returns: Union[str, NaType]
- 'Disabled': if the MIG mode is disabled. - 'Disabled': if the MIG mode is disabled.
- 'Enabled': if the MIG mode is enabled. - 'Enabled': if the MIG mode is enabled.
- `nvitop.NA` (str: 'N/A'): if not available, e.g. the GPU does not support MIG mode. - ``nvitop.NA`` (str: ``'N/A'``): if not available, e.g. the GPU does not support MIG mode.
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=mig.mode.current nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=mig.mode.current
""" """
@ -1390,7 +1465,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
return {0: 'Disabled', 1: 'Enabled'}.get(mig_mode, NA) return {0: 'Disabled', 1: 'Enabled'}.get(mig_mode, NA)
def is_mig_mode_enabled(self) -> bool: def is_mig_mode_enabled(self) -> bool:
"""Returns whether the MIG mode is enabled on the device. Returns `False` if MIG mode is """Returns whether the MIG mode is enabled on the device. Returns ``False`` if MIG mode is
disabled or the device does not support MIG mode. disabled or the device does not support MIG mode.
""" """
@ -1440,7 +1515,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
return processes return processes
def as_snapshot(self) -> Snapshot: def as_snapshot(self) -> Snapshot:
"""Returns a onetime snapshot of the device. The attributes are defined in `SNAPSHOT_KEYS`.""" """Returns a onetime snapshot of the device. The attributes are defined in ``SNAPSHOT_KEYS``."""
with self.oneshot(): with self.oneshot():
return Snapshot(real=self, index=self.index, physical_index=self.physical_index, return Snapshot(real=self, index=self.index, physical_index=self.physical_index,
@ -1544,6 +1619,8 @@ class PhysicalDevice(Device):
Command line equivalent: Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=index nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=index
""" """
@ -1636,18 +1713,18 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
"""Returns a list of MIG devices of the given indices. """Returns a list of MIG devices of the given indices.
Args: Args:
indices (list of tuple of two ints): indices (Iterable[Tuple[int, int]]):
Indices of the MIG devices. Each index is a tuple of two integers. Indices of the MIG devices. Each index is a tuple of two integers.
Returns: List[MigDevice] Returns: List[MigDevice]
A list of `MigDevice` instances of the given indices. A list of ``MigDevice`` instances of the given indices.
""" """
return list(map(cls, indices)) return list(map(cls, indices))
def __init__(self, index: Optional[Union[Tuple[int, int], str]] = None, *, # pylint: disable=super-init-not-called def __init__(self, index: Optional[Union[Tuple[int, int], str]] = None, *, # pylint: disable=super-init-not-called
uuid: Optional[str] = None) -> None: uuid: Optional[str] = None) -> None:
"""Initializes the instance created by `__new__()`.""" """Initializes the instance created by ``__new__()``."""
if isinstance(index, str) and self.UUID_PATTERN.match(index) is not None: # passed by UUID if isinstance(index, str) and self.UUID_PATTERN.match(index) is not None: # passed by UUID
index, uuid = None, index index, uuid = None, index
@ -1727,7 +1804,7 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
"""The gpu instance ID of the MIG device. """The gpu instance ID of the MIG device.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The gpu instance ID of the MIG device, or `nvitop.NA` (str: 'N/A') when not available. The gpu instance ID of the MIG device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
if self._gpu_instance_id is NA: if self._gpu_instance_id is NA:
@ -1741,7 +1818,7 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
"""The compute instance ID of the MIG device. """The compute instance ID of the MIG device.
Returns: Union[int, NaType] Returns: Union[int, NaType]
The compute instance ID of the MIG device, or `nvitop.NA` (str: 'N/A') when not available. The compute instance ID of the MIG device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" """
if self._compute_instance_id is NA: if self._compute_instance_id is NA:
@ -1752,7 +1829,7 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
return self._compute_instance_id return self._compute_instance_id
def as_snapshot(self) -> Snapshot: def as_snapshot(self) -> Snapshot:
"""Returns a onetime snapshot of the device. The attributes are defined in `SNAPSHOT_KEYS`.""" """Returns a onetime snapshot of the device. The attributes are defined in ``SNAPSHOT_KEYS``."""
snapshot = super().as_snapshot() snapshot = super().as_snapshot()
snapshot.mig_index = self.mig_index snapshot.mig_index = self.mig_index
@ -1764,15 +1841,18 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
class CudaDevice(Device): class CudaDevice(Device):
"""Class for devices enumerated over the CUDA ordinal. The order can vary for different """Class for devices enumerated over the CUDA ordinal. The order can vary for different
`CUDA_VISIBLE_DEVICES` environment variable. environment variable ``CUDA_VISIBLE_DEVICES``.
See also for CUDA Device Enumeration: See also for CUDA Device Enumeration:
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars - https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices - https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
`CudaDevice.__new__()` returns different types depending on the given arguments. ``CudaDevice.__new__()`` returns different types depending on the given arguments.
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES` .. code-block:: python
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
- (nvml_index: int) -> CudaDevice - (nvml_index: int) -> CudaDevice
- (nvml_index: (int, int)) -> CudaMigDevice - (nvml_index: (int, int)) -> CudaMigDevice
@ -1808,16 +1888,50 @@ class CudaDevice(Device):
@classmethod @classmethod
def count(cls) -> int: def count(cls) -> int:
"""The number of GPUs visible to CUDA applications.""" """The number of GPUs visible to CUDA applications.
Raises:
RuntimeError:
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
"""
return len(super().parse_cuda_visible_devices()) return len(super().parse_cuda_visible_devices())
@classmethod @classmethod
def all(cls) -> List['CudaDevice']: def all(cls) -> List['CudaDevice']:
"""All CUDA visible devices.
Raises:
RuntimeError:
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
"""
return cls.from_indices() return cls.from_indices()
@classmethod @classmethod
def from_indices(cls, indices: Optional[Union[int, Iterable[int]]] = None) -> List['CudaDevice']: def from_indices(cls, indices: Optional[Union[int, Iterable[int]]] = None) -> List['CudaDevice']:
"""Returns a list of CUDA devices of the given CUDA indices.
The CUDA ordinal will be enumerated from the environment variable ``CUDA_VISIBLE_DEVICES``.
See also for CUDA Device Enumeration:
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
Args:
indices (Optional[Union[int, Iterable[int]]]):
The value of ``CUDA_VISIBLE_DEVICES``, if not given, the value from the environment
will be used.
Returns: List[CudaDevice]
A list of ``CudaDevice`` of the given CUDA indices.
Raises:
RuntimeError:
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
RuntimeError:
If the index is out of range for the given environment variable ``CUDA_VISIBLE_DEVICES``.
"""
return super().from_cuda_indices(indices) return super().from_cuda_indices(indices)
def __new__(cls, cuda_index: Optional[int] = None, *, def __new__(cls, cuda_index: Optional[int] = None, *,
@ -1825,15 +1939,17 @@ class CudaDevice(Device):
uuid: Optional[str] = None) -> 'Device': uuid: Optional[str] = None) -> 'Device':
"""Creates a new instance of CudaDevice. The type of the result is determined by the given argument. """Creates a new instance of CudaDevice. The type of the result is determined by the given argument.
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES` .. code-block:: python
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
- (nvml_index: int) -> CudaDevice - (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
- (nvml_index: (int, int)) -> CudaMigDevice - (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
- (nvml_index: int) -> CudaDevice
- (nvml_index: (int, int)) -> CudaMigDevice
Note: This method takes exactly 1 non-None argument. Note: This method takes exactly 1 non-None argument.
Returns: Union[CudaDevice, CudaMigDevice] Returns: Union[CudaDevice, CudaMigDevice]
A `CudaDevice` instance or a `CudaMigDevice` instance. A ``CudaDevice`` instance or a ``CudaMigDevice`` instance.
Raises: Raises:
TypeError: TypeError:
@ -1842,9 +1958,9 @@ class CudaDevice(Device):
If the given index is a tuple but does not consist of two integers. If the given index is a tuple but does not consist of two integers.
Raises: Raises:
RuntimeError: RuntimeError:
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries). If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
RuntimeError: RuntimeError:
If the index is out of range for the given `CUDA_VISIBLE_DEVICES` environment variable. If the index is out of range for the given environment variable ``CUDA_VISIBLE_DEVICES``.
""" """
if cuda_index is not None and nvml_index is None and uuid is None: if cuda_index is not None and nvml_index is None and uuid is None:
@ -1861,7 +1977,7 @@ class CudaDevice(Device):
def __init__(self, cuda_index: Optional[int] = None, *, def __init__(self, cuda_index: Optional[int] = None, *,
nvml_index: Optional[Union[int, Tuple[int, int]]] = None, nvml_index: Optional[Union[int, Tuple[int, int]]] = None,
uuid: Optional[str] = None) -> None: uuid: Optional[str] = None) -> None:
"""Initializes the instance created by `__new__()`. """Initializes the instance created by ``__new__()``.
Raises: Raises:
RuntimeError: RuntimeError:
@ -1895,7 +2011,7 @@ class CudaDevice(Device):
return self.__class__, (self._cuda_index,) return self.__class__, (self._cuda_index,)
def as_snapshot(self) -> Snapshot: def as_snapshot(self) -> Snapshot:
"""Returns a onetime snapshot of the device. The attributes are defined in `SNAPSHOT_KEYS`.""" """Returns a onetime snapshot of the device. The attributes are defined in ``SNAPSHOT_KEYS``."""
snapshot = super().as_snapshot() snapshot = super().as_snapshot()
snapshot.cuda_index = self.cuda_index snapshot.cuda_index = self.cuda_index

View file

@ -1,7 +1,7 @@
# This file is part of nvitop, the interactive NVIDIA-GPU process viewer. # This file is part of nvitop, the interactive NVIDIA-GPU process viewer.
# License: GNU GPL version 3. # License: GNU GPL version 3.
"""Shortcuts for package `psutil`. """Shortcuts for package ``psutil``.
psutil is a cross-platform library for retrieving information on running processes psutil is a cross-platform library for retrieving information on running processes
and system utilization (CPU, memory, disks, network, sensors) in Python. and system utilization (CPU, memory, disks, network, sensors) in Python.
@ -53,11 +53,11 @@ def swap_percent():
ppid_map = _psutil._ppid_map # pylint: disable=protected-access ppid_map = _psutil._ppid_map # pylint: disable=protected-access
"""Obtains a `{pid: ppid, ...}` dict for all running processes in one shot.""" """Obtains a ``{pid: ppid, ...}`` dict for all running processes in one shot."""
def reverse_ppid_map(): # pylint: disable=function-redefined def reverse_ppid_map(): # pylint: disable=function-redefined
"""Obtains a `{ppid: [pid, ...], ...}` dict for all running processes in one shot.""" """Obtains a ``{ppid: [pid, ...], ...}`` dict for all running processes in one shot."""
tree = _defaultdict(list) tree = _defaultdict(list)
for pid, ppid in ppid_map().items(): for pid, ppid in ppid_map().items():

View file

@ -18,11 +18,11 @@ import pynvml
from nvitop.core.utils import NA, colored from nvitop.core.utils import NA, colored
__all__ = ['nvml', 'nvmlCheckReturn', 'NVMLError'] __all__ = ['libnvml', 'nvml', 'nvmlCheckReturn', 'NVMLError']
class libnvml: class libnvml:
"""The helper singleton class that holds members from package `nvidia-ml-py`.""" """The helper singleton class that holds members from package ``nvidia-ml-py``."""
NVMLError = pynvml.NVMLError NVMLError = pynvml.NVMLError
"""Base exception class for NVML query errors.""" """Base exception class for NVML query errors."""
@ -34,7 +34,7 @@ class libnvml:
c_nvmlDevice_t = pynvml.c_nvmlDevice_t c_nvmlDevice_t = pynvml.c_nvmlDevice_t
def __new__(cls) -> 'libnvml': def __new__(cls) -> 'libnvml':
"""Gets the singleton instance of `libnvml`.""" """Gets the singleton instance of ``libnvml``."""
if not hasattr(cls, '_instance'): if not hasattr(cls, '_instance'):
instance = cls._instance = super().__new__(cls) instance = cls._instance = super().__new__(cls)
@ -62,13 +62,13 @@ class libnvml:
pass pass
def __enter__(self) -> 'libnvml': def __enter__(self) -> 'libnvml':
"""Entry of the context manager for `with` statement.""" """Entry of the context manager for ``with`` statement."""
self._lazy_init() self._lazy_init()
return self return self
def __exit__(self, *args, **kwargs) -> None: def __exit__(self, *args, **kwargs) -> None:
"""Shuts down the NVML context in the context manager for `with` statement.""" """Shuts down the NVML context in the context manager for ``with`` statement."""
self.__del__() self.__del__()
@ -100,8 +100,8 @@ class libnvml:
If RM detects a driver/library version mismatch, usually after an upgrade for NVIDIA If RM detects a driver/library version mismatch, usually after an upgrade for NVIDIA
driver without reloading the kernel module. driver without reloading the kernel module.
AttributeError: AttributeError:
If cannot find function `nvmlInitWithFlags`, usually the `pynvml` module is overridden If cannot find function ``nvmlInitWithFlags``, usually the ``pynvml`` module is overridden
by other modules. Need to reinstall package `nvidia-ml-py`. by other modules. Need to reinstall package ``nvidia-ml-py``.
""" """
self.nvmlInitWithFlags(0) self.nvmlInitWithFlags(0)
@ -118,8 +118,8 @@ class libnvml:
If RM detects a driver/library version mismatch, usually after an upgrade for NVIDIA If RM detects a driver/library version mismatch, usually after an upgrade for NVIDIA
driver without reloading the kernel module. driver without reloading the kernel module.
AttributeError: AttributeError:
If cannot find function `nvmlInitWithFlags`, usually the `pynvml` module is overridden If cannot find function ``nvmlInitWithFlags``, usually the ``pynvml`` module is overridden
by other modules. Need to reinstall package `nvidia-ml-py`. by other modules. Need to reinstall package ``nvidia-ml-py``.
""" """
with self._lock: with self._lock:
@ -179,7 +179,7 @@ class libnvml:
If RM detects a driver/library version mismatch, usually after an upgrade for NVIDIA If RM detects a driver/library version mismatch, usually after an upgrade for NVIDIA
driver without reloading the kernel module. driver without reloading the kernel module.
NVMLError_Uninitialized: NVMLError_Uninitialized:
If NVML was not first initialized with `nvmlInit()`. If NVML was not first initialized with ``nvmlInit()``.
""" """
pynvml.nvmlShutdown() pynvml.nvmlShutdown()
@ -199,16 +199,16 @@ class libnvml:
"""Calls a function with the given arguments from NVML. The NVML context will be lazily initialized. """Calls a function with the given arguments from NVML. The NVML context will be lazily initialized.
Args: Args:
func (function or str): func (Union[Callable[..., Any], str]):
The function to call. If it is given by string, lookup for the The function to call. If it is given by string, lookup for the
function first from `pynvml`. function first from ``pynvml``.
default (any): default (Any):
The default value if the query fails. The default value if the query fails.
ignore_errors (bool): ignore_errors (bool):
Whether to ignore errors and return the default value. Whether to ignore errors and return the default value.
ignore_function_not_found (bool): ignore_function_not_found (bool):
Whether to ignore function not found errors and return the Whether to ignore function not found errors and return the
default value. If set to `False`, an error message will be logged default value. If set to ``False``, an error message will be logged
to the logger. to the logger.
*args: *args:
Positional arguments to pass to the query function. Positional arguments to pass to the query function.
@ -251,7 +251,7 @@ class libnvml:
@staticmethod @staticmethod
def nvmlCheckReturn(retval: Any, types: Optional[Union[type, Tuple[type, ...]]] = None) -> bool: def nvmlCheckReturn(retval: Any, types: Optional[Union[type, Tuple[type, ...]]] = None) -> bool:
"""Checks the return value is not `nvitop.NA` and is one of the given types.""" """Checks the return value is not ``nvitop.NA`` and is one of the given types."""
if types is None: if types is None:
return retval != NA return retval != NA
@ -259,7 +259,7 @@ class libnvml:
nvml = libnvml() nvml = libnvml()
"""The singleton instance of `libnvml`.""" """The singleton instance of class ``libnvml``."""
nvmlCheckReturn = nvml.nvmlCheckReturn nvmlCheckReturn = nvml.nvmlCheckReturn

View file

@ -81,7 +81,7 @@ _USE_FALLBACK_WHEN_RAISE = threading.local() # see also `GpuProcess.failsafe`
def auto_garbage_clean(fallback=_RAISE): def auto_garbage_clean(fallback=_RAISE):
"""Removes the object references in the instance cache if the method call fails (the process is gone). """Removes the object references in the instance cache if the method call fails (the process is gone).
The fallback value will be used with `GpuProcess.failsafe` context manager, otherwise raises an The fallback value will be used with ``GpuProcess.failsafe`` context manager, otherwise raises an
exception when it fails. exception when it fails.
""" """
@ -161,7 +161,7 @@ class HostProcess(host.Process, metaclass=ABCMeta):
HOST_SNAPSHOTS = {} HOST_SNAPSHOTS = {}
def __new__(cls, pid: Optional[int] = None) -> 'HostProcess': def __new__(cls, pid: Optional[int] = None) -> 'HostProcess':
"""Returns the cached instance of `HostProcess`.""" """Returns the cached instance of ``HostProcess``."""
if pid is None: if pid is None:
pid = os.getpid() pid = os.getpid()
@ -248,7 +248,7 @@ class HostProcess(host.Process, metaclass=ABCMeta):
@memoize_when_activated @memoize_when_activated
def running_time(self) -> datetime.timedelta: def running_time(self) -> datetime.timedelta:
"""The elapsed time this process has been running in `datetime.timedelta`.""" """The elapsed time this process has been running in ``datetime.timedelta``."""
return datetime.datetime.now() - datetime.datetime.fromtimestamp(self.create_time()) return datetime.datetime.now() - datetime.datetime.fromtimestamp(self.create_time())
@ -272,7 +272,7 @@ class HostProcess(host.Process, metaclass=ABCMeta):
return self.memory_info().rss return self.memory_info().rss
def parent(self) -> Union['HostProcess', None]: def parent(self) -> Union['HostProcess', None]:
"""Returns the parent process as a `HostProcess` instance. Returns `None` if there is no parent.""" """Returns the parent process as a ``HostProcess`` instance. Returns ``None`` if there is no parent."""
parent = super().parent() parent = super().parent()
if parent is not None: if parent is not None:
@ -280,8 +280,8 @@ class HostProcess(host.Process, metaclass=ABCMeta):
return None return None
def children(self, recursive: bool = False) -> List['HostProcess']: def children(self, recursive: bool = False) -> List['HostProcess']:
"""Return the children of this process as a list of `HostProcess` instances. """Return the children of this process as a list of ``HostProcess`` instances.
If *recursive* is `True` return all the descendants. If *recursive* is ``True`` return all the descendants.
""" """
return [HostProcess(child.pid) for child in super().children(recursive)] return [HostProcess(child.pid) for child in super().children(recursive)]
@ -294,7 +294,7 @@ class HostProcess(host.Process, metaclass=ABCMeta):
Internally different process info (e.g. name, ppid, uids, gids, ...) may be fetched by using Internally different process info (e.g. name, ppid, uids, gids, ...) may be fetched by using
the same routine, but only one information is returned and the others are discarded. When the same routine, but only one information is returned and the others are discarded. When
using this context manager the internal routine is executed once (in the example below on using this context manager the internal routine is executed once (in the example below on
`name()`) and the other info are cached. ``name()``) and the other info are cached.
The cache is cleared when exiting the context manager block. The advice is to use this every The cache is cleared when exiting the context manager block. The advice is to use this every
time you retrieve more than one information about the process. time you retrieve more than one information about the process.
@ -345,7 +345,7 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
"""Represents a process with the given PID running on the given GPU device. """Represents a process with the given PID running on the given GPU device.
The instance will be cached during the lifetime of the process. The instance will be cached during the lifetime of the process.
The same host process can use multiple GPU devices. The `GpuProcess` instances representing the The same host process can use multiple GPU devices. The ``GpuProcess`` instances representing the
same PID on the host but different GPU devices are different. same PID on the host but different GPU devices are different.
""" """
@ -355,7 +355,7 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
def __new__(cls, pid: int, device: 'Device', def __new__(cls, pid: int, device: 'Device',
gpu_memory: Optional[Union[int, NaType]] = None, # pylint: disable=unused-argument gpu_memory: Optional[Union[int, NaType]] = None, # pylint: disable=unused-argument
type: Optional[Union[str, NaType]] = None) -> 'GpuProcess': # pylint: disable=unused-argument,redefined-builtin type: Optional[Union[str, NaType]] = None) -> 'GpuProcess': # pylint: disable=unused-argument,redefined-builtin
"""Returns the cached instance of `GpuProcess`.""" """Returns the cached instance of ``GpuProcess``."""
if pid is None: if pid is None:
pid = os.getpid() pid = os.getpid()
@ -385,7 +385,7 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
def __init__(self, pid: int, device: 'Device', # pylint: disable=unused-argument def __init__(self, pid: int, device: 'Device', # pylint: disable=unused-argument
gpu_memory: Optional[Union[int, NaType]] = None, gpu_memory: Optional[Union[int, NaType]] = None,
type: Optional[Union[str, NaType]] = None) -> None: # pylint: disable=redefined-builtin type: Optional[Union[str, NaType]] = None) -> None: # pylint: disable=redefined-builtin
"""Initializes the instance returned by `__new__()`.""" """Initializes the instance returned by ``__new__()``."""
if gpu_memory is None and not hasattr(self, '_gpu_memory'): if gpu_memory is None and not hasattr(self, '_gpu_memory'):
gpu_memory = NA gpu_memory = NA
@ -456,54 +456,54 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
"""The GPU device the process running on. """The GPU device the process running on.
The same host process can use multiple GPU devices. The same host process can use multiple GPU devices.
The `GpuProcess` instances representing the same PID on the host The ``GpuProcess`` instances representing the same PID on the host
but different GPU devices are different. but different GPU devices are different.
""" """
return self._device return self._device
def gpu_instance_id(self) -> Union[int, NaType]: def gpu_instance_id(self) -> Union[int, NaType]:
"""The GPU instance ID of the MIG device, or `nvitop.NA` if not available.""" """The GPU instance ID of the MIG device, or ``nvitop.NA`` if not available."""
return self._gpu_instance_id return self._gpu_instance_id
def compute_instance_id(self) -> Union[int, NaType]: def compute_instance_id(self) -> Union[int, NaType]:
"""The compute instance ID of the MIG device, or `nvitop.NA` if not available.""" """The compute instance ID of the MIG device, or ``nvitop.NA`` if not available."""
return self._compute_instance_id return self._compute_instance_id
def gpu_memory(self) -> Union[int, NaType]: # in bytes def gpu_memory(self) -> Union[int, NaType]: # in bytes
"""The used GPU memory in bytes, or `nvitop.NA` if not available.""" """The used GPU memory in bytes, or ``nvitop.NA`` if not available."""
return self._gpu_memory return self._gpu_memory
def gpu_memory_human(self) -> Union[str, NaType]: # in human readable def gpu_memory_human(self) -> Union[str, NaType]: # in human readable
"""The used GPU memory in human readable format, or `nvitop.NA` if not available.""" """The used GPU memory in human readable format, or ``nvitop.NA`` if not available."""
return self._gpu_memory_human return self._gpu_memory_human
def gpu_memory_percent(self) -> Union[float, NaType]: # in percentage def gpu_memory_percent(self) -> Union[float, NaType]: # in percentage
"""The percentage of used GPU memory by the process, or `nvitop.NA` if not available.""" """The percentage of used GPU memory by the process, or ``nvitop.NA`` if not available."""
return self._gpu_memory_percent return self._gpu_memory_percent
def gpu_sm_utilization(self) -> Union[int, NaType]: # in percentage def gpu_sm_utilization(self) -> Union[int, NaType]: # in percentage
"""The utilization rate of SM (Streaming Multiprocessor), or `nvitop.NA` if not available.""" """The utilization rate of SM (Streaming Multiprocessor), or ``nvitop.NA`` if not available."""
return self._gpu_sm_utilization return self._gpu_sm_utilization
def gpu_memory_utilization(self) -> Union[int, NaType]: # in percentage def gpu_memory_utilization(self) -> Union[int, NaType]: # in percentage
"""The utilization rate of GPU memory bandwidth, or `nvitop.NA` if not available.""" """The utilization rate of GPU memory bandwidth, or ``nvitop.NA`` if not available."""
return self._gpu_memory_utilization return self._gpu_memory_utilization
def gpu_encoder_utilization(self) -> Union[int, NaType]: # in percentage def gpu_encoder_utilization(self) -> Union[int, NaType]: # in percentage
"""The utilization rate of the encoder, or `nvitop.NA` if not available.""" """The utilization rate of the encoder, or ``nvitop.NA`` if not available."""
return self._gpu_encoder_utilization return self._gpu_encoder_utilization
def gpu_decoder_utilization(self) -> Union[int, NaType]: # in percentage def gpu_decoder_utilization(self) -> Union[int, NaType]: # in percentage
"""The utilization rate of the decoder, or `nvitop.NA` if not available.""" """The utilization rate of the decoder, or ``nvitop.NA`` if not available."""
return self._gpu_decoder_utilization return self._gpu_decoder_utilization
@ -583,7 +583,7 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
@auto_garbage_clean(fallback=NA) @auto_garbage_clean(fallback=NA)
def running_time(self) -> Union[datetime.timedelta, NaType]: def running_time(self) -> Union[datetime.timedelta, NaType]:
"""The elapsed time this process has been running in `datetime.timedelta`.""" """The elapsed time this process has been running in ``datetime.timedelta``."""
return self.host.running_time() return self.host.running_time()
@ -733,10 +733,10 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
@classmethod @classmethod
def take_snapshots(cls, gpu_processes: Iterable['GpuProcess'], *, # batched version of `as_snapshot` def take_snapshots(cls, gpu_processes: Iterable['GpuProcess'], *, # batched version of `as_snapshot`
failsafe=False) -> List[Snapshot]: failsafe=False) -> List[Snapshot]:
"""Takes snapshots for a list of `GpuProcess` instances. """Takes snapshots for a list of ``GpuProcess`` instances.
If *failsafe* is `True`, then if any method fails, the fallback value in If *failsafe* is ``True``, then if any method fails, the fallback value in
`auto_garbage_clean(fallback)` will be used. ``auto_garbage_clean(fallback)`` will be used.
""" """
cache = {} cache = {}

View file

@ -77,28 +77,28 @@ class NotApplicableType(str):
return math.nan return math.nan
def __lt__(self, x): def __lt__(self, x):
"""The `NA` is always greater than any number. Use the dictionary order for string.""" """The ``NA`` is always greater than any number. Use the dictionary order for string."""
if isinstance(x, (int, float)): if isinstance(x, (int, float)):
return False return False
return super().__lt__(x) return super().__lt__(x)
def __le__(self, x): def __le__(self, x):
"""The `NA` is always greater than any number. Use the dictionary order for string.""" """The ``NA`` is always greater than any number. Use the dictionary order for string."""
if isinstance(x, (int, float)): if isinstance(x, (int, float)):
return False return False
return super().__le__(x) return super().__le__(x)
def __gt__(self, x): def __gt__(self, x):
"""The `NA` is always greater than any number. Use the dictionary order for string.""" """The ``NA`` is always greater than any number. Use the dictionary order for string."""
if isinstance(x, (int, float)): if isinstance(x, (int, float)):
return True return True
return super().__gt__(x) return super().__gt__(x)
def __ge__(self, x): def __ge__(self, x):
"""The `NA` is always greater than any number. Use the dictionary order for string.""" """The ``NA`` is always greater than any number. Use the dictionary order for string."""
if isinstance(x, (int, float)): if isinstance(x, (int, float)):
return True return True
@ -116,7 +116,7 @@ class NotApplicableType(str):
# NA is NotApplicableType() -> True (NotApplicableType is a singleton class) # NA is NotApplicableType() -> True (NotApplicableType is a singleton class)
NaType = NotApplicableType NaType = NotApplicableType
NA = NotApplicable = NotApplicableType() NA = NotApplicable = NotApplicableType()
"""The singleton instance of `NotApplicableType`. The actual value is 'N/A'.""" """The singleton instance of ``NotApplicableType``. The actual value is 'N/A'."""
KiB = 1 << 10 KiB = 1 << 10
@ -167,7 +167,7 @@ def bytes2human(x): # pylint: disable=too-many-return-statements
def timedelta2human(dt): def timedelta2human(dt):
"""Converts `datetime.timedelta` instance to a human readable string.""" """Converts ``datetime.timedelta`` instance to a human readable string."""
if isinstance(dt, (int, float)): if isinstance(dt, (int, float)):
dt = datetime.timedelta(seconds=dt) dt = datetime.timedelta(seconds=dt)
@ -209,7 +209,7 @@ def boolify(string, default=None):
class Snapshot: class Snapshot:
"""A dict-like object that holds the snapshot values. """A dict-like object that holds the snapshot values.
The value can be accessed by `snapshot.name` or `snapshot[name]` syntax. The value can be accessed by ``snapshot.name`` or ``snapshot['name']`` syntax.
Missing attributes will be automatically fetched from the original object. Missing attributes will be automatically fetched from the original object.
""" """
@ -253,7 +253,7 @@ class Snapshot:
return attribute return attribute
def __getitem__(self, name): def __getitem__(self, name):
"""Supports `dict[name]` syntax.""" """Supports ``dict['name']`` syntax."""
try: try:
return self.__getattr__(name) return self.__getattr__(name)
@ -261,7 +261,7 @@ class Snapshot:
raise KeyError from e raise KeyError from e
def __setitem__(self, name, value): def __setitem__(self, name, value):
"""Supports `dict[name] = value` syntax.""" """Supports ``dict['name'] = value`` syntax."""
self.__setattr__(name, value) self.__setattr__(name, value)