docs: add Sphinx-based documents

Signed-off-by: Xuehai Pan <XuehaiPan@pku.edu.cn>
This commit is contained in:
Xuehai Pan 2022-07-01 12:02:09 +08:00
parent 3bb17f6cc9
commit 102ee45960
24 changed files with 635 additions and 172 deletions

20
docs/Makefile Normal file
View file

@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# Every target is forwarded to `sphinx-build -M <target>` ("make mode"), so
# `make html`, `make latexpdf`, `make clean`, ... all work without a dedicated
# rule for each builder.

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
# NOTE: recipe lines must begin with a hard TAB, not spaces.
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "Makefile" is listed as phony so the catch-all rule below never tries to
# remake this file itself.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

35
docs/make.bat Normal file
View file

@ -0,0 +1,35 @@
@ECHO OFF

REM Run from the directory that contains this script, whatever the caller's
REM current directory is. Every exit path below must undo this with popd.
pushd %~dp0

REM Command file for Sphinx documentation

REM Allow the caller to override the sphinx-build executable via SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

REM Probe for the executable; the errorlevel 9009 check below detects the
REM case where the command could not be found.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	REM Restore the caller's directory before bailing out; previously this
	REM path left the calling shell inside the docs directory.
	popd
	exit /b 1
)

REM No builder name given: show the Sphinx help instead.
if "%1" == "" goto help

REM Forward the requested builder to Sphinx "make mode".
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd

6
docs/requirements.txt Normal file
View file

@ -0,0 +1,6 @@
sphinx
sphinx-rtd-theme
sphinx-copybutton
pytorch-lightning >= 1.5.0
tensorflow >= 2.0

View file

View file

@ -0,0 +1,3 @@
/* Remove the width cap on elements with the .wy-nav-content class so they can
   use the full available width.
   NOTE(review): presumably this targets the sphinx_rtd_theme content column
   and is the 'style.css' loaded through html_css_files in conf.py; confirm. */
.wy-nav-content {
max-width: none;
}

View file

View file

@ -0,0 +1,45 @@
nvitop.callbacks package
========================

Submodules
----------

nvitop.callbacks.keras module
-----------------------------

.. automodule:: nvitop.callbacks.keras
   :members:
   :undoc-members:
   :show-inheritance:

nvitop.callbacks.pytorch\_lightning module
------------------------------------------

.. automodule:: nvitop.callbacks.pytorch_lightning
   :members:
   :undoc-members:
   :show-inheritance:

nvitop.callbacks.tensorboard module
-----------------------------------

.. automodule:: nvitop.callbacks.tensorboard
   :members:
   :undoc-members:
   :show-inheritance:

nvitop.callbacks.utils module
-----------------------------

.. automodule:: nvitop.callbacks.utils
   :members:
   :undoc-members:
   :show-inheritance:

Module contents
---------------

.. automodule:: nvitop.callbacks
   :members:
   :undoc-members:
   :show-inheritance:

View file

@ -0,0 +1,8 @@
nvitop.core.collector module
----------------------------

.. automodule:: nvitop.core.collector
   :members:
   :undoc-members:
   :show-inheritance:
   :member-order: bysource

View file

@ -0,0 +1,8 @@
nvitop.core.device module
-------------------------

.. automodule:: nvitop.core.device
   :members:
   :undoc-members:
   :show-inheritance:
   :member-order: bysource

View file

@ -0,0 +1,8 @@
nvitop.core.host module
-----------------------

.. automodule:: nvitop.core.host
   :members:
   :undoc-members:
   :show-inheritance:
   :member-order: bysource

View file

@ -0,0 +1,8 @@
nvitop.core.libnvml module
--------------------------

.. automodule:: nvitop.core.libnvml
   :members:
   :undoc-members:
   :show-inheritance:
   :member-order: bysource

View file

@ -0,0 +1,8 @@
nvitop.core.process module
--------------------------

.. automodule:: nvitop.core.process
   :members:
   :undoc-members:
   :show-inheritance:
   :member-order: bysource

View file

@ -0,0 +1,8 @@
nvitop.core.utils module
------------------------

.. automodule:: nvitop.core.utils
   :members:
   :undoc-members:
   :show-inheritance:
   :member-order: bysource

View file

@ -0,0 +1,24 @@
nvitop package
==============

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   core/device
   core/process
   core/host
   core/collector
   core/libnvml
   core/utils
   callbacks

Module contents
---------------

.. automodule:: nvitop
   :members:
   :undoc-members:
   :show-inheritance:

136
docs/source/conf.py Normal file
View file

@ -0,0 +1,136 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys

# Prepend the directory three levels above this file (the repository root for
# docs/source/conf.py) so that autodoc can import the package being documented.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

# -- Project information -----------------------------------------------------

project = 'nvitop: the one-stop solution for GPU process management.'
copyright = '2022, Xuehai Pan'
author = 'Xuehai Pan'

# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.doctest',
    'sphinx.ext.intersphinx',
    'sphinx.ext.todo',
    'sphinx.ext.coverage',
    'sphinx.ext.napoleon',
    'sphinx.ext.mathjax',
    'sphinx.ext.ifconfig',
    'sphinx.ext.viewcode',
    'sphinx.ext.githubpages',
    'sphinx.ext.extlinks',
    'sphinx_copybutton',
    'sphinx_rtd_theme',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'default'

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_css_files = [
    'style.css',
]

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# ``:gitcode:`path``` links to ``path`` in the GitHub repository at HEAD.
# The caption must contain '%s' (Sphinx >= 4.0); '%s' alone renders the link
# text as just the given path. The former empty-string caption relied on the
# removed "prefix" behaviour and no longer works on current Sphinx releases.
extlinks = {
    'gitcode': ('https://github.com/XuehaiPan/nvitop/blob/HEAD/%s', '%s'),
}

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'nvitop', 'An interactive NVIDIA-GPU process viewer.', [author], 1),
]

# -- Extension configuration -------------------------------------------------

# -- Options for napoleon extension ------------------------------------------

napoleon_include_init_with_doc = True
napoleon_include_private_with_doc = False
napoleon_include_special_with_doc = True

# -- Options for intersphinx extension ---------------------------------------

# Refer to the Python standard library. Uses the named mapping form
# ``{name: (target_url, inventory)}``; the legacy ``{url: None}`` form is
# rejected by current Sphinx releases.
intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}

# -- Options for todo extension ----------------------------------------------

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True

18
docs/source/index.rst Normal file
View file

@ -0,0 +1,18 @@
Welcome to nvitop's documentation!
==================================

An interactive NVIDIA-GPU process viewer, the one-stop solution for GPU process management.

.. toctree::
   :maxdepth: 4
   :caption: Contents:

   apis/index

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View file

@ -22,15 +22,21 @@ class GpuStatsLogger(Callback): # pylint: disable=too-many-instance-attributes
a CSVLogger callback to the model.
Args:
memory_utilization: Set to ``True`` to log used, free and the percentage of memory
memory_utilization (bool):
Set to ``True`` to log used, free and the percentage of memory
utilization at the start and end of each step. Default: ``True``.
gpu_utilization: Set to ``True`` to log the percentage of GPU utilization
gpu_utilization (bool):
Set to ``True`` to log the percentage of GPU utilization
at the start and end of each step. Default: ``True``.
intra_step_time: Set to ``True`` to log the time of each step. Default: ``False``.
inter_step_time: Set to ``True`` to log the time between the end of one step
intra_step_time (bool):
Set to ``True`` to log the time of each step. Default: ``False``.
inter_step_time (bool):
Set to ``True`` to log the time between the end of one step
and the start of the next step. Default: ``False``.
fan_speed: Set to ``True`` to log percentage of fan speed. Default: ``False``.
temperature: Set to ``True`` to log the gpu temperature in degree Celsius.
fan_speed (bool):
Set to ``True`` to log percentage of fan speed. Default: ``False``.
temperature (bool):
Set to ``True`` to log the gpu temperature in degree Celsius.
Default: ``False``.
Raises:

View file

@ -22,15 +22,21 @@ class GpuStatsLogger(Callback): # pylint: disable=too-many-instance-attributes
callback and in order to use it you need to assign a logger in the ``Trainer``.
Args:
memory_utilization: Set to ``True`` to log used, free and the percentage of memory
memory_utilization (bool):
Set to ``True`` to log used, free and the percentage of memory
utilization at the start and end of each step. Default: ``True``.
gpu_utilization: Set to ``True`` to log the percentage of GPU utilization
gpu_utilization (bool):
Set to ``True`` to log the percentage of GPU utilization
at the start and end of each step. Default: ``True``.
intra_step_time: Set to ``True`` to log the time of each step. Default: ``False``.
inter_step_time: Set to ``True`` to log the time between the end of one step
intra_step_time (bool):
Set to ``True`` to log the time of each step. Default: ``False``.
inter_step_time (bool):
Set to ``True`` to log the time between the end of one step
and the start of the next step. Default: ``False``.
fan_speed: Set to ``True`` to log percentage of fan speed. Default: ``False``.
temperature: Set to ``True`` to log the gpu temperature in degree Celsius.
fan_speed (bool):
Set to ``True`` to log percentage of fan speed. Default: ``False``.
temperature (bool):
Set to ``True`` to log the gpu temperature in degree Celsius.
Default: ``False``.
Raises:

View file

@ -159,10 +159,10 @@ class ResourceMetricCollector: # pylint: disable=too-many-instance-attributes
"""A class for collecting resource metrics.
Args:
devices (iterable of Device):
devices (Iterable[Device]):
Set of Device instances for logging. If not given, all physical
devices on board will be used.
root_pids (set of int):
root_pids (Set[int]):
A set of PIDs, only the status of the children processes on the GPUs
will be collected. If not given, the PID of the current process will
be used.
@ -422,7 +422,7 @@ class ResourceMetricCollector: # pylint: disable=too-many-instance-attributes
the sub-collections will be reset as well.
Args:
tag (str or None):
tag (Optional[str]):
The tag to reset. If None, the current active collection
will be reset.

View file

@ -3,18 +3,24 @@
"""The live classes for GPU devices.
The core classes are `Device` and `CudaDevice`. The type of returned instance created by `Class(args)`
The core classes are ``Device`` and ``CudaDevice``. The type of the instance returned by ``Class(args)``
depends on the given arguments.
`Device()` returns:
``Device()`` returns:
.. code-block:: python
- (index: int) -> PhysicalDevice
- (index: (int, int)) -> MigDevice
- (uuid: str) -> Union[PhysicalDevice, MigDevice] # depending on the UUID value
- (bus_id: str) -> PhysicalDevice
`CudaDevice()` returns:
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
``CudaDevice()`` returns:
.. code-block:: python
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
- (nvml_index: int) -> CudaDevice
- (nvml_index: (int, int)) -> CudaMigDevice
@ -127,7 +133,7 @@ def _does_any_device_support_mig_mode() -> bool:
def is_mig_device_uuid(uuid: Optional[str]) -> bool:
"""Returns `True` if the argument is a MIG device UUID, otherwise, returns `False`."""
"""Returns ``True`` if the argument is a MIG device UUID, otherwise, returns ``False``."""
if isinstance(uuid, str):
match = Device.UUID_PATTERN.match(uuid)
@ -139,7 +145,10 @@ def is_mig_device_uuid(uuid: Optional[str]) -> bool:
class Device: # pylint: disable=too-many-instance-attributes,too-many-public-methods
"""Live class of the GPU devices, different from the device snapshots.
`Device.__new__()` returns different types depending on the given arguments.
``Device.__new__()`` returns different types depending on the given arguments.
.. code-block:: python
- (index: int) -> PhysicalDevice
- (index: (int, int)) -> MigDevice
- (uuid: str) -> Union[PhysicalDevice, MigDevice] # depending on the UUID value
@ -206,6 +215,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Command line equivalent:
.. code:: bash
nvidia-smi --id=0 --format=csv,noheader,nounits --query-gpu=driver_version
"""
@ -233,6 +244,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Command line equivalent:
.. code:: bash
nvidia-smi --id=0 --format=csv,noheader,nounits --query-gpu=count
"""
@ -251,14 +264,14 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Returns a list of devices of the given indices.
Args:
indices (list of int or tuple of two ints):
Indices of the devices. For each index, get `PhysicalDevice` for single int
and `MigDevice` for tuple (int, int). That is:
indices (Iterable[Union[int, Tuple[int, int]]]):
Indices of the devices. For each index, get ``PhysicalDevice`` for single int
and ``MigDevice`` for tuple (int, int). That is:
- (int) -> PhysicalDevice
- ((int, int)) -> MigDevice
Returns: List[Union[PhysicalDevice, MigDevice]]
A list of `PhysicalDevice` and/or `MigDevice` instances of the given indices.
A list of ``PhysicalDevice`` and/or ``MigDevice`` instances of the given indices.
"""
if indices is None:
@ -271,19 +284,19 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
@staticmethod
def from_cuda_visible_devices() -> List['CudaDevice']:
"""Returns a list of CUDA devices of the given CUDA indices.
The CUDA ordinal will be enumerate from the `CUDA_VISIBLE_DEVICES` environment variable.
"""Returns a list of all CUDA visible devices.
The CUDA ordinal will be enumerated from the environment variable ``CUDA_VISIBLE_DEVICES``.
See also for CUDA Device Enumeration:
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
Returns: List[CudaDevice]
A list of `CudaDevice` instances.
A list of ``CudaDevice`` instances.
Raises:
RuntimeError:
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries).
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
"""
visible_device_indices = Device.parse_cuda_visible_devices()
@ -294,28 +307,30 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
return cuda_devices
cuda_all = from_cuda_visible_devices
@staticmethod
def from_cuda_indices(cuda_indices: Optional[Union[int, Iterable[int]]] = None) -> List['CudaDevice']:
"""Returns a list of CUDA devices of the given CUDA indices.
The CUDA ordinal will be enumerate from the `CUDA_VISIBLE_DEVICES` environment variable.
The CUDA ordinal will be enumerated from the environment variable ``CUDA_VISIBLE_DEVICES``.
See also for CUDA Device Enumeration:
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
Args:
cuda_indices (list of int):
The value of `CUDA_VISIBLE_DEVICES`, if not given, the value from the environment
cuda_indices (Iterable[int]):
The value of ``CUDA_VISIBLE_DEVICES``, if not given, the value from the environment
will be used.
Returns: List[CudaDevice]
A list of `CudaDevice` of the given CUDA indices.
A list of ``CudaDevice`` of the given CUDA indices.
Raises:
RuntimeError:
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries).
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
RuntimeError:
If the index is out of range for the given `CUDA_VISIBLE_DEVICES` environment variable.
If the index is out of range for the given environment variable ``CUDA_VISIBLE_DEVICES``.
"""
cuda_devices = Device.from_cuda_visible_devices()
@ -340,15 +355,15 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
@staticmethod
def parse_cuda_visible_devices(cuda_visible_devices: Optional[str] = None) -> Union[List[int],
List[Tuple[int, int]]]:
"""Parses the given `CUDA_VISIBLE_DEVICES` value into NVML device indices.
"""Parses the given ``CUDA_VISIBLE_DEVICES`` value into NVML device indices.
See also for CUDA Device Enumeration:
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
Args:
cuda_visible_devices (str or None):
The value of the `CUDA_VISIBLE_DEVICES` variable. If not given, the value from the
cuda_visible_devices (Optional[str]):
The value of the ``CUDA_VISIBLE_DEVICES`` variable. If not given, the value from the
environment will be used.
Returns: Union[List[int], List[Tuple[int, int]]]
@ -357,7 +372,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Raises:
RuntimeError:
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries).
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
"""
if cuda_visible_devices is None:
@ -375,7 +390,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
@ttl_cache(ttl=300.0)
def _parse_cuda_visible_devices(cuda_visible_devices: str) -> Union[List[int],
List[Tuple[int, int]]]:
"""The underlining implementation for `parse_cuda_visible_devices`. The result will be cached."""
"""The underlining implementation for ``parse_cuda_visible_devices``. The result will be cached."""
def from_index_or_uuid(index_or_uuid: Union[int, str]) -> 'Device':
nonlocal use_integer_identifiers
@ -430,15 +445,17 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
bus_id: Optional[str] = None) -> 'Device':
"""Creates a new instance of Device. The type of the result is determined by the given argument.
- (index: int) -> PhysicalDevice
- (index: (int, int)) -> MigDevice
- (uuid: str) -> Union[PhysicalDevice, MigDevice] # depending on the UUID value
- (bus_id: str) -> PhysicalDevice
.. code-block:: python
- (index: int) -> PhysicalDevice
- (index: (int, int)) -> MigDevice
- (uuid: str) -> Union[PhysicalDevice, MigDevice] # depending on the UUID value
- (bus_id: str) -> PhysicalDevice
Note: This method takes exactly 1 non-None argument.
Returns: Union[PhysicalDevice, MigDevice]
A `PhysicalDevice` instance or a `MigDevice` instance.
A ``PhysicalDevice`` instance or a ``MigDevice`` instance.
Raises:
TypeError:
@ -480,7 +497,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
def __init__(self, index: Optional[Union[int, str]] = None, *,
uuid: Optional[str] = None,
bus_id: Optional[str] = None) -> None:
"""Initializes the instance created by `__new__()`."""
"""Initializes the instance created by ``__new__()``."""
if isinstance(index, str) and self.UUID_PATTERN.match(index) is not None: # passed by UUID
index, uuid = None, index
@ -545,12 +562,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
def __getattr__(self, name: str) -> Union[Any, Callable[..., Any]]:
"""Get the object attribute.
If the attribute is not defined, make a method from `pynvml.nvmlDeviceGet<AttributeName>(handle)`.
If the attribute is not defined, make a method from ``pynvml.nvmlDeviceGet<AttributeName>(handle)``.
The attribute name will be converted to PascalCase string.
Raises:
AttributeError:
If the attribute is not defined in `pynvml.py`.
If the attribute is not defined in ``pynvml.py``.
Examples:
@ -646,9 +663,9 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Raises:
RuntimeError:
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries).
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
RuntimeError:
If the current device is not visible to CUDA applications (i.e. not listed in `CUDA_VISIBLE_DEVICES`).
If the current device is not visible to CUDA applications (i.e. not listed in ``CUDA_VISIBLE_DEVICES``).
"""
if self._cuda_index is None:
@ -668,10 +685,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""The official product name of the GPU. This is an alphanumeric string. For all products.
Returns: Union[str, NaType]
The official product name, or `nvitop.NA` (str: 'N/A') when not available.
The official product name, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=name
"""
@ -684,10 +703,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
not correspond to any physical label on the board.
Returns: Union[str, NaType]
The UUID of the device, or `nvitop.NA` (str: 'N/A') when not available.
The UUID of the device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=uuid
"""
@ -699,10 +720,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""PCI bus ID as "domain:bus:device.function", in hex.
Returns: Union[str, NaType]
The PCI bus ID of the device, or `nvitop.NA` (str: 'N/A') when not available.
The PCI bus ID of the device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=pci.bus_id
"""
@ -715,10 +738,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
unique immutable alphanumeric value.
Returns: Union[str, NaType]
The serial number of the device, or `nvitop.NA` (str: 'N/A') when not available.
The serial number of the device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=serial
"""
@ -730,7 +755,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Returns a named tuple with memory information (in bytes) for the device.
Returns: MemoryInfo(total, free, used)
A named tuple with memory information, the item could be `nvitop.NA` (str: 'N/A') when not available.
A named tuple with memory information, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
memory_info = nvml.nvmlQuery('nvmlDeviceGetMemoryInfo', self.handle)
@ -742,10 +767,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total installed GPU memory in bytes.
Returns: Union[int, NaType]
Total installed GPU memory in bytes, or `nvitop.NA` (str: 'N/A') when not available.
Total installed GPU memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=memory.total
"""
@ -757,10 +784,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total memory allocated by active contexts in bytes.
Returns: Union[int, NaType]
Total memory allocated by active contexts in bytes, or `nvitop.NA` (str: 'N/A') when not available.
Total memory allocated by active contexts in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=memory.used
"""
@ -770,10 +799,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total free memory in bytes.
Returns: Union[int, NaType]
Total free memory in bytes, or `nvitop.NA` (str: 'N/A') when not available.
Total free memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=memory.free
"""
@ -783,7 +814,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total installed GPU memory in human readable format.
Returns: Union[str, NaType]
Total installed GPU memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available.
Total installed GPU memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
if self._memory_total_human is NA:
@ -794,7 +825,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total memory allocated by active contexts in human readable format.
Returns: Union[str, NaType]
Total memory allocated by active contexts in human readable format, or `nvitop.NA` (str: 'N/A') when not available.
Total memory allocated by active contexts in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" # pylint: disable=line-too-long
return bytes2human(self.memory_used())
@ -803,7 +834,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total free memory in human readable format.
Returns: Union[str, NaType]
Total free memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available.
Total free memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
return bytes2human(self.memory_free())
@ -812,7 +843,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""The percentage of used memory over total memory (0 <= p <= 100).
Returns: Union[float, NaType]
The percentage of used memory over total memory, or `nvitop.NA` (str: 'N/A') when not available.
The percentage of used memory over total memory, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
memory_info = self.memory_info()
@ -835,7 +866,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Returns a named tuple with BAR1 memory information (in bytes) for the device.
Returns: MemoryInfo(total, free, used)
A named tuple with BAR1 memory information, the item could be `nvitop.NA` (str: 'N/A') when not available.
A named tuple with BAR1 memory information, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
memory_info = nvml.nvmlQuery('nvmlDeviceGetBAR1MemoryInfo', self.handle)
@ -847,7 +878,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total BAR1 memory in bytes.
Returns: Union[int, NaType]
Total BAR1 memory in bytes, or `nvitop.NA` (str: 'N/A') when not available.
Total BAR1 memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
return self.bar1_memory_info().total
@ -856,7 +887,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total used BAR1 memory in bytes.
Returns: Union[int, NaType]
Total used BAR1 memory in bytes, or `nvitop.NA` (str: 'N/A') when not available.
Total used BAR1 memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
return self.bar1_memory_info().used
@ -865,7 +896,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total free BAR1 memory in bytes.
Returns: Union[int, NaType]
Total free BAR1 memory in bytes, or `nvitop.NA` (str: 'N/A') when not available.
Total free BAR1 memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
return self.bar1_memory_info().free
@ -874,7 +905,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total BAR1 memory in human readable format.
Returns: Union[str, NaType]
Total BAR1 memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available.
Total BAR1 memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
return bytes2human(self.bar1_memory_total())
@ -883,7 +914,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total used BAR1 memory in human readable format.
Returns: Union[str, NaType]
Total used BAR1 memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available.
Total used BAR1 memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
return bytes2human(self.bar1_memory_used())
@ -892,7 +923,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total free BAR1 memory in human readable format.
Returns: Union[str, NaType]
Total free BAR1 memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available.
Total free BAR1 memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
return bytes2human(self.bar1_memory_free())
@ -901,7 +932,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""The percentage of used BAR1 memory over total BAR1 memory (0 <= p <= 100).
Returns: Union[float, NaType]
The percentage of used BAR1 memory over total BAR1 memory, or `nvitop.NA` (str: 'N/A') when not available.
The percentage of used BAR1 memory over total BAR1 memory, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
memory_info = self.bar1_memory_info()
@ -924,7 +955,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Returns a named tuple with GPU utilization rates (in percentage) for the device.
Returns: UtilizationRates(gpu, memory, encoder, decoder)
A named tuple with GPU utilization rates (in percentage) for the device, the item could be `nvitop.NA` (str: 'N/A') when not available.
A named tuple with GPU utilization rates (in percentage) for the device, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" # pylint: disable=line-too-long
gpu, memory, encoder, decoder = NA, NA, NA, NA
@ -948,10 +979,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
The sample period may be between 1 second and 1/6 second depending on the product.
Returns: Union[int, NaType]
The GPU utilization rate in percentage, or `nvitop.NA` (str: 'N/A') when not available.
The GPU utilization rate in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=utilization.gpu
"""
@ -964,10 +997,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
The sample period may be between 1 second and 1/6 second depending on the product.
Returns: Union[int, NaType]
The memory bandwidth utilization rate of the GPU in percentage, or `nvitop.NA` (str: 'N/A') when not available.
The memory bandwidth utilization rate of the GPU in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=utilization.memory
""" # pylint: disable=line-too-long
@ -977,7 +1012,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""The encoder utilization rate in percentage.
Returns: Union[int, NaType]
The encoder utilization rate in percentage, or `nvitop.NA` (str: 'N/A') when not available.
The encoder utilization rate in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
return self.utilization_rates().encoder
@ -986,7 +1021,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""The decoder utilization rate in percentage.
Returns: Union[int, NaType]
The decoder utilization rate in percentage, or `nvitop.NA` (str: 'N/A') when not available.
The decoder utilization rate in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
return self.utilization_rates().decoder
@ -997,7 +1032,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Returns a named tuple with current clock speeds (in MHz) for the device.
Returns: ClockInfos(graphics, sm, memory, video)
A named tuple with current clock speeds (in MHz) for the device, the item could be `nvitop.NA` (str: 'N/A') when not available.
A named tuple with current clock speeds (in MHz) for the device, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" # pylint: disable=line-too-long
return ClockInfos(
@ -1015,7 +1050,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Returns a named tuple with maximum clock speeds (in MHz) for the device.
Returns: ClockInfos(graphics, sm, memory, video)
A named tuple with maximum clock speeds (in MHz) for the device, the item could be `nvitop.NA` (str: 'N/A') when not available.
A named tuple with maximum clock speeds (in MHz) for the device, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
""" # pylint: disable=line-too-long
clock_infos = self._max_clock_infos._asdict()
@ -1042,10 +1077,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Current frequency of graphics (shader) clock in MHz.
Returns: Union[int, NaType]
The current frequency of graphics (shader) clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
The current frequency of graphics (shader) clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.graphics
"""
@ -1055,10 +1092,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Current frequency of SM (Streaming Multiprocessor) clock in MHz.
Returns: Union[int, NaType]
The current frequency of SM (Streaming Multiprocessor) clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
The current frequency of SM (Streaming Multiprocessor) clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.sm
""" # pylint: disable=line-too-long
@ -1068,10 +1107,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Current frequency of memory clock in MHz.
Returns: Union[int, NaType]
The current frequency of memory clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
The current frequency of memory clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.memory
"""
@ -1081,10 +1122,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Current frequency of video encoder/decoder clock in MHz.
Returns: Union[int, NaType]
The current frequency of video encoder/decoder clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
The current frequency of video encoder/decoder clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.video
"""
@ -1094,10 +1137,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Maximum frequency of graphics (shader) clock in MHz.
Returns: Union[int, NaType]
The maximum frequency of graphics (shader) clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
The maximum frequency of graphics (shader) clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.graphics
"""
@ -1107,10 +1152,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Maximum frequency of SM (Streaming Multiprocessor) clock in MHz.
Returns: Union[int, NaType]
The maximum frequency of SM (Streaming Multiprocessor) clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
The maximum frequency of SM (Streaming Multiprocessor) clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.sm
""" # pylint: disable=line-too-long
@ -1120,10 +1167,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Maximum frequency of memory clock in MHz.
Returns: Union[int, NaType]
The maximum frequency of memory clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
The maximum frequency of memory clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.memory
"""
@ -1133,10 +1182,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Maximum frequency of video encoder/decoder clock in MHz.
Returns: Union[int, NaType]
The maximum frequency of video encoder/decoder clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
The maximum frequency of video encoder/decoder clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.video
"""
@ -1151,10 +1202,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
because they rely on cooling via fans in the surrounding enclosure.
Returns: Union[int, NaType]
The fan speed value in percentage, or `nvitop.NA` (str: 'N/A') when not available.
The fan speed value in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=fan.speed
"""
@ -1165,10 +1218,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Core GPU temperature in degrees C.
Returns: Union[int, NaType]
The core GPU temperature in Celsius degrees, or `nvitop.NA` (str: 'N/A') when not available.
The core GPU temperature in Celsius degrees, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=temperature.gpu
"""
@ -1180,10 +1235,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""The last measured power draw for the entire board in milliwatts.
Returns: Union[int, NaType]
The power draw for the entire board in milliwatts, or `nvitop.NA` (str: 'N/A') when not available.
The power draw for the entire board in milliwatts, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
$(( "$(nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=power.draw)" * 1000 ))
"""
@ -1197,10 +1254,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""The software power limit in milliwatts. Set by software like nvidia-smi.
Returns: Union[int, NaType]
The software power limit in milliwatts, or `nvitop.NA` (str: 'N/A') when not available.
The software power limit in milliwatts, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
$(( "$(nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=power.limit)" * 1000 ))
"""
@ -1230,10 +1289,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: Union[str, NaType]
- 'Disabled': if not an active display device.
- 'Enabled': if an active display device.
- `nvitop.NA` (str: 'N/A'): if not available.
- ``nvitop.NA`` (str: ``'N/A'``): if not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=display_active
"""
@ -1248,10 +1309,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: Union[str, NaType]
- 'Disabled': if the display mode is disabled.
- 'Enabled': if the display mode is enabled.
- `nvitop.NA` (str: 'N/A'): if not available.
- ``nvitop.NA`` (str: ``'N/A'``): if not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=display_mode
"""
@ -1268,10 +1331,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: Union[str, NaType]
- 'WDDM': for WDDM driver model on Windows.
- 'WDM': for TCC (WDM) driver model on Windows.
- `nvitop.NA` (str: 'N/A'): if not available, e.g. on Linux.
- ``nvitop.NA`` (str: ``'N/A'``): if not available, e.g. on Linux.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=driver_model.current
"""
@ -1292,10 +1357,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: Union[str, NaType]
- 'Disabled': if the persistence mode is disabled.
- 'Enabled': if the persistence mode is enabled.
- `nvitop.NA` (str: 'N/A'): if not available.
- ``nvitop.NA`` (str: ``'N/A'``): if not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=persistence_mode
"""
@ -1307,10 +1374,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
P12 (minimum performance).
Returns: Union[str, NaType]
The current performance state in format `P<int>`, or `nvitop.NA` (str: 'N/A') when not available.
The current performance state in format ``P<int>``, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=pstate
"""
@ -1324,10 +1393,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Total errors detected across entire chip.
Returns: Union[int, NaType]
The total number of uncorrected errors in volatile ECC memory, or `nvitop.NA` (str: 'N/A') when not available.
The total number of uncorrected errors in volatile ECC memory, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=ecc.errors.uncorrected.volatile.total
""" # pylint: disable=line-too-long
@ -1345,10 +1416,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
- 'Exclusive Thread': deprecated, use Exclusive Process instead
- 'Prohibited': means no contexts are allowed per device (no compute apps).
- 'Exclusive Process': means only one context is allowed per device, usable from multiple threads at a time.
- `nvitop.NA` (str: 'N/A'): if not available.
- ``nvitop.NA`` (str: ``'N/A'``): if not available.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=compute_mode
"""
@ -1375,10 +1448,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: Union[str, NaType]
- 'Disabled': if the MIG mode is disabled.
- 'Enabled': if the MIG mode is enabled.
- `nvitop.NA` (str: 'N/A'): if not available, e.g. the GPU does not support MIG mode.
- ``nvitop.NA`` (str: ``'N/A'``): if not available, e.g. the GPU does not support MIG mode.
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=mig.mode.current
"""
@ -1390,7 +1465,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
return {0: 'Disabled', 1: 'Enabled'}.get(mig_mode, NA)
def is_mig_mode_enabled(self) -> bool:
"""Returns whether the MIG mode is enabled on the device. Returns `False` if MIG mode is
"""Returns whether the MIG mode is enabled on the device. Returns ``False`` if MIG mode is
disabled or the device does not support MIG mode.
"""
@ -1440,7 +1515,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
return processes
def as_snapshot(self) -> Snapshot:
"""Returns a onetime snapshot of the device. The attributes are defined in `SNAPSHOT_KEYS`."""
"""Returns a onetime snapshot of the device. The attributes are defined in ``SNAPSHOT_KEYS``."""
with self.oneshot():
return Snapshot(real=self, index=self.index, physical_index=self.physical_index,
@ -1544,6 +1619,8 @@ class PhysicalDevice(Device):
Command line equivalent:
.. code:: bash
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=index
"""
@ -1636,18 +1713,18 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
"""Returns a list of MIG devices of the given indices.
Args:
indices (list of tuple of two ints):
indices (Iterable[Tuple[int, int]]):
Indices of the MIG devices. Each index is a tuple of two integers.
Returns: List[MigDevice]
A list of `MigDevice` instances of the given indices.
A list of ``MigDevice`` instances of the given indices.
"""
return list(map(cls, indices))
def __init__(self, index: Optional[Union[Tuple[int, int], str]] = None, *, # pylint: disable=super-init-not-called
uuid: Optional[str] = None) -> None:
"""Initializes the instance created by `__new__()`."""
"""Initializes the instance created by ``__new__()``."""
if isinstance(index, str) and self.UUID_PATTERN.match(index) is not None: # passed by UUID
index, uuid = None, index
@ -1727,7 +1804,7 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
"""The gpu instance ID of the MIG device.
Returns: Union[int, NaType]
The gpu instance ID of the MIG device, or `nvitop.NA` (str: 'N/A') when not available.
The gpu instance ID of the MIG device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
if self._gpu_instance_id is NA:
@ -1741,7 +1818,7 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
"""The compute instance ID of the MIG device.
Returns: Union[int, NaType]
The compute instance ID of the MIG device, or `nvitop.NA` (str: 'N/A') when not available.
The compute instance ID of the MIG device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
"""
if self._compute_instance_id is NA:
@ -1752,7 +1829,7 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
return self._compute_instance_id
def as_snapshot(self) -> Snapshot:
"""Returns a onetime snapshot of the device. The attributes are defined in `SNAPSHOT_KEYS`."""
"""Returns a onetime snapshot of the device. The attributes are defined in ``SNAPSHOT_KEYS``."""
snapshot = super().as_snapshot()
snapshot.mig_index = self.mig_index
@ -1764,15 +1841,18 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
class CudaDevice(Device):
"""Class for devices enumerated over the CUDA ordinal. The order can vary for different
`CUDA_VISIBLE_DEVICES` environment variable.
values of the environment variable ``CUDA_VISIBLE_DEVICES``.
See also for CUDA Device Enumeration:
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
`CudaDevice.__new__()` returns different types depending on the given arguments.
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
``CudaDevice.__new__()`` returns different types depending on the given arguments.
.. code-block:: python
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
- (nvml_index: int) -> CudaDevice
- (nvml_index: (int, int)) -> CudaMigDevice
@ -1808,16 +1888,50 @@ class CudaDevice(Device):
@classmethod
def count(cls) -> int:
"""The number of GPUs visible to CUDA applications."""
"""The number of GPUs visible to CUDA applications.
Raises:
RuntimeError:
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
"""
return len(super().parse_cuda_visible_devices())
@classmethod
def all(cls) -> List['CudaDevice']:
"""All CUDA visible devices.
Raises:
RuntimeError:
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
"""
return cls.from_indices()
@classmethod
def from_indices(cls, indices: Optional[Union[int, Iterable[int]]] = None) -> List['CudaDevice']:
"""Returns a list of CUDA devices of the given CUDA indices.
The CUDA ordinals are enumerated from the environment variable ``CUDA_VISIBLE_DEVICES``.
See also for CUDA Device Enumeration:
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
Args:
indices (Optional[Union[int, Iterable[int]]]):
The CUDA indices of the devices to return, if not given, all CUDA visible devices
will be returned.
Returns: List[CudaDevice]
A list of ``CudaDevice`` of the given CUDA indices.
Raises:
RuntimeError:
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
RuntimeError:
If the index is out of range for the given environment variable ``CUDA_VISIBLE_DEVICES``.
"""
return super().from_cuda_indices(indices)
def __new__(cls, cuda_index: Optional[int] = None, *,
@ -1825,15 +1939,17 @@ class CudaDevice(Device):
uuid: Optional[str] = None) -> 'Device':
"""Creates a new instance of CudaDevice. The type of the result is determined by the given argument.
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
- (nvml_index: int) -> CudaDevice
- (nvml_index: (int, int)) -> CudaMigDevice
.. code-block:: python
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
- (nvml_index: int) -> CudaDevice
- (nvml_index: (int, int)) -> CudaMigDevice
Note: This method takes exactly one non-None argument.
Returns: Union[CudaDevice, CudaMigDevice]
A `CudaDevice` instance or a `CudaMigDevice` instance.
A ``CudaDevice`` instance or a ``CudaMigDevice`` instance.
Raises:
TypeError:
@ -1842,9 +1958,9 @@ class CudaDevice(Device):
If the given index is a tuple but does not consist of two integers.
Raises:
RuntimeError:
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries).
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
RuntimeError:
If the index is out of range for the given `CUDA_VISIBLE_DEVICES` environment variable.
If the index is out of range for the given environment variable ``CUDA_VISIBLE_DEVICES``.
"""
if cuda_index is not None and nvml_index is None and uuid is None:
@ -1861,7 +1977,7 @@ class CudaDevice(Device):
def __init__(self, cuda_index: Optional[int] = None, *,
nvml_index: Optional[Union[int, Tuple[int, int]]] = None,
uuid: Optional[str] = None) -> None:
"""Initializes the instance created by `__new__()`.
"""Initializes the instance created by ``__new__()``.
Raises:
RuntimeError:
@ -1895,7 +2011,7 @@ class CudaDevice(Device):
return self.__class__, (self._cuda_index,)
def as_snapshot(self) -> Snapshot:
"""Returns a onetime snapshot of the device. The attributes are defined in `SNAPSHOT_KEYS`."""
"""Returns a onetime snapshot of the device. The attributes are defined in ``SNAPSHOT_KEYS``."""
snapshot = super().as_snapshot()
snapshot.cuda_index = self.cuda_index

View file

@ -1,7 +1,7 @@
# This file is part of nvitop, the interactive NVIDIA-GPU process viewer.
# License: GNU GPL version 3.
"""Shortcuts for package `psutil`.
"""Shortcuts for package ``psutil``.
psutil is a cross-platform library for retrieving information on running processes
and system utilization (CPU, memory, disks, network, sensors) in Python.
@ -53,11 +53,11 @@ def swap_percent():
ppid_map = _psutil._ppid_map # pylint: disable=protected-access
"""Obtains a `{pid: ppid, ...}` dict for all running processes in one shot."""
"""Obtains a ``{pid: ppid, ...}`` dict for all running processes in one shot."""
def reverse_ppid_map(): # pylint: disable=function-redefined
"""Obtains a `{ppid: [pid, ...], ...}` dict for all running processes in one shot."""
"""Obtains a ``{ppid: [pid, ...], ...}`` dict for all running processes in one shot."""
tree = _defaultdict(list)
for pid, ppid in ppid_map().items():

View file

@ -18,11 +18,11 @@ import pynvml
from nvitop.core.utils import NA, colored
__all__ = ['nvml', 'nvmlCheckReturn', 'NVMLError']
__all__ = ['libnvml', 'nvml', 'nvmlCheckReturn', 'NVMLError']
class libnvml:
"""The helper singleton class that holds members from package `nvidia-ml-py`."""
"""The helper singleton class that holds members from package ``nvidia-ml-py``."""
NVMLError = pynvml.NVMLError
"""Base exception class for NVML query errors."""
@ -34,7 +34,7 @@ class libnvml:
c_nvmlDevice_t = pynvml.c_nvmlDevice_t
def __new__(cls) -> 'libnvml':
"""Gets the singleton instance of `libnvml`."""
"""Gets the singleton instance of ``libnvml``."""
if not hasattr(cls, '_instance'):
instance = cls._instance = super().__new__(cls)
@ -62,13 +62,13 @@ class libnvml:
pass
def __enter__(self) -> 'libnvml':
"""Entry of the context manager for `with` statement."""
"""Entry of the context manager for ``with`` statement."""
self._lazy_init()
return self
def __exit__(self, *args, **kwargs) -> None:
"""Shutdowns the NVML context in the context manager for `with` statement."""
"""Shuts down the NVML context in the context manager for ``with`` statement."""
self.__del__()
@ -100,8 +100,8 @@ class libnvml:
If RM detects a driver/library version mismatch, usually after an upgrade of the NVIDIA
driver without reloading the kernel module.
AttributeError:
If cannot find function `nvmlInitWithFlags`, usually the `pynvml` module is overridden
by other modules. Need to reinstall package `nvidia-ml-py`.
If cannot find function ``nvmlInitWithFlags``, usually the ``pynvml`` module is overridden
by other modules. Need to reinstall package ``nvidia-ml-py``.
"""
self.nvmlInitWithFlags(0)
@ -118,8 +118,8 @@ class libnvml:
If RM detects a driver/library version mismatch, usually after an upgrade of the NVIDIA
driver without reloading the kernel module.
AttributeError:
If cannot find function `nvmlInitWithFlags`, usually the `pynvml` module is overridden
by other modules. Need to reinstall package `nvidia-ml-py`.
If cannot find function ``nvmlInitWithFlags``, usually the ``pynvml`` module is overridden
by other modules. Need to reinstall package ``nvidia-ml-py``.
"""
with self._lock:
@ -179,7 +179,7 @@ class libnvml:
If RM detects a driver/library version mismatch, usually after an upgrade of the NVIDIA
driver without reloading the kernel module.
NVMLError_Uninitialized:
If NVML was not first initialized with `nvmlInit()`.
If NVML was not first initialized with ``nvmlInit()``.
"""
pynvml.nvmlShutdown()
@ -199,16 +199,16 @@ class libnvml:
"""Calls a function with the given arguments from NVML. The NVML context will be lazily initialized.
Args:
func (function or str):
func (Union[Callable[..., Any], str]):
The function to call. If it is given by string, lookup for the
function first from `pynvml`.
default (any):
function first from ``pynvml``.
default (Any):
The default value if the query fails.
ignore_errors (bool):
Whether to ignore errors and return the default value.
ignore_function_not_found (bool):
Whether to ignore function not found errors and return the
default value. If set to `False`, a error message will be logged
default value. If set to ``False``, an error message will be logged
to the logger.
*args:
Positional arguments to pass to the query function.
@ -251,7 +251,7 @@ class libnvml:
@staticmethod
def nvmlCheckReturn(retval: Any, types: Optional[Union[type, Tuple[type, ...]]] = None) -> bool:
"""Checks the return value is not `nvitop.NA` and is one of the given types."""
"""Checks the return value is not ``nvitop.NA`` and is one of the given types."""
if types is None:
return retval != NA
@ -259,7 +259,7 @@ class libnvml:
nvml = libnvml()
"""The singleton instance of `libnvml`."""
"""The singleton instance of class ``libnvml``."""
nvmlCheckReturn = nvml.nvmlCheckReturn

View file

@ -81,7 +81,7 @@ _USE_FALLBACK_WHEN_RAISE = threading.local() # see also `GpuProcess.failsafe`
def auto_garbage_clean(fallback=_RAISE):
"""Removes the object references in the instance cache if the method call fails (the process is gone).
The fallback value will be used with `GpuProcess.failsafe` context manager, otherwise raises an
The fallback value will be used with ``GpuProcess.failsafe`` context manager, otherwise raises an
exception when the call fails.
"""
@ -161,7 +161,7 @@ class HostProcess(host.Process, metaclass=ABCMeta):
HOST_SNAPSHOTS = {}
def __new__(cls, pid: Optional[int] = None) -> 'HostProcess':
"""Returns the cached instance of `HostProcess`."""
"""Returns the cached instance of ``HostProcess``."""
if pid is None:
pid = os.getpid()
@ -248,7 +248,7 @@ class HostProcess(host.Process, metaclass=ABCMeta):
@memoize_when_activated
def running_time(self) -> datetime.timedelta:
"""The elapsed time this process has been running in `datetime.timedelta`."""
"""The elapsed time this process has been running in ``datetime.timedelta``."""
return datetime.datetime.now() - datetime.datetime.fromtimestamp(self.create_time())
@ -272,7 +272,7 @@ class HostProcess(host.Process, metaclass=ABCMeta):
return self.memory_info().rss
def parent(self) -> Union['HostProcess', None]:
"""Returns the parent process as a `HostProcess` instance. Returns `None` if there is no parent."""
"""Returns the parent process as a ``HostProcess`` instance. Returns ``None`` if there is no parent."""
parent = super().parent()
if parent is not None:
@ -280,8 +280,8 @@ class HostProcess(host.Process, metaclass=ABCMeta):
return None
def children(self, recursive: bool = False) -> List['HostProcess']:
"""Return the children of this process as a list of `HostProcess` instances.
If *recursive* is `True` return all the descendants.
"""Return the children of this process as a list of ``HostProcess`` instances.
If *recursive* is ``True`` return all the descendants.
"""
return [HostProcess(child.pid) for child in super().children(recursive)]
@ -294,7 +294,7 @@ class HostProcess(host.Process, metaclass=ABCMeta):
Internally different process info (e.g. name, ppid, uids, gids, ...) may be fetched by using
the same routine, but only one information is returned and the others are discarded. When
using this context manager the internal routine is executed once (in the example below on
`name()`) and the other info are cached.
``name()``) and the other info are cached.
The cache is cleared when exiting the context manager block. The advice is to use this every
time you retrieve more than one information about the process.
@ -345,7 +345,7 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
"""Represents a process with the given PID running on the given GPU device.
The instance will be cached during the lifetime of the process.
The same host process can use multiple GPU devices. The `GpuProcess` instances representing the
The same host process can use multiple GPU devices. The ``GpuProcess`` instances representing the
same PID on the host but different GPU devices are different.
"""
@ -355,7 +355,7 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
def __new__(cls, pid: int, device: 'Device',
gpu_memory: Optional[Union[int, NaType]] = None, # pylint: disable=unused-argument
type: Optional[Union[str, NaType]] = None) -> 'GpuProcess': # pylint: disable=unused-argument,redefined-builtin
"""Returns the cached instance of `GpuProcess`."""
"""Returns the cached instance of ``GpuProcess``."""
if pid is None:
pid = os.getpid()
@ -385,7 +385,7 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
def __init__(self, pid: int, device: 'Device', # pylint: disable=unused-argument
gpu_memory: Optional[Union[int, NaType]] = None,
type: Optional[Union[str, NaType]] = None) -> None: # pylint: disable=redefined-builtin
"""Initializes the instance returned by `__new__()`."""
"""Initializes the instance returned by ``__new__()``."""
if gpu_memory is None and not hasattr(self, '_gpu_memory'):
gpu_memory = NA
@ -456,54 +456,54 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
"""The GPU device the process running on.
The same host process can use multiple GPU devices.
The `GpuProcess` instances representing the same PID on the host
The ``GpuProcess`` instances representing the same PID on the host
but different GPU devices are different.
"""
return self._device
def gpu_instance_id(self) -> Union[int, NaType]:
"""The GPU instance ID of the MIG device, or `nvitop.NA` if not available."""
"""The GPU instance ID of the MIG device, or ``nvitop.NA`` if not available."""
return self._gpu_instance_id
def compute_instance_id(self) -> Union[int, NaType]:
"""The compute instance ID of the MIG device, or `nvitop.NA` if not available."""
"""The compute instance ID of the MIG device, or ``nvitop.NA`` if not available."""
return self._compute_instance_id
def gpu_memory(self) -> Union[int, NaType]: # in bytes
"""The used GPU memory in bytes, or `nvitop.NA` if not available."""
"""The used GPU memory in bytes, or ``nvitop.NA`` if not available."""
return self._gpu_memory
def gpu_memory_human(self) -> Union[str, NaType]: # in human readable
"""The used GPU memory in human readable format, or `nvitop.NA` if not available."""
"""The used GPU memory in human readable format, or ``nvitop.NA`` if not available."""
return self._gpu_memory_human
def gpu_memory_percent(self) -> Union[float, NaType]: # in percentage
"""The percentage of used GPU memory by the process, or `nvitop.NA` if not available."""
"""The percentage of used GPU memory by the process, or ``nvitop.NA`` if not available."""
return self._gpu_memory_percent
def gpu_sm_utilization(self) -> Union[int, NaType]:  # in percentage
    """The utilization rate of SM (Streaming Multiprocessor), or ``nvitop.NA`` if not available.

    Returns:
        The value stored in ``self._gpu_sm_utilization``.
    """
    return self._gpu_sm_utilization
def gpu_memory_utilization(self) -> Union[int, NaType]:  # in percentage
    """The utilization rate of GPU memory bandwidth, or ``nvitop.NA`` if not available.

    Returns:
        The value stored in ``self._gpu_memory_utilization``.
    """
    return self._gpu_memory_utilization
def gpu_encoder_utilization(self) -> Union[int, NaType]:  # in percentage
    """The utilization rate of the encoder, or ``nvitop.NA`` if not available.

    Returns:
        The value stored in ``self._gpu_encoder_utilization``.
    """
    return self._gpu_encoder_utilization
def gpu_decoder_utilization(self) -> Union[int, NaType]:  # in percentage
    """The utilization rate of the decoder, or ``nvitop.NA`` if not available.

    Returns:
        The value stored in ``self._gpu_decoder_utilization``.
    """
    return self._gpu_decoder_utilization
@ -583,7 +583,7 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
@auto_garbage_clean(fallback=NA)
def running_time(self) -> Union[datetime.timedelta, NaType]:
    """The elapsed time this process has been running in ``datetime.timedelta``.

    Returns:
        Whatever ``self.host.running_time()`` returns; the call is simply
        forwarded to the host process object.
    """
    return self.host.running_time()
@ -733,10 +733,10 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
@classmethod
def take_snapshots(cls, gpu_processes: Iterable['GpuProcess'], *, # batched version of `as_snapshot`
failsafe=False) -> List[Snapshot]:
"""Takes snapshots for a list of `GpuProcess` instances.
"""Takes snapshots for a list of ``GpuProcess`` instances.
If *failsafe* is `True`, then if any method fails, the fallback value in
`auto_garbage_clean(fallback)` will be used.
If *failsafe* is ``True``, then if any method fails, the fallback value in
``auto_garbage_clean(fallback)`` will be used.
"""
cache = {}

View file

@ -77,28 +77,28 @@ class NotApplicableType(str):
return math.nan
def __lt__(self, x):
    """The ``NA`` is always greater than any number. Use the dictionary order for string.

    Returns:
        ``False`` when *x* is an ``int`` or ``float`` (``NA`` is never less
        than a number); otherwise the result of the parent class comparison.
    """
    if isinstance(x, (int, float)):
        return False
    return super().__lt__(x)
def __le__(self, x):
    """The ``NA`` is always greater than any number. Use the dictionary order for string.

    Returns:
        ``False`` when *x* is an ``int`` or ``float`` (``NA`` is never less
        than or equal to a number); otherwise the result of the parent class
        comparison.
    """
    if isinstance(x, (int, float)):
        return False
    return super().__le__(x)
def __gt__(self, x):
    """The ``NA`` is always greater than any number. Use the dictionary order for string.

    Returns:
        ``True`` when *x* is an ``int`` or ``float``; otherwise the result of
        the parent class comparison.
    """
    if isinstance(x, (int, float)):
        return True
    return super().__gt__(x)
def __ge__(self, x):
"""The `NA` is always greater than any number. Use the dictionary order for string."""
"""The ``NA`` is always greater than any number. Use the dictionary order for string."""
if isinstance(x, (int, float)):
return True
@ -116,7 +116,7 @@ class NotApplicableType(str):
# NA is NotApplicableType() -> True (NotApplicableType is a singleton class)
# Short alias for the class, used in type annotations.
NaType = NotApplicableType
NA = NotApplicable = NotApplicableType()
"""The singleton instance of ``NotApplicableType``. The actual value is 'NA'."""
KiB = 1 << 10
@ -167,7 +167,7 @@ def bytes2human(x): # pylint: disable=too-many-return-statements
def timedelta2human(dt):
"""Converts `datetime.timedelta` instance to a human readable string."""
"""Converts ``datetime.timedelta`` instance to a human readable string."""
if isinstance(dt, (int, float)):
dt = datetime.timedelta(seconds=dt)
@ -209,7 +209,7 @@ def boolify(string, default=None):
class Snapshot:
"""A dict-like object holds the snapshot values.
The value can be accessed by `snapshot.name` or `snapshot[name]` syntax.
The value can be accessed by ``snapshot.name`` or ``snapshot['name']`` syntax.
Missing attributes will be automatically fetched from the original object.
"""
@ -253,7 +253,7 @@ class Snapshot:
return attribute
def __getitem__(self, name):
"""Supports `dict[name]` syntax."""
"""Supports ``dict['name']`` syntax."""
try:
return self.__getattr__(name)
@ -261,7 +261,7 @@ class Snapshot:
raise KeyError from e
def __setitem__(self, name, value):
    """Supports ``dict['name'] = value`` syntax.

    Item assignment is forwarded to ``self.__setattr__(name, value)``, so
    ``obj['name'] = value`` behaves like ``obj.name = value``.
    """
    self.__setattr__(name, value)