mirror of
https://github.com/XuehaiPan/nvitop.git
synced 2026-05-15 14:15:55 -06:00
docs: add Sphinx-based documents
Signed-off-by: Xuehai Pan <XuehaiPan@pku.edu.cn>
This commit is contained in:
parent
3bb17f6cc9
commit
102ee45960
24 changed files with 635 additions and 172 deletions
20
docs/Makefile
Normal file
20
docs/Makefile
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = source
|
||||
BUILDDIR = build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
35
docs/make.bat
Normal file
35
docs/make.bat
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
@ECHO OFF
|
||||
|
||||
pushd %~dp0
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=source
|
||||
set BUILDDIR=build
|
||||
|
||||
%SPHINXBUILD% >NUL 2>NUL
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.https://www.sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
goto end
|
||||
|
||||
:help
|
||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
|
||||
:end
|
||||
popd
|
||||
6
docs/requirements.txt
Normal file
6
docs/requirements.txt
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
sphinx
|
||||
sphinx-rtd-theme
|
||||
sphinx-copybutton
|
||||
|
||||
pytorch-lightning >= 1.5.0
|
||||
tensorflow >= 2.0
|
||||
0
docs/source/_static/.gitkeep
Normal file
0
docs/source/_static/.gitkeep
Normal file
3
docs/source/_static/style.css
Normal file
3
docs/source/_static/style.css
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
.wy-nav-content {
|
||||
max-width: none;
|
||||
}
|
||||
0
docs/source/_templates/.gitkeep
Normal file
0
docs/source/_templates/.gitkeep
Normal file
45
docs/source/apis/callbacks.rst
Normal file
45
docs/source/apis/callbacks.rst
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
nvitop.callbacks package
|
||||
========================
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
nvitop.callbacks.keras module
|
||||
-----------------------------
|
||||
|
||||
.. automodule:: nvitop.callbacks.keras
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
nvitop.callbacks.pytorch\_lightning module
|
||||
------------------------------------------
|
||||
|
||||
.. automodule:: nvitop.callbacks.pytorch_lightning
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
nvitop.callbacks.tensorboard module
|
||||
-----------------------------------
|
||||
|
||||
.. automodule:: nvitop.callbacks.tensorboard
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
nvitop.callbacks.utils module
|
||||
-----------------------------
|
||||
|
||||
.. automodule:: nvitop.callbacks.utils
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
.. automodule:: nvitop.callbacks
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
8
docs/source/apis/core/collector.rst
Normal file
8
docs/source/apis/core/collector.rst
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
nvitop.core.collector module
|
||||
----------------------------
|
||||
|
||||
.. automodule:: nvitop.core.collector
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
:member-order: bysource
|
||||
8
docs/source/apis/core/device.rst
Normal file
8
docs/source/apis/core/device.rst
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
nvitop.core.device module
|
||||
-------------------------
|
||||
|
||||
.. automodule:: nvitop.core.device
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
:member-order: bysource
|
||||
8
docs/source/apis/core/host.rst
Normal file
8
docs/source/apis/core/host.rst
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
nvitop.core.host module
|
||||
-----------------------
|
||||
|
||||
.. automodule:: nvitop.core.host
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
:member-order: bysource
|
||||
8
docs/source/apis/core/libnvml.rst
Normal file
8
docs/source/apis/core/libnvml.rst
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
nvitop.core.libnvml module
|
||||
--------------------------
|
||||
|
||||
.. automodule:: nvitop.core.libnvml
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
:member-order: bysource
|
||||
8
docs/source/apis/core/process.rst
Normal file
8
docs/source/apis/core/process.rst
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
nvitop.core.process module
|
||||
--------------------------
|
||||
|
||||
.. automodule:: nvitop.core.process
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
:member-order: bysource
|
||||
8
docs/source/apis/core/utils.rst
Normal file
8
docs/source/apis/core/utils.rst
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
nvitop.core.utils module
|
||||
------------------------
|
||||
|
||||
.. automodule:: nvitop.core.utils
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
:member-order: bysource
|
||||
24
docs/source/apis/index.rst
Normal file
24
docs/source/apis/index.rst
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
nvitop package
|
||||
==============
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
core/device
|
||||
core/process
|
||||
core/host
|
||||
core/collector
|
||||
core/libnvml
|
||||
core/utils
|
||||
callbacks
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
.. automodule:: nvitop
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
136
docs/source/conf.py
Normal file
136
docs/source/conf.py
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# This file only contains a selection of the most common options. For a full
|
||||
# list see the documentation:
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
# -- Path setup --------------------------------------------------------------
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
# add these directories to sys.path here. If the directory is relative to the
|
||||
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||
#
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
project = 'nvitop: the one-stop solution for GPU process management.'
|
||||
copyright = '2022, Xuehai Pan'
|
||||
author = 'Xuehai Pan'
|
||||
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
|
||||
# Add any Sphinx extension module names here, as strings. They can be
|
||||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = [
|
||||
'sphinx.ext.autodoc',
|
||||
'sphinx.ext.doctest',
|
||||
'sphinx.ext.intersphinx',
|
||||
'sphinx.ext.todo',
|
||||
'sphinx.ext.coverage',
|
||||
'sphinx.ext.napoleon',
|
||||
'sphinx.ext.mathjax',
|
||||
'sphinx.ext.ifconfig',
|
||||
'sphinx.ext.viewcode',
|
||||
'sphinx.ext.githubpages',
|
||||
'sphinx.ext.extlinks',
|
||||
'sphinx_copybutton',
|
||||
'sphinx_rtd_theme',
|
||||
]
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['_templates']
|
||||
|
||||
# The suffix(es) of source filenames.
|
||||
# You can specify multiple suffix as a list of string:
|
||||
#
|
||||
# source_suffix = ['.rst', '.md']
|
||||
source_suffix = '.rst'
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = 'index'
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
#
|
||||
# This is also used if you do content translation via gettext catalogs.
|
||||
# Usually you set "language" from the command line for these cases.
|
||||
language = 'en'
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
# This pattern also affects html_static_path and html_extra_path.
|
||||
exclude_patterns = ['_build', 'build', 'Thumbs.db', '.DS_Store']
|
||||
|
||||
# The name of the Pygments (syntax highlighting) style to use.
|
||||
pygments_style = 'default'
|
||||
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
html_theme = 'sphinx_rtd_theme'
|
||||
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
# documentation.
|
||||
#
|
||||
# html_theme_options = {}
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ['_static']
|
||||
html_css_files = [
|
||||
'style.css',
|
||||
]
|
||||
|
||||
# Custom sidebar templates, must be a dictionary that maps document names
|
||||
# to template names.
|
||||
#
|
||||
# The default sidebars (for documents that don't match any pattern) are
|
||||
# defined by theme itself. Builtin themes are using these templates by
|
||||
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
|
||||
# 'searchbox.html']``.
|
||||
#
|
||||
# html_sidebars = {}
|
||||
|
||||
# External link roles for sphinx.ext.extlinks. Each entry maps a role name
# to a (URL template, caption template) pair; ``%s`` in either template is
# replaced by the role's argument. The caption must contain exactly one
# ``%s`` (or be ``None``): an empty caption is rejected by Sphinx >= 6 and
# only worked through a deprecated fallback in earlier releases. Using
# ``'%s'`` keeps the rendered link text equal to the role's argument.
extlinks = {
    'gitcode': ('https://github.com/XuehaiPan/nvitop/blob/HEAD/%s', '%s'),
}
|
||||
|
||||
|
||||
# -- Options for manual page output ------------------------------------------
|
||||
|
||||
# One entry per manual page. List of tuples
|
||||
# (source start file, name, description, authors, manual section).
|
||||
man_pages = [
|
||||
(master_doc, 'nvitop', 'An interactive NVIDIA-GPU process viewer.',
|
||||
[author], 1)
|
||||
]
|
||||
|
||||
|
||||
# -- Extension configuration -------------------------------------------------
|
||||
|
||||
# -- Options for napoleon extension ------------------------------------------
|
||||
|
||||
napoleon_include_init_with_doc = True
|
||||
napoleon_include_private_with_doc = False
|
||||
napoleon_include_special_with_doc = True
|
||||
|
||||
# -- Options for intersphinx extension ---------------------------------------
|
||||
|
||||
# Example configuration for intersphinx: refer to the Python standard library.
|
||||
# Maps a project name to (base URL, inventory location); ``None`` fetches ``objects.inv`` from the base URL.
intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}
|
||||
|
||||
# -- Options for todo extension ----------------------------------------------
|
||||
|
||||
# If true, `todo` and `todoList` produce output, else they produce nothing.
|
||||
todo_include_todos = True
|
||||
18
docs/source/index.rst
Normal file
18
docs/source/index.rst
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
Welcome to nvitop's documentation!
|
||||
==================================
|
||||
|
||||
An interactive NVIDIA-GPU process viewer, the one-stop solution for GPU process management.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
:caption: Contents:
|
||||
|
||||
apis/index
|
||||
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
|
|
@ -22,15 +22,21 @@ class GpuStatsLogger(Callback): # pylint: disable=too-many-instance-attributes
|
|||
a CSVLogger callback to the model.
|
||||
|
||||
Args:
|
||||
memory_utilization: Set to ``True`` to log used, free and the percentage of memory
|
||||
memory_utilization (bool):
|
||||
Set to ``True`` to log used, free and the percentage of memory
|
||||
utilization at the start and end of each step. Default: ``True``.
|
||||
gpu_utilization: Set to ``True`` to log the percentage of GPU utilization
|
||||
gpu_utilization (bool):
|
||||
Set to ``True`` to log the percentage of GPU utilization
|
||||
at the start and end of each step. Default: ``True``.
|
||||
intra_step_time: Set to ``True`` to log the time of each step. Default: ``False``.
|
||||
inter_step_time: Set to ``True`` to log the time between the end of one step
|
||||
intra_step_time (bool):
|
||||
Set to ``True`` to log the time of each step. Default: ``False``.
|
||||
inter_step_time (bool):
|
||||
Set to ``True`` to log the time between the end of one step
|
||||
and the start of the next step. Default: ``False``.
|
||||
fan_speed: Set to ``True`` to log percentage of fan speed. Default: ``False``.
|
||||
temperature: Set to ``True`` to log the gpu temperature in degree Celsius.
|
||||
fan_speed (bool):
|
||||
Set to ``True`` to log percentage of fan speed. Default: ``False``.
|
||||
temperature (bool):
|
||||
Set to ``True`` to log the GPU temperature in degrees Celsius.
|
||||
Default: ``False``.
|
||||
|
||||
Raises:
|
||||
|
|
|
|||
|
|
@ -22,15 +22,21 @@ class GpuStatsLogger(Callback): # pylint: disable=too-many-instance-attributes
|
|||
callback and in order to use it you need to assign a logger in the ``Trainer``.
|
||||
|
||||
Args:
|
||||
memory_utilization: Set to ``True`` to log used, free and the percentage of memory
|
||||
memory_utilization (bool):
|
||||
Set to ``True`` to log used, free and the percentage of memory
|
||||
utilization at the start and end of each step. Default: ``True``.
|
||||
gpu_utilization: Set to ``True`` to log the percentage of GPU utilization
|
||||
gpu_utilization (bool):
|
||||
Set to ``True`` to log the percentage of GPU utilization
|
||||
at the start and end of each step. Default: ``True``.
|
||||
intra_step_time: Set to ``True`` to log the time of each step. Default: ``False``.
|
||||
inter_step_time: Set to ``True`` to log the time between the end of one step
|
||||
intra_step_time (bool):
|
||||
Set to ``True`` to log the time of each step. Default: ``False``.
|
||||
inter_step_time (bool):
|
||||
Set to ``True`` to log the time between the end of one step
|
||||
and the start of the next step. Default: ``False``.
|
||||
fan_speed: Set to ``True`` to log percentage of fan speed. Default: ``False``.
|
||||
temperature: Set to ``True`` to log the gpu temperature in degree Celsius.
|
||||
fan_speed (bool):
|
||||
Set to ``True`` to log percentage of fan speed. Default: ``False``.
|
||||
temperature (bool):
|
||||
Set to ``True`` to log the GPU temperature in degrees Celsius.
|
||||
Default: ``False``.
|
||||
|
||||
Raises:
|
||||
|
|
|
|||
|
|
@ -159,10 +159,10 @@ class ResourceMetricCollector: # pylint: disable=too-many-instance-attributes
|
|||
"""A class for collecting resource metrics.
|
||||
|
||||
Args:
|
||||
devices (iterable of Device):
|
||||
devices (Iterable[Device]):
|
||||
Set of Device instances for logging. If not given, all physical
|
||||
devices on board will be used.
|
||||
root_pids (set of int):
|
||||
root_pids (Set[int]):
|
||||
A set of PIDs, only the status of the children processes on the GPUs
|
||||
will be collected. If not given, the PID of the current process will
|
||||
be used.
|
||||
|
|
@ -422,7 +422,7 @@ class ResourceMetricCollector: # pylint: disable=too-many-instance-attributes
|
|||
the sub-collections will be reset as well.
|
||||
|
||||
Args:
|
||||
tag (str or None):
|
||||
tag (Optional[str]):
|
||||
The tag to reset. If None, the current active collection
|
||||
will be reset.
|
||||
|
||||
|
|
|
|||
|
|
@ -3,18 +3,24 @@
|
|||
|
||||
"""The live classes for GPU devices.
|
||||
|
||||
The core classes are `Device` and `CudaDevice`. The type of returned instance created by `Class(args)`
|
||||
The core classes are ``Device`` and ``CudaDevice``. The type of returned instance created by ``Class(args)``
|
||||
depends on the given arguments.
|
||||
|
||||
`Device()` returns:
|
||||
``Device()`` returns:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
- (index: int) -> PhysicalDevice
|
||||
- (index: (int, int)) -> MigDevice
|
||||
- (uuid: str) -> Union[PhysicalDevice, MigDevice] # depending on the UUID value
|
||||
- (bus_id: str) -> PhysicalDevice
|
||||
|
||||
`CudaDevice()` returns:
|
||||
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
|
||||
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
|
||||
``CudaDevice()`` returns:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
|
||||
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
|
||||
- (nvml_index: int) -> CudaDevice
|
||||
- (nvml_index: (int, int)) -> CudaMigDevice
|
||||
|
||||
|
|
@ -127,7 +133,7 @@ def _does_any_device_support_mig_mode() -> bool:
|
|||
|
||||
|
||||
def is_mig_device_uuid(uuid: Optional[str]) -> bool:
|
||||
"""Returns `True` if the argument is a MIG device UUID, otherwise, returns `False`."""
|
||||
"""Returns ``True`` if the argument is a MIG device UUID, otherwise, returns ``False``."""
|
||||
|
||||
if isinstance(uuid, str):
|
||||
match = Device.UUID_PATTERN.match(uuid)
|
||||
|
|
@ -139,7 +145,10 @@ def is_mig_device_uuid(uuid: Optional[str]) -> bool:
|
|||
class Device: # pylint: disable=too-many-instance-attributes,too-many-public-methods
|
||||
"""Live class of the GPU devices, different from the device snapshots.
|
||||
|
||||
`Device.__new__()` returns different types depending on the given arguments.
|
||||
``Device.__new__()`` returns different types depending on the given arguments.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
- (index: int) -> PhysicalDevice
|
||||
- (index: (int, int)) -> MigDevice
|
||||
- (uuid: str) -> Union[PhysicalDevice, MigDevice] # depending on the UUID value
|
||||
|
|
@ -206,6 +215,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=0 --format=csv,noheader,nounits --query-gpu=driver_version
|
||||
"""
|
||||
|
||||
|
|
@ -233,6 +244,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=0 --format=csv,noheader,nounits --query-gpu=count
|
||||
"""
|
||||
|
||||
|
|
@ -251,14 +264,14 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Returns a list of devices of the given indices.
|
||||
|
||||
Args:
|
||||
indices (list of int or tuple of two ints):
|
||||
Indices of the devices. For each index, get `PhysicalDevice` for single int
|
||||
and `MigDevice` for tuple (int, int). That is:
|
||||
indices (Iterable[Union[int, Tuple[int, int]]]):
|
||||
Indices of the devices. For each index, get ``PhysicalDevice`` for single int
|
||||
and ``MigDevice`` for tuple (int, int). That is:
|
||||
- (int) -> PhysicalDevice
|
||||
- ((int, int)) -> MigDevice
|
||||
|
||||
Returns: List[Union[PhysicalDevice, MigDevice]]
|
||||
A list of `PhysicalDevice` and/or `MigDevice` instances of the given indices.
|
||||
A list of ``PhysicalDevice`` and/or ``MigDevice`` instances of the given indices.
|
||||
"""
|
||||
|
||||
if indices is None:
|
||||
|
|
@ -271,19 +284,19 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
|
||||
@staticmethod
|
||||
def from_cuda_visible_devices() -> List['CudaDevice']:
|
||||
"""Returns a list of CUDA devices of the given CUDA indices.
|
||||
The CUDA ordinal will be enumerate from the `CUDA_VISIBLE_DEVICES` environment variable.
|
||||
"""Returns a list of all CUDA visible devices.
|
||||
The CUDA ordinal will be enumerated from the environment variable ``CUDA_VISIBLE_DEVICES``.
|
||||
|
||||
See also for CUDA Device Enumeration:
|
||||
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
|
||||
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
|
||||
|
||||
Returns: List[CudaDevice]
|
||||
A list of `CudaDevice` instances.
|
||||
A list of ``CudaDevice`` instances.
|
||||
|
||||
Raises:
|
||||
RuntimeError:
|
||||
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries).
|
||||
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
|
||||
"""
|
||||
|
||||
visible_device_indices = Device.parse_cuda_visible_devices()
|
||||
|
|
@ -294,28 +307,30 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
|
||||
return cuda_devices
|
||||
|
||||
cuda_all = from_cuda_visible_devices
|
||||
|
||||
@staticmethod
|
||||
def from_cuda_indices(cuda_indices: Optional[Union[int, Iterable[int]]] = None) -> List['CudaDevice']:
|
||||
"""Returns a list of CUDA devices of the given CUDA indices.
|
||||
The CUDA ordinal will be enumerate from the `CUDA_VISIBLE_DEVICES` environment variable.
|
||||
The CUDA ordinal will be enumerated from the environment variable ``CUDA_VISIBLE_DEVICES``.
|
||||
|
||||
See also for CUDA Device Enumeration:
|
||||
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
|
||||
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
|
||||
|
||||
Args:
|
||||
cuda_indices (list of int):
|
||||
The value of `CUDA_VISIBLE_DEVICES`, if not given, the value from the environment
|
||||
cuda_indices (Iterable[int]):
|
||||
The value of ``CUDA_VISIBLE_DEVICES``, if not given, the value from the environment
|
||||
will be used.
|
||||
|
||||
Returns: List[CudaDevice]
|
||||
A list of `CudaDevice` of the given CUDA indices.
|
||||
A list of ``CudaDevice`` of the given CUDA indices.
|
||||
|
||||
Raises:
|
||||
RuntimeError:
|
||||
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries).
|
||||
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
|
||||
RuntimeError:
|
||||
If the index is out of range for the given `CUDA_VISIBLE_DEVICES` environment variable.
|
||||
If the index is out of range for the given environment variable ``CUDA_VISIBLE_DEVICES``.
|
||||
"""
|
||||
|
||||
cuda_devices = Device.from_cuda_visible_devices()
|
||||
|
|
@ -340,15 +355,15 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
@staticmethod
|
||||
def parse_cuda_visible_devices(cuda_visible_devices: Optional[str] = None) -> Union[List[int],
|
||||
List[Tuple[int, int]]]:
|
||||
"""Parses the given `CUDA_VISIBLE_DEVICES` value into NVML device indices.
|
||||
"""Parses the given ``CUDA_VISIBLE_DEVICES`` value into NVML device indices.
|
||||
|
||||
See also for CUDA Device Enumeration:
|
||||
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
|
||||
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
|
||||
|
||||
Args:
|
||||
cuda_visible_devices (str or None):
|
||||
The value of the `CUDA_VISIBLE_DEVICES` variable. If not given, the value from the
|
||||
cuda_visible_devices (Optional[str]):
|
||||
The value of the ``CUDA_VISIBLE_DEVICES`` variable. If not given, the value from the
|
||||
environment will be used.
|
||||
|
||||
Returns: Union[List[int], List[Tuple[int, int]]]
|
||||
|
|
@ -357,7 +372,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
|
||||
Raises:
|
||||
RuntimeError:
|
||||
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries).
|
||||
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
|
||||
"""
|
||||
|
||||
if cuda_visible_devices is None:
|
||||
|
|
@ -375,7 +390,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
@ttl_cache(ttl=300.0)
|
||||
def _parse_cuda_visible_devices(cuda_visible_devices: str) -> Union[List[int],
|
||||
List[Tuple[int, int]]]:
|
||||
"""The underlining implementation for `parse_cuda_visible_devices`. The result will be cached."""
|
||||
"""The underlying implementation for ``parse_cuda_visible_devices``. The result will be cached."""
|
||||
|
||||
def from_index_or_uuid(index_or_uuid: Union[int, str]) -> 'Device':
|
||||
nonlocal use_integer_identifiers
|
||||
|
|
@ -430,15 +445,17 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
bus_id: Optional[str] = None) -> 'Device':
|
||||
"""Creates a new instance of Device. The type of the result is determined by the given argument.
|
||||
|
||||
- (index: int) -> PhysicalDevice
|
||||
- (index: (int, int)) -> MigDevice
|
||||
- (uuid: str) -> Union[PhysicalDevice, MigDevice] # depending on the UUID value
|
||||
- (bus_id: str) -> PhysicalDevice
|
||||
.. code-block:: python
|
||||
|
||||
- (index: int) -> PhysicalDevice
|
||||
- (index: (int, int)) -> MigDevice
|
||||
- (uuid: str) -> Union[PhysicalDevice, MigDevice] # depending on the UUID value
|
||||
- (bus_id: str) -> PhysicalDevice
|
||||
|
||||
Note: This method takes exactly one non-None argument.
|
||||
|
||||
Returns: Union[PhysicalDevice, MigDevice]
|
||||
A `PhysicalDevice` instance or a `MigDevice` instance.
|
||||
A ``PhysicalDevice`` instance or a ``MigDevice`` instance.
|
||||
|
||||
Raises:
|
||||
TypeError:
|
||||
|
|
@ -480,7 +497,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
def __init__(self, index: Optional[Union[int, str]] = None, *,
|
||||
uuid: Optional[str] = None,
|
||||
bus_id: Optional[str] = None) -> None:
|
||||
"""Initializes the instance created by `__new__()`."""
|
||||
"""Initializes the instance created by ``__new__()``."""
|
||||
|
||||
if isinstance(index, str) and self.UUID_PATTERN.match(index) is not None: # passed by UUID
|
||||
index, uuid = None, index
|
||||
|
|
@ -545,12 +562,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
def __getattr__(self, name: str) -> Union[Any, Callable[..., Any]]:
|
||||
"""Get the object attribute.
|
||||
|
||||
If the attribute is not defined, make a method from `pynvml.nvmlDeviceGet<AttributeName>(handle)`.
|
||||
If the attribute is not defined, make a method from ``pynvml.nvmlDeviceGet<AttributeName>(handle)``.
|
||||
The attribute name will be converted to PascalCase string.
|
||||
|
||||
Raises:
|
||||
AttributeError:
|
||||
If the attribute is not defined in `pynvml.py`.
|
||||
If the attribute is not defined in ``pynvml.py``.
|
||||
|
||||
Examples:
|
||||
|
||||
|
|
@ -646,9 +663,9 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
|
||||
Raises:
|
||||
RuntimeError:
|
||||
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries).
|
||||
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
|
||||
RuntimeError:
|
||||
If the current device is not visible to CUDA applications (i.e. not listed in `CUDA_VISIBLE_DEVICES`).
|
||||
If the current device is not visible to CUDA applications (i.e. not listed in ``CUDA_VISIBLE_DEVICES``).
|
||||
"""
|
||||
|
||||
if self._cuda_index is None:
|
||||
|
|
@ -668,10 +685,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""The official product name of the GPU. This is an alphanumeric string. For all products.
|
||||
|
||||
Returns: Union[str, NaType]
|
||||
The official product name, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The official product name, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=name
|
||||
"""
|
||||
|
||||
|
|
@ -684,10 +703,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
not correspond to any physical label on the board.
|
||||
|
||||
Returns: Union[str, NaType]
|
||||
The UUID of the device, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The UUID of the device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=uuid
|
||||
"""
|
||||
|
||||
|
|
@ -699,10 +720,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""PCI bus ID as "domain:bus:device.function", in hex.
|
||||
|
||||
Returns: Union[str, NaType]
|
||||
The PCI bus ID of the device, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The PCI bus ID of the device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=pci.bus_id
|
||||
"""
|
||||
|
||||
|
|
@ -715,10 +738,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
unique immutable alphanumeric value.
|
||||
|
||||
Returns: Union[str, NaType]
|
||||
The serial number of the device, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The serial number of the device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=serial
|
||||
"""
|
||||
|
||||
|
|
@ -730,7 +755,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Returns a named tuple with memory information (in bytes) for the device.
|
||||
|
||||
Returns: MemoryInfo(total, free, used)
|
||||
A named tuple with memory information, the item could be `nvitop.NA` (str: 'N/A') when not available.
|
||||
A named tuple with memory information, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
memory_info = nvml.nvmlQuery('nvmlDeviceGetMemoryInfo', self.handle)
|
||||
|
|
@ -742,10 +767,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Total installed GPU memory in bytes.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
Total installed GPU memory in bytes, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
Total installed GPU memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=memory.total
|
||||
"""
|
||||
|
||||
|
|
@ -757,10 +784,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Total memory allocated by active contexts in bytes.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
Total memory allocated by active contexts in bytes, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
Total memory allocated by active contexts in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=memory.used
|
||||
"""
|
||||
|
||||
|
|
@ -770,10 +799,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Total free memory in bytes.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
Total free memory in bytes, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
Total free memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=memory.free
|
||||
"""
|
||||
|
||||
|
|
@ -783,7 +814,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Total installed GPU memory in human readable format.
|
||||
|
||||
Returns: Union[str, NaType]
|
||||
Total installed GPU memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
Total installed GPU memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
if self._memory_total_human is NA:
|
||||
|
|
@ -794,7 +825,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Total memory allocated by active contexts in human readable format.
|
||||
|
||||
Returns: Union[str, NaType]
|
||||
Total memory allocated by active contexts in human readable format, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
Total memory allocated by active contexts in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
""" # pylint: disable=line-too-long
|
||||
|
||||
return bytes2human(self.memory_used())
|
||||
|
|
@ -803,7 +834,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Total free memory in human readable format.
|
||||
|
||||
Returns: Union[str, NaType]
|
||||
Total free memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
Total free memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
return bytes2human(self.memory_free())
|
||||
|
|
@ -812,7 +843,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""The percentage of used memory over total memory (0 <= p <= 100).
|
||||
|
||||
Returns: Union[float, NaType]
|
||||
The percentage of used memory over total memory, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The percentage of used memory over total memory, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
memory_info = self.memory_info()
|
||||
|
|
@ -835,7 +866,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Returns a named tuple with BAR1 memory information (in bytes) for the device.
|
||||
|
||||
Returns: MemoryInfo(total, free, used)
|
||||
A named tuple with BAR1 memory information, the item could be `nvitop.NA` (str: 'N/A') when not available.
|
||||
A named tuple with BAR1 memory information, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
memory_info = nvml.nvmlQuery('nvmlDeviceGetBAR1MemoryInfo', self.handle)
|
||||
|
|
@ -847,7 +878,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Total BAR1 memory in bytes.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
Total BAR1 memory in bytes, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
Total BAR1 memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
return self.bar1_memory_info().total
|
||||
|
|
@ -856,7 +887,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Total used BAR1 memory in bytes.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
Total used BAR1 memory in bytes, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
Total used BAR1 memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
return self.bar1_memory_info().used
|
||||
|
|
@ -865,7 +896,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Total free BAR1 memory in bytes.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
Total free BAR1 memory in bytes, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
Total free BAR1 memory in bytes, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
return self.bar1_memory_info().free
|
||||
|
|
@ -874,7 +905,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Total BAR1 memory in human readable format.
|
||||
|
||||
Returns: Union[str, NaType]
|
||||
Total BAR1 memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
Total BAR1 memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
return bytes2human(self.bar1_memory_total())
|
||||
|
|
@ -883,7 +914,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Total used BAR1 memory in human readable format.
|
||||
|
||||
Returns: Union[str, NaType]
|
||||
Total used BAR1 memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
Total used BAR1 memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
return bytes2human(self.bar1_memory_used())
|
||||
|
|
@ -892,7 +923,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Total free BAR1 memory in human readable format.
|
||||
|
||||
Returns: Union[str, NaType]
|
||||
Total free BAR1 memory in human readable format, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
Total free BAR1 memory in human readable format, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
return bytes2human(self.bar1_memory_free())
|
||||
|
|
@ -901,7 +932,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""The percentage of used BAR1 memory over total BAR1 memory (0 <= p <= 100).
|
||||
|
||||
Returns: Union[float, NaType]
|
||||
The percentage of used BAR1 memory over total BAR1 memory, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The percentage of used BAR1 memory over total BAR1 memory, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
memory_info = self.bar1_memory_info()
|
||||
|
|
@ -924,7 +955,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Returns a named tuple with GPU utilization rates (in percentage) for the device.
|
||||
|
||||
Returns: UtilizationRates(gpu, memory, encoder, decoder)
|
||||
A named tuple with GPU utilization rates (in percentage) for the device, the item could be `nvitop.NA` (str: 'N/A') when not available.
|
||||
A named tuple with GPU utilization rates (in percentage) for the device, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
""" # pylint: disable=line-too-long
|
||||
|
||||
gpu, memory, encoder, decoder = NA, NA, NA, NA
|
||||
|
|
@ -948,10 +979,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
The sample period may be between 1 second and 1/6 second depending on the product.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The GPU utilization rate in percentage, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The GPU utilization rate in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=utilization.gpu
|
||||
"""
|
||||
|
||||
|
|
@ -964,10 +997,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
The sample period may be between 1 second and 1/6 second depending on the product.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The memory bandwidth utilization rate of the GPU in percentage, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The memory bandwidth utilization rate of the GPU in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=utilization.memory
|
||||
""" # pylint: disable=line-too-long
|
||||
|
||||
|
|
@ -977,7 +1012,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""The encoder utilization rate in percentage.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The encoder utilization rate in percentage, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The encoder utilization rate in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
return self.utilization_rates().encoder
|
||||
|
|
@ -986,7 +1021,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""The decoder utilization rate in percentage.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The decoder utilization rate in percentage, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The decoder utilization rate in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
return self.utilization_rates().decoder
|
||||
|
|
@ -997,7 +1032,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Returns a named tuple with current clock speeds (in MHz) for the device.
|
||||
|
||||
Returns: ClockInfos(graphics, sm, memory, video)
|
||||
A named tuple with current clock speeds (in MHz) for the device, the item could be `nvitop.NA` (str: 'N/A') when not available.
|
||||
A named tuple with current clock speeds (in MHz) for the device, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
""" # pylint: disable=line-too-long
|
||||
|
||||
return ClockInfos(
|
||||
|
|
@ -1015,7 +1050,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Returns a named tuple with maximum clock speeds (in MHz) for the device.
|
||||
|
||||
Returns: ClockInfos(graphics, sm, memory, video)
|
||||
A named tuple with maximum clock speeds (in MHz) for the device, the item could be `nvitop.NA` (str: 'N/A') when not available.
|
||||
A named tuple with maximum clock speeds (in MHz) for the device, the item could be ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
""" # pylint: disable=line-too-long
|
||||
|
||||
clock_infos = self._max_clock_infos._asdict()
|
||||
|
|
@ -1042,10 +1077,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Current frequency of graphics (shader) clock in MHz.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The current frequency of graphics (shader) clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The current frequency of graphics (shader) clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.graphics
|
||||
"""
|
||||
|
||||
|
|
@ -1055,10 +1092,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Current frequency of SM (Streaming Multiprocessor) clock in MHz.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The current frequency of SM (Streaming Multiprocessor) clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The current frequency of SM (Streaming Multiprocessor) clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.sm
|
||||
""" # pylint: disable=line-too-long
|
||||
|
||||
|
|
@ -1068,10 +1107,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Current frequency of memory clock in MHz.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The current frequency of memory clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The current frequency of memory clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.memory
|
||||
"""
|
||||
|
||||
|
|
@ -1081,10 +1122,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Current frequency of video encoder/decoder clock in MHz.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The current frequency of video encoder/decoder clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The current frequency of video encoder/decoder clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.current.video
|
||||
"""
|
||||
|
||||
|
|
@ -1094,10 +1137,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Maximum frequency of graphics (shader) clock in MHz.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The maximum frequency of graphics (shader) clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The maximum frequency of graphics (shader) clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.graphics
|
||||
"""
|
||||
|
||||
|
|
@ -1107,10 +1152,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Maximum frequency of SM (Streaming Multiprocessor) clock in MHz.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The maximum frequency of SM (Streaming Multiprocessor) clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The maximum frequency of SM (Streaming Multiprocessor) clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.sm
|
||||
""" # pylint: disable=line-too-long
|
||||
|
||||
|
|
@ -1120,10 +1167,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Maximum frequency of memory clock in MHz.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The maximum frequency of memory clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The maximum frequency of memory clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.memory
|
||||
"""
|
||||
|
||||
|
|
@ -1133,10 +1182,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Maximum frequency of video encoder/decoder clock in MHz.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The maximum frequency of video encoder/decoder clock in MHz, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The maximum frequency of video encoder/decoder clock in MHz, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=clocks.max.video
|
||||
"""
|
||||
|
||||
|
|
@ -1151,10 +1202,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
because they rely on cooling via fans in the surrounding enclosure.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The fan speed value in percentage, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The fan speed value in percentage, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=fan.speed
|
||||
"""
|
||||
|
||||
|
|
@ -1165,10 +1218,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Core GPU temperature in degrees C.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The core GPU temperature in Celsius degrees, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The core GPU temperature in Celsius degrees, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=temperature.gpu
|
||||
"""
|
||||
|
||||
|
|
@ -1180,10 +1235,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""The last measured power draw for the entire board in milliwatts.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The power draw for the entire board in milliwatts, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The power draw for the entire board in milliwatts, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
$(( "$(nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=power.draw)" * 1000 ))
|
||||
"""
|
||||
|
||||
|
|
@ -1197,10 +1254,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""The software power limit in milliwatts. Set by software like nvidia-smi.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The software power limit in milliwatts, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The software power limit in milliwatts, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
$(( "$(nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=power.limit)" * 1000 ))
|
||||
"""
|
||||
|
||||
|
|
@ -1230,10 +1289,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
Returns: Union[str, NaType]
|
||||
- 'Disabled': if not an active display device.
|
||||
- 'Enabled': if an active display device.
|
||||
- `nvitop.NA` (str: 'N/A'): if not available.
|
||||
- ``nvitop.NA`` (str: ``'N/A'``): if not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=display_active
|
||||
"""
|
||||
|
||||
|
|
@ -1248,10 +1309,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
Returns: Union[str, NaType]
|
||||
- 'Disabled': if the display mode is disabled.
|
||||
- 'Enabled': if the display mode is enabled.
|
||||
- `nvitop.NA` (str: 'N/A'): if not available.
|
||||
- ``nvitop.NA`` (str: ``'N/A'``): if not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=display_mode
|
||||
"""
|
||||
|
||||
|
|
@ -1268,10 +1331,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
Returns: Union[str, NaType]
|
||||
- 'WDDM': for WDDM driver model on Windows.
|
||||
- 'WDM': for TCC (WDM) driver model on Windows.
|
||||
- `nvitop.NA` (str: 'N/A'): if not available, e.g. on Linux.
|
||||
- ``nvitop.NA`` (str: ``'N/A'``): if not available, e.g. on Linux.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=driver_model.current
|
||||
"""
|
||||
|
||||
|
|
@ -1292,10 +1357,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
Returns: Union[str, NaType]
|
||||
- 'Disabled': if the persistence mode is disabled.
|
||||
- 'Enabled': if the persistence mode is enabled.
|
||||
- `nvitop.NA` (str: 'N/A'): if not available.
|
||||
- ``nvitop.NA`` (str: ``'N/A'``): if not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=persistence_mode
|
||||
"""
|
||||
|
||||
|
|
@ -1307,10 +1374,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
P12 (minimum performance).
|
||||
|
||||
Returns: Union[str, NaType]
|
||||
The current performance state in format `P<int>`, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The current performance state in format ``P<int>``, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=pstate
|
||||
"""
|
||||
|
||||
|
|
@ -1324,10 +1393,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
"""Total errors detected across entire chip.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The total number of uncorrected errors in volatile ECC memory, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The total number of uncorrected errors in volatile ECC memory, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=ecc.errors.uncorrected.volatile.total
|
||||
""" # pylint: disable=line-too-long
|
||||
|
||||
|
|
@ -1345,10 +1416,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
- 'Exclusive Thread': deprecated, use Exclusive Process instead
|
||||
- 'Prohibited': means no contexts are allowed per device (no compute apps).
|
||||
- 'Exclusive Process': means only one context is allowed per device, usable from multiple threads at a time.
|
||||
- `nvitop.NA` (str: 'N/A'): if not available.
|
||||
- ``nvitop.NA`` (str: ``'N/A'``): if not available.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=compute_mode
|
||||
"""
|
||||
|
||||
|
|
@ -1375,10 +1448,12 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
Returns: Union[str, NaType]
|
||||
- 'Disabled': if the MIG mode is disabled.
|
||||
- 'Enabled': if the MIG mode is enabled.
|
||||
- `nvitop.NA` (str: 'N/A'): if not available, e.g. the GPU does not support MIG mode.
|
||||
- ``nvitop.NA`` (str: ``'N/A'``): if not available, e.g. the GPU does not support MIG mode.
|
||||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=mig.mode.current
|
||||
"""
|
||||
|
||||
|
|
@ -1390,7 +1465,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
return {0: 'Disabled', 1: 'Enabled'}.get(mig_mode, NA)
|
||||
|
||||
def is_mig_mode_enabled(self) -> bool:
|
||||
"""Returns whether the MIG mode is enabled on the device. Returns `False` if MIG mode is
|
||||
"""Returns whether the MIG mode is enabled on the device. Returns ``False`` if MIG mode is
|
||||
disabled or the device does not support MIG mode.
|
||||
"""
|
||||
|
||||
|
|
@ -1440,7 +1515,7 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
|
|||
return processes
|
||||
|
||||
def as_snapshot(self) -> Snapshot:
|
||||
"""Returns a onetime snapshot of the device. The attributes are defined in `SNAPSHOT_KEYS`."""
|
||||
"""Returns a onetime snapshot of the device. The attributes are defined in ``SNAPSHOT_KEYS``."""
|
||||
|
||||
with self.oneshot():
|
||||
return Snapshot(real=self, index=self.index, physical_index=self.physical_index,
|
||||
|
|
@ -1544,6 +1619,8 @@ class PhysicalDevice(Device):
|
|||
|
||||
Command line equivalent:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=index
|
||||
"""
|
||||
|
||||
|
|
@ -1636,18 +1713,18 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
|
|||
"""Returns a list of MIG devices of the given indices.
|
||||
|
||||
Args:
|
||||
indices (list of tuple of two ints):
|
||||
indices (Iterable[Tuple[int, int]]):
|
||||
Indices of the MIG devices. Each index is a tuple of two integers.
|
||||
|
||||
Returns: List[MigDevice]
|
||||
A list of `MigDevice` instances of the given indices.
|
||||
A list of ``MigDevice`` instances of the given indices.
|
||||
"""
|
||||
|
||||
return list(map(cls, indices))
|
||||
|
||||
def __init__(self, index: Optional[Union[Tuple[int, int], str]] = None, *, # pylint: disable=super-init-not-called
|
||||
uuid: Optional[str] = None) -> None:
|
||||
"""Initializes the instance created by `__new__()`."""
|
||||
"""Initializes the instance created by ``__new__()``."""
|
||||
|
||||
if isinstance(index, str) and self.UUID_PATTERN.match(index) is not None: # passed by UUID
|
||||
index, uuid = None, index
|
||||
|
|
@ -1727,7 +1804,7 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
|
|||
"""The gpu instance ID of the MIG device.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The gpu instance ID of the MIG device, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The gpu instance ID of the MIG device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
if self._gpu_instance_id is NA:
|
||||
|
|
@ -1741,7 +1818,7 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
|
|||
"""The compute instance ID of the MIG device.
|
||||
|
||||
Returns: Union[int, NaType]
|
||||
The compute instance ID of the MIG device, or `nvitop.NA` (str: 'N/A') when not available.
|
||||
The compute instance ID of the MIG device, or ``nvitop.NA`` (str: ``'N/A'``) when not available.
|
||||
"""
|
||||
|
||||
if self._compute_instance_id is NA:
|
||||
|
|
@ -1752,7 +1829,7 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
|
|||
return self._compute_instance_id
|
||||
|
||||
def as_snapshot(self) -> Snapshot:
|
||||
"""Returns a onetime snapshot of the device. The attributes are defined in `SNAPSHOT_KEYS`."""
|
||||
"""Returns a onetime snapshot of the device. The attributes are defined in ``SNAPSHOT_KEYS``."""
|
||||
|
||||
snapshot = super().as_snapshot()
|
||||
snapshot.mig_index = self.mig_index
|
||||
|
|
@ -1764,15 +1841,18 @@ class MigDevice(Device): # pylint: disable=too-many-instance-attributes
|
|||
|
||||
class CudaDevice(Device):
|
||||
"""Class for devices enumerated over the CUDA ordinal. The order can vary depending on the
|
||||
`CUDA_VISIBLE_DEVICES` environment variable.
|
||||
environment variable ``CUDA_VISIBLE_DEVICES``.
|
||||
|
||||
See also for CUDA Device Enumeration:
|
||||
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
|
||||
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
|
||||
|
||||
`CudaDevice.__new__()` returns different types depending on the given arguments.
|
||||
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
|
||||
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
|
||||
``CudaDevice.__new__()`` returns different types depending on the given arguments.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
|
||||
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
|
||||
- (nvml_index: int) -> CudaDevice
|
||||
- (nvml_index: (int, int)) -> CudaMigDevice
|
||||
|
||||
|
|
@ -1808,16 +1888,50 @@ class CudaDevice(Device):
|
|||
|
||||
@classmethod
|
||||
def count(cls) -> int:
|
||||
"""The number of GPUs visible to CUDA applications."""
|
||||
"""The number of GPUs visible to CUDA applications.
|
||||
|
||||
Raises:
|
||||
RuntimeError:
|
||||
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
|
||||
"""
|
||||
|
||||
return len(super().parse_cuda_visible_devices())
|
||||
|
||||
@classmethod
|
||||
def all(cls) -> List['CudaDevice']:
|
||||
"""All CUDA visible devices.
|
||||
|
||||
Raises:
|
||||
RuntimeError:
|
||||
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
|
||||
"""
|
||||
|
||||
return cls.from_indices()
|
||||
|
||||
@classmethod
|
||||
def from_indices(cls, indices: Optional[Union[int, Iterable[int]]] = None) -> List['CudaDevice']:
|
||||
"""Returns a list of CUDA devices of the given CUDA indices.
|
||||
The CUDA ordinal will be enumerated from the environment variable ``CUDA_VISIBLE_DEVICES``.
|
||||
|
||||
See also for CUDA Device Enumeration:
|
||||
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars
|
||||
- https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#cuda-visible-devices
|
||||
|
||||
Args:
|
||||
indices (Iterable[int]):
|
||||
The value of ``CUDA_VISIBLE_DEVICES``, if not given, the value from the environment
|
||||
will be used.
|
||||
|
||||
Returns: List[CudaDevice]
|
||||
A list of ``CudaDevice`` of the given CUDA indices.
|
||||
|
||||
Raises:
|
||||
RuntimeError:
|
||||
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
|
||||
RuntimeError:
|
||||
If the index is out of range for the given environment variable ``CUDA_VISIBLE_DEVICES``.
|
||||
"""
|
||||
|
||||
return super().from_cuda_indices(indices)
|
||||
|
||||
def __new__(cls, cuda_index: Optional[int] = None, *,
|
||||
|
|
@ -1825,15 +1939,17 @@ class CudaDevice(Device):
|
|||
uuid: Optional[str] = None) -> 'Device':
|
||||
"""Creates a new instance of CudaDevice. The type of the result is determined by the given argument.
|
||||
|
||||
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
|
||||
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on `CUDA_VISIBLE_DEVICES`
|
||||
- (nvml_index: int) -> CudaDevice
|
||||
- (nvml_index: (int, int)) -> CudaMigDevice
|
||||
.. code-block:: python
|
||||
|
||||
- (index: int) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
|
||||
- (uuid: str) -> Union[CudaDevice, CudaMigDevice] # depending on ``CUDA_VISIBLE_DEVICES``
|
||||
- (nvml_index: int) -> CudaDevice
|
||||
- (nvml_index: (int, int)) -> CudaMigDevice
|
||||
|
||||
Note: This method takes exactly 1 non-None argument.
|
||||
|
||||
Returns: Union[CudaDevice, CudaMigDevice]
|
||||
A `CudaDevice` instance or a `CudaMigDevice` instance.
|
||||
A ``CudaDevice`` instance or a ``CudaMigDevice`` instance.
|
||||
|
||||
Raises:
|
||||
TypeError:
|
||||
|
|
@ -1842,9 +1958,9 @@ class CudaDevice(Device):
|
|||
If the given index is a tuple but does not consist of two integers.
|
||||
Raises:
|
||||
RuntimeError:
|
||||
If the `CUDA_VISIBLE_DEVICES` environment variable is invalid (e.g. duplicate entries).
|
||||
If the environment variable ``CUDA_VISIBLE_DEVICES`` is invalid (e.g. duplicate entries).
|
||||
RuntimeError:
|
||||
If the index is out of range for the given `CUDA_VISIBLE_DEVICES` environment variable.
|
||||
If the index is out of range for the given environment variable ``CUDA_VISIBLE_DEVICES``.
|
||||
"""
|
||||
|
||||
if cuda_index is not None and nvml_index is None and uuid is None:
|
||||
|
|
@ -1861,7 +1977,7 @@ class CudaDevice(Device):
|
|||
def __init__(self, cuda_index: Optional[int] = None, *,
|
||||
nvml_index: Optional[Union[int, Tuple[int, int]]] = None,
|
||||
uuid: Optional[str] = None) -> None:
|
||||
"""Initializes the instance created by `__new__()`.
|
||||
"""Initializes the instance created by ``__new__()``.
|
||||
|
||||
Raises:
|
||||
RuntimeError:
|
||||
|
|
@ -1895,7 +2011,7 @@ class CudaDevice(Device):
|
|||
return self.__class__, (self._cuda_index,)
|
||||
|
||||
def as_snapshot(self) -> Snapshot:
|
||||
"""Returns a onetime snapshot of the device. The attributes are defined in `SNAPSHOT_KEYS`."""
|
||||
"""Returns a onetime snapshot of the device. The attributes are defined in ``SNAPSHOT_KEYS``."""
|
||||
|
||||
snapshot = super().as_snapshot()
|
||||
snapshot.cuda_index = self.cuda_index
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# This file is part of nvitop, the interactive NVIDIA-GPU process viewer.
|
||||
# License: GNU GPL version 3.
|
||||
|
||||
"""Shortcuts for package `psutil`.
|
||||
"""Shortcuts for package ``psutil``.
|
||||
|
||||
psutil is a cross-platform library for retrieving information on running processes
|
||||
and system utilization (CPU, memory, disks, network, sensors) in Python.
|
||||
|
|
@ -53,11 +53,11 @@ def swap_percent():
|
|||
|
||||
|
||||
ppid_map = _psutil._ppid_map # pylint: disable=protected-access
|
||||
"""Obtains a `{pid: ppid, ...}` dict for all running processes in one shot."""
|
||||
"""Obtains a ``{pid: ppid, ...}`` dict for all running processes in one shot."""
|
||||
|
||||
|
||||
def reverse_ppid_map(): # pylint: disable=function-redefined
|
||||
"""Obtains a `{ppid: [pid, ...], ...}` dict for all running processes in one shot."""
|
||||
"""Obtains a ``{ppid: [pid, ...], ...}`` dict for all running processes in one shot."""
|
||||
|
||||
tree = _defaultdict(list)
|
||||
for pid, ppid in ppid_map().items():
|
||||
|
|
|
|||
|
|
@ -18,11 +18,11 @@ import pynvml
|
|||
from nvitop.core.utils import NA, colored
|
||||
|
||||
|
||||
__all__ = ['nvml', 'nvmlCheckReturn', 'NVMLError']
|
||||
__all__ = ['libnvml', 'nvml', 'nvmlCheckReturn', 'NVMLError']
|
||||
|
||||
|
||||
class libnvml:
|
||||
"""The helper singleton class that holds members from package `nvidia-ml-py`."""
|
||||
"""The helper singleton class that holds members from package ``nvidia-ml-py``."""
|
||||
|
||||
NVMLError = pynvml.NVMLError
|
||||
"""Base exception class for NVML query errors."""
|
||||
|
|
@ -34,7 +34,7 @@ class libnvml:
|
|||
c_nvmlDevice_t = pynvml.c_nvmlDevice_t
|
||||
|
||||
def __new__(cls) -> 'libnvml':
|
||||
"""Gets the singleton instance of `libnvml`."""
|
||||
"""Gets the singleton instance of ``libnvml``."""
|
||||
|
||||
if not hasattr(cls, '_instance'):
|
||||
instance = cls._instance = super().__new__(cls)
|
||||
|
|
@ -62,13 +62,13 @@ class libnvml:
|
|||
pass
|
||||
|
||||
def __enter__(self) -> 'libnvml':
|
||||
"""Entry of the context manager for `with` statement."""
|
||||
"""Entry of the context manager for ``with`` statement."""
|
||||
|
||||
self._lazy_init()
|
||||
return self
|
||||
|
||||
def __exit__(self, *args, **kwargs) -> None:
|
||||
"""Shutdowns the NVML context in the context manager for `with` statement."""
|
||||
"""Shuts down the NVML context in the context manager for ``with`` statement."""
|
||||
|
||||
self.__del__()
|
||||
|
||||
|
|
@ -100,8 +100,8 @@ class libnvml:
|
|||
If RM detects a driver/library version mismatch, usually after an upgrade for NVIDIA
|
||||
driver without reloading the kernel module.
|
||||
AttributeError:
|
||||
If cannot find function `nvmlInitWithFlags`, usually the `pynvml` module is overridden
|
||||
by other modules. Need to reinstall package `nvidia-ml-py`.
|
||||
If cannot find function ``nvmlInitWithFlags``, usually the ``pynvml`` module is overridden
|
||||
by other modules. Need to reinstall package ``nvidia-ml-py``.
|
||||
"""
|
||||
|
||||
self.nvmlInitWithFlags(0)
|
||||
|
|
@ -118,8 +118,8 @@ class libnvml:
|
|||
If RM detects a driver/library version mismatch, usually after an upgrade for NVIDIA
|
||||
driver without reloading the kernel module.
|
||||
AttributeError:
|
||||
If cannot find function `nvmlInitWithFlags`, usually the `pynvml` module is overridden
|
||||
by other modules. Need to reinstall package `nvidia-ml-py`.
|
||||
If cannot find function ``nvmlInitWithFlags``, usually the ``pynvml`` module is overridden
|
||||
by other modules. Need to reinstall package ``nvidia-ml-py``.
|
||||
"""
|
||||
|
||||
with self._lock:
|
||||
|
|
@ -179,7 +179,7 @@ class libnvml:
|
|||
If RM detects a driver/library version mismatch, usually after an upgrade for NVIDIA
|
||||
driver without reloading the kernel module.
|
||||
NVMLError_Uninitialized:
|
||||
If NVML was not first initialized with `nvmlInit()`.
|
||||
If NVML was not first initialized with ``nvmlInit()``.
|
||||
"""
|
||||
|
||||
pynvml.nvmlShutdown()
|
||||
|
|
@ -199,16 +199,16 @@ class libnvml:
|
|||
"""Calls a function with the given arguments from NVML. The NVML context will be lazily initialized.
|
||||
|
||||
Args:
|
||||
func (function or str):
|
||||
func (Union[Callable[..., Any], str]):
|
||||
The function to call. If it is given as a string, look up the
|
||||
function first from `pynvml`.
|
||||
default (any):
|
||||
function first from ``pynvml``.
|
||||
default (Any):
|
||||
The default value if the query fails.
|
||||
ignore_errors (bool):
|
||||
Whether to ignore errors and return the default value.
|
||||
ignore_function_not_found (bool):
|
||||
Whether to ignore function not found errors and return the
|
||||
default value. If set to `False`, a error message will be logged
|
||||
default value. If set to ``False``, an error message will be logged
|
||||
to the logger.
|
||||
*args:
|
||||
Positional arguments to pass to the query function.
|
||||
|
|
@ -251,7 +251,7 @@ class libnvml:
|
|||
|
||||
@staticmethod
|
||||
def nvmlCheckReturn(retval: Any, types: Optional[Union[type, Tuple[type, ...]]] = None) -> bool:
|
||||
"""Checks the return value is not `nvitop.NA` and is one of the given types."""
|
||||
"""Checks the return value is not ``nvitop.NA`` and is one of the given types."""
|
||||
|
||||
if types is None:
|
||||
return retval != NA
|
||||
|
|
@ -259,7 +259,7 @@ class libnvml:
|
|||
|
||||
|
||||
nvml = libnvml()
|
||||
"""The singleton instance of `libnvml`."""
|
||||
"""The singleton instance of class ``libnvml``."""
|
||||
|
||||
nvmlCheckReturn = nvml.nvmlCheckReturn
|
||||
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ _USE_FALLBACK_WHEN_RAISE = threading.local() # see also `GpuProcess.failsafe`
|
|||
def auto_garbage_clean(fallback=_RAISE):
|
||||
"""Removes the object references in the instance cache if the method call fails (the process is gone).
|
||||
|
||||
The fallback value will be used with `GpuProcess.failsafe` context manager, otherwise raises an
|
||||
The fallback value will be used with ``GpuProcess.failsafe`` context manager, otherwise raises an
|
||||
exception when it fails.
|
||||
"""
|
||||
|
||||
|
|
@ -161,7 +161,7 @@ class HostProcess(host.Process, metaclass=ABCMeta):
|
|||
HOST_SNAPSHOTS = {}
|
||||
|
||||
def __new__(cls, pid: Optional[int] = None) -> 'HostProcess':
|
||||
"""Returns the cached instance of `HostProcess`."""
|
||||
"""Returns the cached instance of ``HostProcess``."""
|
||||
|
||||
if pid is None:
|
||||
pid = os.getpid()
|
||||
|
|
@ -248,7 +248,7 @@ class HostProcess(host.Process, metaclass=ABCMeta):
|
|||
|
||||
@memoize_when_activated
|
||||
def running_time(self) -> datetime.timedelta:
|
||||
"""The elapsed time this process has been running in `datetime.timedelta`."""
|
||||
"""The elapsed time this process has been running in ``datetime.timedelta``."""
|
||||
|
||||
return datetime.datetime.now() - datetime.datetime.fromtimestamp(self.create_time())
|
||||
|
||||
|
|
@ -272,7 +272,7 @@ class HostProcess(host.Process, metaclass=ABCMeta):
|
|||
return self.memory_info().rss
|
||||
|
||||
def parent(self) -> Union['HostProcess', None]:
|
||||
"""Returns the parent process as a `HostProcess` instance. Returns `None` if there is no parent."""
|
||||
"""Returns the parent process as a ``HostProcess`` instance. Returns ``None`` if there is no parent."""
|
||||
|
||||
parent = super().parent()
|
||||
if parent is not None:
|
||||
|
|
@ -280,8 +280,8 @@ class HostProcess(host.Process, metaclass=ABCMeta):
|
|||
return None
|
||||
|
||||
def children(self, recursive: bool = False) -> List['HostProcess']:
|
||||
"""Return the children of this process as a list of `HostProcess` instances.
|
||||
If *recursive* is `True` return all the descendants.
|
||||
"""Return the children of this process as a list of ``HostProcess`` instances.
|
||||
If *recursive* is ``True`` return all the descendants.
|
||||
"""
|
||||
|
||||
return [HostProcess(child.pid) for child in super().children(recursive)]
|
||||
|
|
@ -294,7 +294,7 @@ class HostProcess(host.Process, metaclass=ABCMeta):
|
|||
Internally different process info (e.g. name, ppid, uids, gids, ...) may be fetched by using
|
||||
the same routine, but only one information is returned and the others are discarded. When
|
||||
using this context manager the internal routine is executed once (in the example below on
|
||||
`name()`) and the other info are cached.
|
||||
``name()``) and the other info are cached.
|
||||
|
||||
The cache is cleared when exiting the context manager block. The advice is to use this every
|
||||
time you retrieve more than one information about the process.
|
||||
|
|
@ -345,7 +345,7 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
|
|||
"""Represents a process with the given PID running on the given GPU device.
|
||||
The instance will be cached during the lifetime of the process.
|
||||
|
||||
The same host process can use multiple GPU devices. The `GpuProcess` instances representing the
|
||||
The same host process can use multiple GPU devices. The ``GpuProcess`` instances representing the
|
||||
same PID on the host but different GPU devices are different.
|
||||
"""
|
||||
|
||||
|
|
@ -355,7 +355,7 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
|
|||
def __new__(cls, pid: int, device: 'Device',
|
||||
gpu_memory: Optional[Union[int, NaType]] = None, # pylint: disable=unused-argument
|
||||
type: Optional[Union[str, NaType]] = None) -> 'GpuProcess': # pylint: disable=unused-argument,redefined-builtin
|
||||
"""Returns the cached instance of `GpuProcess`."""
|
||||
"""Returns the cached instance of ``GpuProcess``."""
|
||||
|
||||
if pid is None:
|
||||
pid = os.getpid()
|
||||
|
|
@ -385,7 +385,7 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
|
|||
def __init__(self, pid: int, device: 'Device', # pylint: disable=unused-argument
|
||||
gpu_memory: Optional[Union[int, NaType]] = None,
|
||||
type: Optional[Union[str, NaType]] = None) -> None: # pylint: disable=redefined-builtin
|
||||
"""Initializes the instance returned by `__new__()`."""
|
||||
"""Initializes the instance returned by ``__new__()``."""
|
||||
|
||||
if gpu_memory is None and not hasattr(self, '_gpu_memory'):
|
||||
gpu_memory = NA
|
||||
|
|
@ -456,54 +456,54 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
|
|||
"""The GPU device the process is running on.
|
||||
|
||||
The same host process can use multiple GPU devices.
|
||||
The `GpuProcess` instances representing the same PID on the host
|
||||
The ``GpuProcess`` instances representing the same PID on the host
|
||||
but different GPU devices are different.
|
||||
"""
|
||||
|
||||
return self._device
|
||||
|
||||
def gpu_instance_id(self) -> Union[int, NaType]:
|
||||
"""The GPU instance ID of the MIG device, or `nvitop.NA` if not available."""
|
||||
"""The GPU instance ID of the MIG device, or ``nvitop.NA`` if not available."""
|
||||
|
||||
return self._gpu_instance_id
|
||||
|
||||
def compute_instance_id(self) -> Union[int, NaType]:
|
||||
"""The compute instance ID of the MIG device, or `nvitop.NA` if not available."""
|
||||
"""The compute instance ID of the MIG device, or ``nvitop.NA`` if not available."""
|
||||
|
||||
return self._compute_instance_id
|
||||
|
||||
def gpu_memory(self) -> Union[int, NaType]: # in bytes
|
||||
"""The used GPU memory in bytes, or `nvitop.NA` if not available."""
|
||||
"""The used GPU memory in bytes, or ``nvitop.NA`` if not available."""
|
||||
|
||||
return self._gpu_memory
|
||||
|
||||
def gpu_memory_human(self) -> Union[str, NaType]: # in human readable
|
||||
"""The used GPU memory in human readable format, or `nvitop.NA` if not available."""
|
||||
"""The used GPU memory in human readable format, or ``nvitop.NA`` if not available."""
|
||||
|
||||
return self._gpu_memory_human
|
||||
|
||||
def gpu_memory_percent(self) -> Union[float, NaType]: # in percentage
|
||||
"""The percentage of used GPU memory by the process, or `nvitop.NA` if not available."""
|
||||
"""The percentage of used GPU memory by the process, or ``nvitop.NA`` if not available."""
|
||||
|
||||
return self._gpu_memory_percent
|
||||
|
||||
def gpu_sm_utilization(self) -> Union[int, NaType]: # in percentage
|
||||
"""The utilization rate of SM (Streaming Multiprocessor), or `nvitop.NA` if not available."""
|
||||
"""The utilization rate of SM (Streaming Multiprocessor), or ``nvitop.NA`` if not available."""
|
||||
|
||||
return self._gpu_sm_utilization
|
||||
|
||||
def gpu_memory_utilization(self) -> Union[int, NaType]: # in percentage
|
||||
"""The utilization rate of GPU memory bandwidth, or `nvitop.NA` if not available."""
|
||||
"""The utilization rate of GPU memory bandwidth, or ``nvitop.NA`` if not available."""
|
||||
|
||||
return self._gpu_memory_utilization
|
||||
|
||||
def gpu_encoder_utilization(self) -> Union[int, NaType]: # in percentage
|
||||
"""The utilization rate of the encoder, or `nvitop.NA` if not available."""
|
||||
"""The utilization rate of the encoder, or ``nvitop.NA`` if not available."""
|
||||
|
||||
return self._gpu_encoder_utilization
|
||||
|
||||
def gpu_decoder_utilization(self) -> Union[int, NaType]: # in percentage
|
||||
"""The utilization rate of the decoder, or `nvitop.NA` if not available."""
|
||||
"""The utilization rate of the decoder, or ``nvitop.NA`` if not available."""
|
||||
|
||||
return self._gpu_decoder_utilization
|
||||
|
||||
|
|
@ -583,7 +583,7 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
|
|||
|
||||
@auto_garbage_clean(fallback=NA)
|
||||
def running_time(self) -> Union[datetime.timedelta, NaType]:
|
||||
"""The elapsed time this process has been running in `datetime.timedelta`."""
|
||||
"""The elapsed time this process has been running in ``datetime.timedelta``."""
|
||||
|
||||
return self.host.running_time()
|
||||
|
||||
|
|
@ -733,10 +733,10 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
|
|||
@classmethod
|
||||
def take_snapshots(cls, gpu_processes: Iterable['GpuProcess'], *, # batched version of `as_snapshot`
|
||||
failsafe=False) -> List[Snapshot]:
|
||||
"""Takes snapshots for a list of `GpuProcess` instances.
|
||||
"""Takes snapshots for a list of ``GpuProcess`` instances.
|
||||
|
||||
If *failsafe* is `True`, then if any method fails, the fallback value in
|
||||
`auto_garbage_clean(fallback)` will be used.
|
||||
If *failsafe* is ``True``, then if any method fails, the fallback value in
|
||||
``auto_garbage_clean(fallback)`` will be used.
|
||||
"""
|
||||
|
||||
cache = {}
|
||||
|
|
|
|||
|
|
@ -77,28 +77,28 @@ class NotApplicableType(str):
|
|||
return math.nan
|
||||
|
||||
def __lt__(self, x):
|
||||
"""The `NA` is always greater than any number. Use the dictionary order for string."""
|
||||
"""The ``NA`` is always greater than any number. Use the dictionary order for string."""
|
||||
|
||||
if isinstance(x, (int, float)):
|
||||
return False
|
||||
return super().__lt__(x)
|
||||
|
||||
def __le__(self, x):
|
||||
"""The `NA` is always greater than any number. Use the dictionary order for string."""
|
||||
"""The ``NA`` is always greater than any number. Use the dictionary order for string."""
|
||||
|
||||
if isinstance(x, (int, float)):
|
||||
return False
|
||||
return super().__le__(x)
|
||||
|
||||
def __gt__(self, x):
|
||||
"""The `NA` is always greater than any number. Use the dictionary order for string."""
|
||||
"""The ``NA`` is always greater than any number. Use the dictionary order for string."""
|
||||
|
||||
if isinstance(x, (int, float)):
|
||||
return True
|
||||
return super().__gt__(x)
|
||||
|
||||
def __ge__(self, x):
|
||||
"""The `NA` is always greater than any number. Use the dictionary order for string."""
|
||||
"""The ``NA`` is always greater than any number. Use the dictionary order for string."""
|
||||
|
||||
if isinstance(x, (int, float)):
|
||||
return True
|
||||
|
|
@ -116,7 +116,7 @@ class NotApplicableType(str):
|
|||
# NA is NotApplicableType() -> True (NotApplicableType is a singleton class)
|
||||
NaType = NotApplicableType
|
||||
NA = NotApplicable = NotApplicableType()
|
||||
"""The singleton instance of `NotApplicableType`. The actual value is 'NA'."""
|
||||
"""The singleton instance of ``NotApplicableType``. The actual value is 'NA'."""
|
||||
|
||||
|
||||
KiB = 1 << 10
|
||||
|
|
@ -167,7 +167,7 @@ def bytes2human(x): # pylint: disable=too-many-return-statements
|
|||
|
||||
|
||||
def timedelta2human(dt):
|
||||
"""Converts `datetime.timedelta` instance to a human readable string."""
|
||||
"""Converts ``datetime.timedelta`` instance to a human readable string."""
|
||||
|
||||
if isinstance(dt, (int, float)):
|
||||
dt = datetime.timedelta(seconds=dt)
|
||||
|
|
@ -209,7 +209,7 @@ def boolify(string, default=None):
|
|||
|
||||
class Snapshot:
|
||||
"""A dict-like object that holds the snapshot values.
|
||||
The value can be accessed by `snapshot.name` or `snapshot[name]` syntax.
|
||||
The value can be accessed by ``snapshot.name`` or ``snapshot['name']`` syntax.
|
||||
|
||||
Missing attributes will be automatically fetched from the original object.
|
||||
"""
|
||||
|
|
@ -253,7 +253,7 @@ class Snapshot:
|
|||
return attribute
|
||||
|
||||
def __getitem__(self, name):
|
||||
"""Supports `dict[name]` syntax."""
|
||||
"""Supports ``dict['name']`` syntax."""
|
||||
|
||||
try:
|
||||
return self.__getattr__(name)
|
||||
|
|
@ -261,7 +261,7 @@ class Snapshot:
|
|||
raise KeyError from e
|
||||
|
||||
def __setitem__(self, name, value):
|
||||
"""Supports `dict[name] = value` syntax."""
|
||||
"""Supports ``dict['name'] = value`` syntax."""
|
||||
|
||||
self.__setattr__(name, value)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue