docs: add notes to set CUDA_DEVICE_ORDER="PCI_BUS_ID"

This commit is contained in:
Xuehai Pan 2023-05-01 02:35:12 +08:00
parent 4bb3da75f3
commit afd9ba2514
4 changed files with 10 additions and 1 deletion

View file

@@ -33,7 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Further isolate the `CUDA_VISIBLE_DEVICE` parser in a subprocess by [@XuehaiPan](https://github.com/XuehaiPan) in [#70](https://github.com/XuehaiPan/nvitop/pull/70).
- Further isolate the `CUDA_VISIBLE_DEVICES` parser in a subprocess by [@XuehaiPan](https://github.com/XuehaiPan) in [#70](https://github.com/XuehaiPan/nvitop/pull/70).
------

View file

@@ -686,6 +686,7 @@ for device in devices:
```python
In [1]: from nvitop import take_snapshots, Device
...: import os
...: os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
...: os.environ['CUDA_VISIBLE_DEVICES'] = '1,0' # comma-separated integers or UUID strings
In [2]: take_snapshots() # equivalent to `take_snapshots(Device.all())`
@@ -763,6 +764,7 @@ Please refer to section [Low-level APIs](#low-level-apis) for more information.
```python
In [1]: from nvitop import ResourceMetricCollector, Device
...: import os
...: os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
...: os.environ['CUDA_VISIBLE_DEVICES'] = '3,2,1,0' # comma-separated integers or UUID strings
In [2]: collector = ResourceMetricCollector() # log all devices and descendant processes of the current process on the GPUs
@@ -983,6 +985,7 @@ In [1]: from nvitop import (
...: NA,
...: )
...: import os
...: os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
...: os.environ['CUDA_VISIBLE_DEVICES'] = '9,8,7,6' # comma-separated integers or UUID strings
In [2]: Device.driver_version()

View file

@@ -80,6 +80,7 @@ def take_snapshots(
Examples:
>>> from nvitop import take_snapshots, Device
>>> import os
>>> os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
>>> os.environ['CUDA_VISIBLE_DEVICES'] = '1,0'
>>> take_snapshots() # equivalent to `take_snapshots(Device.all())`
@@ -306,6 +307,7 @@ class ResourceMetricCollector: # pylint: disable=too-many-instance-attributes
Examples:
>>> import os
>>> os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
>>> os.environ['CUDA_VISIBLE_DEVICES'] = '3,2,1,0'
>>> from nvitop import ResourceMetricCollector, Device

View file

@@ -69,6 +69,7 @@ Examples:
)
>>> import os
>>> os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
>>> os.environ['CUDA_VISIBLE_DEVICES'] = '3,2,1,0'
>>> CudaDevice.count() # number of NVIDIA GPUs visible to CUDA applications
@@ -2090,6 +2091,7 @@ class CudaDevice(Device):
Examples:
>>> import os
>>> os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
>>> os.environ['CUDA_VISIBLE_DEVICES'] = '3,2,1,0'
>>> CudaDevice.count() # number of NVIDIA GPUs visible to CUDA applications
@@ -2355,6 +2357,7 @@ def parse_cuda_visible_devices(
Examples:
>>> import os
>>> os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
>>> os.environ['CUDA_VISIBLE_DEVICES'] = '6,5'
>>> parse_cuda_visible_devices() # parse the `CUDA_VISIBLE_DEVICES` environment variable to NVML indices
[6, 5]
@@ -2411,6 +2414,7 @@ def normalize_cuda_visible_devices(cuda_visible_devices: str | None = _VALUE_OMI
Examples:
>>> import os
>>> os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
>>> os.environ['CUDA_VISIBLE_DEVICES'] = '6,5'
>>> normalize_cuda_visible_devices() # normalize the `CUDA_VISIBLE_DEVICES` environment variable to UUID strings
'GPU-849d5a8d-610e-eeea-1fd4-81ff44a23794,GPU-18ef14e9-dec6-1d7e-1284-3010c6ce98b1'