docs: add more examples

Signed-off-by: Xuehai Pan <XuehaiPan@pku.edu.cn>
This commit is contained in:
Xuehai Pan 2022-07-03 21:18:54 +08:00
parent cf9e10dc7e
commit 2906d4f043
3 changed files with 121 additions and 4 deletions

View file

@ -416,6 +416,8 @@ Please refer to [Resource Metric Collector](#resource-metric-collector) for an e
#### Quick Start
A minimal script to monitor the GPU devices based on APIs from `nvitop`:
```python
from nvitop import Device
@ -434,9 +436,63 @@ for device in devices:
print(f' - Processes ({len(processes)}): {sorted_pids}')
for pid in sorted_pids:
print(f' - {processes[pid]}')
print('-' * 80)
print('-' * 120)
```
A more advanced example with coloring:
```python
# Example: print a colored status report for every CUDA-visible GPU device,
# followed by a table of the processes running on it.
import time

from nvitop import Device, GpuProcess, NA, colored

# Report header: the current local time.
print(colored(time.strftime('%a %b %d %H:%M:%S %Y'), color='red', attrs=('bold',)))

devices = Device.cuda.all()  # or `Device.all()` to use NVML ordinal instead
separator = False
for device in devices:
    # Draw the rule *between* devices: it is skipped before the first device,
    # so the report neither starts nor ends with a dangling separator line.
    if separator:
        print('-' * 120)
    separator = True

    processes = device.processes()  # type: Dict[int, GpuProcess]

    print(colored(str(device), color='green', attrs=('bold',)))
    print(colored(' - Fan speed: ', color='blue', attrs=('bold',)) + f'{device.fan_speed()}%')
    print(colored(' - Temperature: ', color='blue', attrs=('bold',)) + f'{device.temperature()}C')
    print(colored(' - GPU utilization: ', color='blue', attrs=('bold',)) + f'{device.gpu_utilization()}%')
    print(colored(' - Total memory: ', color='blue', attrs=('bold',)) + f'{device.memory_total_human()}')
    print(colored(' - Used memory: ', color='blue', attrs=('bold',)) + f'{device.memory_used_human()}')
    print(colored(' - Free memory: ', color='blue', attrs=('bold',)) + f'{device.memory_free_human()}')

    if processes:
        # Replace the PID -> process mapping with snapshots of those processes,
        # ordered by (username, PID) for a stable, grouped listing.
        processes = GpuProcess.take_snapshots(processes.values(), failsafe=True)
        processes.sort(key=lambda process: (process.username, process.pid))

        print(colored(f' - Processes ({len(processes)}):', color='blue', attrs=('bold',)))
        # One row template shared by the table header and every process row.
        fmt = ' {pid:<5} {username:<8} {cpu:>5} {host_memory:>8} {time:>8} {gpu_memory:>8} {sm:>3} {command:<}'.format
        print(colored(fmt(pid='PID', username='USERNAME',
                          cpu='CPU%', host_memory='HOST-MEM', time='TIME',
                          gpu_memory='GPU-MEM', sm='SM%',
                          command='COMMAND'),
                      attrs=('bold',)))
        for snapshot in processes:
            print(fmt(pid=snapshot.pid,
                      # Fit the 8-column field: names longer than 8 characters
                      # are cut to 7 characters plus a '+' marker.
                      username=snapshot.username[:7] + ('+' if len(snapshot.username) > 8 else snapshot.username[7:8]),
                      cpu=snapshot.cpu_percent, host_memory=snapshot.host_memory_human,
                      time=snapshot.running_time_human,
                      # Show an explicit placeholder when the GPU memory usage is `NA`.
                      gpu_memory=(snapshot.gpu_memory_human if snapshot.gpu_memory_human is not NA else 'WDDM:N/A'),
                      sm=snapshot.gpu_sm_utilization,
                      command=snapshot.command))
    else:
        print(colored(' - No Running Processes', attrs=('bold',)))
```
<p align="center">
<img width="100%" src="https://user-images.githubusercontent.com/16078332/177041142-fe988d58-6a97-4559-84fd-b51204cf9231.png" alt="Demo">
<br/>
An example monitoring script built with APIs from <code>nvitop</code>.
</p>
#### Status Snapshot
`nvitop` provides a helper function to retrieve the status of both GPU devices and GPU processes at once. You can type `help(nvitop.take_snapshots)` in the Python REPL for detailed documentation.

View file

@ -5,9 +5,11 @@ Welcome to nvitop's documentation!
An interactive NVIDIA-GPU process viewer, the one-stop solution for GPU process management.
.. image:: https://user-images.githubusercontent.com/16078332/171005261-1aad126e-dc27-4ed3-a89b-7f9c1c998bf7.png
.. figure:: https://user-images.githubusercontent.com/16078332/171005261-1aad126e-dc27-4ed3-a89b-7f9c1c998bf7.png
:align: center
The CLI from ``nvitop``.
.. |GitHub| image:: https://img.shields.io/badge/GitHub-Homepage-blue?logo=github
.. _GitHub: https://github.com/XuehaiPan/nvitop
@ -77,6 +79,8 @@ If this repo is useful to you, please star ⭐️ it to let more people know
Quick Start
"""""""""""
A minimal script to monitor the GPU devices based on APIs from ``nvitop``:
.. code-block:: python
from nvitop import Device
@ -96,7 +100,62 @@ Quick Start
print(f' - Processes ({len(processes)}): {sorted_pids}')
for pid in sorted_pids:
print(f' - {processes[pid]}')
print('-' * 80)
print('-' * 120)
A more advanced example with coloring:
.. code-block:: python
# Example: print a colored status report for every CUDA-visible GPU device,
# followed by a table of the processes running on it.
import time

from nvitop import Device, GpuProcess, NA, colored

# Report header: the current local time.
print(colored(time.strftime('%a %b %d %H:%M:%S %Y'), color='red', attrs=('bold',)))

devices = Device.cuda.all()  # or `Device.all()` to use NVML ordinal instead
separator = False
for device in devices:
    # Draw the rule *between* devices: it is skipped before the first device,
    # so the report neither starts nor ends with a dangling separator line.
    if separator:
        print('-' * 120)
    separator = True

    processes = device.processes()  # type: Dict[int, GpuProcess]

    print(colored(str(device), color='green', attrs=('bold',)))
    print(colored(' - Fan speed: ', color='blue', attrs=('bold',)) + f'{device.fan_speed()}%')
    print(colored(' - Temperature: ', color='blue', attrs=('bold',)) + f'{device.temperature()}C')
    print(colored(' - GPU utilization: ', color='blue', attrs=('bold',)) + f'{device.gpu_utilization()}%')
    print(colored(' - Total memory: ', color='blue', attrs=('bold',)) + f'{device.memory_total_human()}')
    print(colored(' - Used memory: ', color='blue', attrs=('bold',)) + f'{device.memory_used_human()}')
    print(colored(' - Free memory: ', color='blue', attrs=('bold',)) + f'{device.memory_free_human()}')

    if processes:
        # Replace the PID -> process mapping with snapshots of those processes,
        # ordered by (username, PID) for a stable, grouped listing.
        processes = GpuProcess.take_snapshots(processes.values(), failsafe=True)
        processes.sort(key=lambda process: (process.username, process.pid))

        print(colored(f' - Processes ({len(processes)}):', color='blue', attrs=('bold',)))
        # One row template shared by the table header and every process row.
        fmt = ' {pid:<5} {username:<8} {cpu:>5} {host_memory:>8} {time:>8} {gpu_memory:>8} {sm:>3} {command:<}'.format
        print(colored(fmt(pid='PID', username='USERNAME',
                          cpu='CPU%', host_memory='HOST-MEM', time='TIME',
                          gpu_memory='GPU-MEM', sm='SM%',
                          command='COMMAND'),
                      attrs=('bold',)))
        for snapshot in processes:
            print(fmt(pid=snapshot.pid,
                      # Fit the 8-column field: names longer than 8 characters
                      # are cut to 7 characters plus a '+' marker.
                      username=snapshot.username[:7] + ('+' if len(snapshot.username) > 8 else snapshot.username[7:8]),
                      cpu=snapshot.cpu_percent, host_memory=snapshot.host_memory_human,
                      time=snapshot.running_time_human,
                      # Show an explicit placeholder when the GPU memory usage is `NA`.
                      gpu_memory=(snapshot.gpu_memory_human if snapshot.gpu_memory_human is not NA else 'WDDM:N/A'),
                      sm=snapshot.gpu_sm_utilization,
                      command=snapshot.command))
    else:
        print(colored(' - No Running Processes', attrs=('bold',)))
.. figure:: https://user-images.githubusercontent.com/16078332/177041142-fe988d58-6a97-4559-84fd-b51204cf9231.png
:align: center
An example monitoring script built with APIs from ``nvitop``.
Please refer to the section `More than a Monitor <https://github.com/XuehaiPan/nvitop#more-than-a-monitor>`_ in the README for more examples.
------

View file

@ -702,7 +702,9 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
self, *,
host_process_snapshot_cache: Optional[Dict[int, Snapshot]] = None
) -> Snapshot:
"""Returns a onetime snapshot of the process on the GPU device."""
"""Returns a onetime snapshot of the process on the GPU device.
See also :meth:`take_snapshots` and :meth:`failsafe`.
"""
host_process_snapshot_cache = host_process_snapshot_cache or {}
try: