diff --git a/README.md b/README.md index 52aba23..7dc4e90 100644 --- a/README.md +++ b/README.md @@ -416,6 +416,8 @@ Please refer to [Resource Metric Collector](#resource-metric-collector) for an e #### Quick Start +A minimal script to monitor the GPU devices based on APIs from `nvitop`: + ```python from nvitop import Device @@ -434,9 +436,63 @@ for device in devices: print(f' - Processes ({len(processes)}): {sorted_pids}') for pid in sorted_pids: print(f' - {processes[pid]}') - print('-' * 80) + print('-' * 120) ``` +Another more advanced approach with coloring: + +```python +import time + +from nvitop import Device, GpuProcess, NA, colored + +print(colored(time.strftime('%a %b %d %H:%M:%S %Y'), color='red', attrs=('bold',))) + +devices = Device.cuda.all() # or `Device.all()` to use NVML ordinal instead +separator = False +for device in devices: + processes = device.processes() # type: Dict[int, GpuProcess] + + print(colored(str(device), color='green', attrs=('bold',))) + print(colored(' - Fan speed: ', color='blue', attrs=('bold',)) + f'{device.fan_speed()}%') + print(colored(' - Temperature: ', color='blue', attrs=('bold',)) + f'{device.temperature()}C') + print(colored(' - GPU utilization: ', color='blue', attrs=('bold',)) + f'{device.gpu_utilization()}%') + print(colored(' - Total memory: ', color='blue', attrs=('bold',)) + f'{device.memory_total_human()}') + print(colored(' - Used memory: ', color='blue', attrs=('bold',)) + f'{device.memory_used_human()}') + print(colored(' - Free memory: ', color='blue', attrs=('bold',)) + f'{device.memory_free_human()}') + if len(processes) > 0: + processes = GpuProcess.take_snapshots(processes.values(), failsafe=True) + processes.sort(key=lambda process: (process.username, process.pid)) + + print(colored(f' - Processes ({len(processes)}):', color='blue', attrs=('bold',))) + fmt = ' {pid:<5} {username:<8} {cpu:>5} {host_memory:>8} {time:>8} {gpu_memory:>8} {sm:>3} {command:<}'.format + print(colored(fmt(pid='PID', 
username='USERNAME', + cpu='CPU%', host_memory='HOST-MEM', time='TIME', + gpu_memory='GPU-MEM', sm='SM%', + command='COMMAND'), + attrs=('bold',))) + for snapshot in processes: + print(fmt(pid=snapshot.pid, + username=snapshot.username[:7] + ('+' if len(snapshot.username) > 8 else snapshot.username[7:8]), + cpu=snapshot.cpu_percent, host_memory=snapshot.host_memory_human, + time=snapshot.running_time_human, + gpu_memory=(snapshot.gpu_memory_human if snapshot.gpu_memory_human is not NA else 'WDDM:N/A'), + sm=snapshot.gpu_sm_utilization, + command=snapshot.command)) + else: + print(colored(' - No Running Processes', attrs=('bold',))) + + if separator: + print('-' * 120) + separator = True +``` + +

+ Demo +
+ An example monitoring script built with APIs from <code>nvitop</code>. +

+ #### Status Snapshot `nvitop` provides a helper function to retrieve the status of both GPU devices and GPU processes at once. You can type `help(nvitop.take_snapshots)` in Python REPL for detailed documentation. diff --git a/docs/source/index.rst b/docs/source/index.rst index c588b41..aad9789 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -5,9 +5,11 @@ Welcome to nvitop's documentation! An interactive NVIDIA-GPU process viewer, the one-stop solution for GPU process management. -.. image:: https://user-images.githubusercontent.com/16078332/171005261-1aad126e-dc27-4ed3-a89b-7f9c1c998bf7.png +.. figure:: https://user-images.githubusercontent.com/16078332/171005261-1aad126e-dc27-4ed3-a89b-7f9c1c998bf7.png :align: center + The CLI from ``nvitop``. + .. |GitHub| image:: https://img.shields.io/badge/GitHub-Homepage-blue?logo=github .. _GitHub: https://github.com/XuehaiPan/nvitop @@ -77,6 +79,8 @@ If this repo is useful to you, please star ⭐️ it to let more people know Quick Start """"""""""" +A minimal script to monitor the GPU devices based on APIs from ``nvitop``: + .. code-block:: python from nvitop import Device @@ -96,7 +100,62 @@ Quick Start print(f' - Processes ({len(processes)}): {sorted_pids}') for pid in sorted_pids: print(f' - {processes[pid]}') - print('-' * 80) + print('-' * 120) + +Another more advanced approach with coloring: + +.. 
code-block:: python + + import time + + from nvitop import Device, GpuProcess, NA, colored + + print(colored(time.strftime('%a %b %d %H:%M:%S %Y'), color='red', attrs=('bold',))) + + devices = Device.cuda.all() # or `Device.all()` to use NVML ordinal instead + separator = False + for device in devices: + processes = device.processes() # type: Dict[int, GpuProcess] + + print(colored(str(device), color='green', attrs=('bold',))) + print(colored(' - Fan speed: ', color='blue', attrs=('bold',)) + f'{device.fan_speed()}%') + print(colored(' - Temperature: ', color='blue', attrs=('bold',)) + f'{device.temperature()}C') + print(colored(' - GPU utilization: ', color='blue', attrs=('bold',)) + f'{device.gpu_utilization()}%') + print(colored(' - Total memory: ', color='blue', attrs=('bold',)) + f'{device.memory_total_human()}') + print(colored(' - Used memory: ', color='blue', attrs=('bold',)) + f'{device.memory_used_human()}') + print(colored(' - Free memory: ', color='blue', attrs=('bold',)) + f'{device.memory_free_human()}') + if len(processes) > 0: + processes = GpuProcess.take_snapshots(processes.values(), failsafe=True) + processes.sort(key=lambda process: (process.username, process.pid)) + + print(colored(f' - Processes ({len(processes)}):', color='blue', attrs=('bold',))) + fmt = ' {pid:<5} {username:<8} {cpu:>5} {host_memory:>8} {time:>8} {gpu_memory:>8} {sm:>3} {command:<}'.format + print(colored(fmt(pid='PID', username='USERNAME', + cpu='CPU%', host_memory='HOST-MEM', time='TIME', + gpu_memory='GPU-MEM', sm='SM%', + command='COMMAND'), + attrs=('bold',))) + for snapshot in processes: + print(fmt(pid=snapshot.pid, + username=snapshot.username[:7] + ('+' if len(snapshot.username) > 8 else snapshot.username[7:8]), + cpu=snapshot.cpu_percent, host_memory=snapshot.host_memory_human, + time=snapshot.running_time_human, + gpu_memory=(snapshot.gpu_memory_human if snapshot.gpu_memory_human is not NA else 'WDDM:N/A'), + sm=snapshot.gpu_sm_utilization, + 
command=snapshot.command)) + else: + print(colored(' - No Running Processes', attrs=('bold',))) + + if separator: + print('-' * 120) + separator = True + +.. figure:: https://user-images.githubusercontent.com/16078332/177041142-fe988d58-6a97-4559-84fd-b51204cf9231.png + :align: center + + An example monitoring script built with APIs from ``nvitop``. + +Please refer to section `More than a Monitor <https://github.com/XuehaiPan/nvitop#more-than-a-monitor>`_ in README for more examples. ------ diff --git a/nvitop/core/process.py b/nvitop/core/process.py index 18dfead..ca0734b 100644 --- a/nvitop/core/process.py +++ b/nvitop/core/process.py @@ -702,7 +702,9 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi self, *, host_process_snapshot_cache: Optional[Dict[int, Snapshot]] = None ) -> Snapshot: - """Returns a onetime snapshot of the process on the GPU device.""" + """Returns a onetime snapshot of the process on the GPU device. + See also :meth:`take_snapshots` and :meth:`failsafe`. + """ host_process_snapshot_cache = host_process_snapshot_cache or {} try: