chore(gui/device): update device panel for MIG devices

Signed-off-by: Xuehai Pan <XuehaiPan@pku.edu.cn>
This commit is contained in:
Xuehai Pan 2022-07-13 23:21:24 +08:00
parent d8e61cafb7
commit 6b576dad0d
5 changed files with 30 additions and 19 deletions

View file

@ -49,10 +49,12 @@ class Device(DeviceBase):
self._snapshot = None
self.tuple_index = ((self.index,) if isinstance(self.index, int) else self.index)
self.display_index = ':'.join(map(str, self.tuple_index))
def as_snapshot(self):
    """Build a snapshot through the parent class and label it with this device's indices.

    The new snapshot is stored on ``self._snapshot`` and is also the return value.
    """
    snapshot = self._snapshot = super().as_snapshot()
    # Mirror the index attributes held on the device onto the snapshot object
    # so that consumers of the snapshot can read them directly.
    snapshot.tuple_index = self.tuple_index
    snapshot.display_index = self.display_index
    return snapshot
@property
@ -143,11 +145,7 @@ class MigDevice(MigDeviceBase, Device):
self._snapshot = None
self.tuple_index = ((self.index,) if isinstance(self.index, int) else self.index)
def as_snapshot(self):
    """Build a snapshot through the parent class and attach this device's ``tuple_index``.

    The new snapshot is stored on ``self._snapshot`` and is also the return value.
    """
    snapshot = self._snapshot = super().as_snapshot()
    # Mirror the tuple index held on the device onto the snapshot object.
    snapshot.tuple_index = self.tuple_index
    return snapshot
self.display_index = ':'.join(map(str, self.tuple_index))
loading_intensity = Device.memory_loading_intensity
@ -157,6 +155,7 @@ class MigDevice(MigDeviceBase, Device):
'memory_used', 'memory_free', 'memory_total',
'memory_used_human', 'memory_free_human', 'memory_total_human',
'memory_percent', 'memory_usage',
'bar1_memory_used_human', 'bar1_memory_percent',
'gpu_utilization', 'memory_utilization',

View file

@ -9,7 +9,7 @@ import time
from cachetools.func import ttl_cache
from nvitop.gui.library import (host, Device,
from nvitop.gui.library import (host, Device, NA,
Displayable, colored, cut_string, make_bar)
@ -75,8 +75,8 @@ class DevicePanel(Displayable): # pylint: disable=too-many-instance-attributes
]
self.mig_formats = [
'{physical_index:>3} GI ID: {gpu_instance_id:>3} CI ID: {compute_instance_id:>3} '
'{memory_usage:>20}Profile: {name:>11}',
'MIG: {mig_index:<3}{name:>8} @ GI/CI:{gpu_instance_id:>2}/{compute_instance_id:<2}'
'{memory_usage:>20}BAR1: {bar1_memory_used_human:>8} / {bar1_memory_percent_string:>3}',
]
if host.WINDOWS:
@ -128,9 +128,13 @@ class DevicePanel(Displayable): # pylint: disable=too-many-instance-attributes
if device.name.startswith('NVIDIA '):
device.name = device.name.replace('NVIDIA ', '', 1)
if device.is_mig_device:
device.name = ' '.join(device.name.split()[-2:])
if len(device.name) > 11:
device.name = device.name.split()[-1]
device.name = device.name.rpartition(' ')[-1]
if device.bar1_memory_percent is not NA:
device.bar1_memory_percent = round(device.bar1_memory_percent)
if device.bar1_memory_percent >= 100:
device.bar1_memory_percent_string = 'MAX'
else:
device.bar1_memory_percent_string = '{}%'.format(round(device.bar1_memory_percent))
else:
device.name = cut_string(device.name, maxlen=18, padstr='..', align='right')
device.current_driver_model = device.current_driver_model.replace('WDM', 'TCC')

View file

@ -98,12 +98,12 @@ class HostPanel(Displayable): # pylint: disable=too-many-instance-attributes
device.memory_percent = BufferedHistoryGraph(
interval=1.0, width=20, height=5, upsidedown=False,
baseline=0.0, upperbound=100.0, dynamic_bound=False,
format=lambda x: ('GPU {} MEM: {}').format('/'.join(map(str, device.tuple_index)), percentage(x))
format=lambda x: ('GPU {} MEM: {}').format(device.display_index, percentage(x))
)(device.memory_percent)
device.gpu_utilization = BufferedHistoryGraph(
interval=1.0, width=20, height=5, upsidedown=True,
baseline=0.0, upperbound=100.0, dynamic_bound=False,
format=lambda x: ('GPU {} UTL: {}').format('/'.join(map(str, device.tuple_index)), percentage(x))
format=lambda x: ('GPU {} UTL: {}').format(device.display_index, percentage(x))
)(device.gpu_utilization)
for device in self.devices:

View file

@ -332,17 +332,21 @@ class ProcessPanel(Displayable): # pylint: disable=too-many-instance-attributes
if len(self.snapshots) > 0:
y = self.y + 5
prev_device_index = None
prev_device_display_index = None
color = -1
for process in self.snapshots:
device_index = process.device.physical_index
device_display_index = process.device.display_index
if prev_device_index != device_index:
color = process.device.snapshot.display_color
if not self.compact and prev_device_index is not None:
self.addstr(y, self.x, '' + '' * (self.width - 2) + '')
if y == self.y_mouse:
self.y_mouse += 1
y += 1
prev_device_index = device_index
if prev_device_display_index != device_display_index:
color = process.device.snapshot.display_color
prev_device_display_index = device_display_index
host_info = process.host_info
if self.host_offset < 0:
@ -350,8 +354,8 @@ class ProcessPanel(Displayable): # pylint: disable=too-many-instance-attributes
else:
host_info = WideString(host_info)[self.host_offset:]
self.addstr(y, self.x,
' {:>3} {:>7} {} {:>7} {:>8} {:>3} {}'.format(
device_index, cut_string(process.pid, maxlen=7, padstr='.'),
'{:>4} {:>7} {} {:>7} {:>8} {:>3} {}'.format(
device_display_index, cut_string(process.pid, maxlen=7, padstr='.'),
process.type, cut_string(process.username, maxlen=7, padstr='+'),
process.gpu_memory_human, process.gpu_sm_utilization_string.replace('%', ''),
WideString(host_info).ljust(self.width - 39)[:self.width - 39]
@ -425,14 +429,18 @@ class ProcessPanel(Displayable): # pylint: disable=too-many-instance-attributes
key, reverse, *_ = self.ORDERS['natural']
self.snapshots.sort(key=key, reverse=reverse)
prev_device_index = None
prev_device_display_index = None
color = None
for process in self.snapshots:
device_index = process.device.physical_index
device_display_index = process.device.display_index
if prev_device_index != device_index:
color = process.device.snapshot.display_color
if prev_device_index is not None:
lines.append('' + '' * (self.width - 2) + '')
prev_device_index = device_index
if prev_device_display_index != device_display_index:
color = process.device.snapshot.display_color
prev_device_display_index = device_display_index
host_info = cut_string(process.host_info, padstr='..', maxlen=self.width - 39)
@ -452,7 +460,7 @@ class ProcessPanel(Displayable): # pylint: disable=too-many-instance-attributes
info = colored(process.command, color=('red' if is_gone else 'yellow')).join(info)
elif process.username != USERNAME and not SUPERUSER:
info = colored(info, attrs=('dark',))
lines.append(' {} {}'.format(colored('{:>3}'.format(device_index), color), info))
lines.append('{} {}'.format(colored('{:>4}'.format(device_display_index), color), info))
lines.append('' + '' * (self.width - 2) + '')

View file

@ -286,7 +286,7 @@ class TreeViewScreen(Displayable): # pylint: disable=too-many-instance-attribut
snapshot.username = WideString(snapshot.username)
snapshot.prefix = node.prefix
if len(node.devices) > 0:
snapshot.devices = 'GPU ' + ','.join(map(lambda device: '/'.join(map(str, device.tuple_index)),
snapshot.devices = 'GPU ' + ','.join(map(lambda device: device.display_index,
sorted(node.devices, key=lambda device: device.tuple_index)))
else:
snapshot.devices = 'Host'