refactor(core/process): get GPU instance ID and compute instance ID from c_nvmlProcessInfo_t

Signed-off-by: Xuehai Pan <XuehaiPan@pku.edu.cn>
This commit is contained in:
Xuehai Pan 2022-07-03 19:04:30 +08:00
parent eaca7a640c
commit 05d1b3dbd0
2 changed files with 25 additions and 9 deletions

View file

@ -1505,9 +1505,13 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
for type, func in [('C', 'nvmlDeviceGetComputeRunningProcesses'), # pylint: disable=redefined-builtin
('G', 'nvmlDeviceGetGraphicsRunningProcesses')]:
for p in nvml.nvmlQuery(func, self.handle, default=()): # pylint: disable=invalid-name
proc = processes[p.pid] = self.GPU_PROCESS_CLASS(pid=p.pid, device=self)
proc.set_gpu_memory(p.usedGpuMemory if isinstance(p.usedGpuMemory, int)
else NA) # used GPU memory is `N/A` in Windows Display Driver Model (WDDM)
proc = processes[p.pid] = self.GPU_PROCESS_CLASS(
pid=p.pid, device=self,
gpu_memory=(p.usedGpuMemory if isinstance(p.usedGpuMemory, int)
else NA), # used GPU memory is `N/A` in Windows Display Driver Model (WDDM)
gpu_instance_id=getattr(p, 'gpuInstanceId', 0xFFFFFFFF),
compute_instance_id=getattr(p, 'computeInstanceId', 0xFFFFFFFF)
)
proc.type = proc.type + type
if len(processes) > 0:

View file

@ -352,9 +352,12 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
INSTANCE_LOCK = threading.RLock()
INSTANCES = {}
def __new__(cls, pid: int, device: 'Device',
gpu_memory: Optional[Union[int, NaType]] = None, # pylint: disable=unused-argument
type: Optional[Union[str, NaType]] = None) -> 'GpuProcess': # pylint: disable=unused-argument,redefined-builtin
# pylint: disable=unused-argument
def __new__(cls, pid: int, device: 'Device', # pylint: disable=too-many-arguments
gpu_memory: Optional[Union[int, NaType]] = None,
gpu_instance_id: Optional[Union[int, NaType]] = None,
compute_instance_id: Optional[Union[int, NaType]] = None,
type: Optional[Union[str, NaType]] = None) -> 'GpuProcess': # pylint: disable=redefined-builtin
"""Returns the cached instance of ``GpuProcess``."""
if pid is None:
@ -382,24 +385,32 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
return instance
def __init__(self, pid: int, device: 'Device', # pylint: disable=unused-argument
def __init__(self, pid: int, device: 'Device', # pylint: disable=too-many-arguments,unused-argument
gpu_memory: Optional[Union[int, NaType]] = None,
type: Optional[Union[str, NaType]] = None) -> None: # pylint: disable=redefined-builtin
gpu_instance_id: Optional[Union[int, NaType]] = None,
compute_instance_id: Optional[Union[int, NaType]] = None,
type: Optional[Union[str, NaType]] = None) -> None: # pylint: disable=redefined-builtin
"""Initializes the instance returned by ``__new__()``."""
if gpu_memory is None and not hasattr(self, '_gpu_memory'):
gpu_memory = NA
if gpu_memory is not None:
self.set_gpu_memory(gpu_memory)
if type is None and not hasattr(self, '_type'):
type = NA
if type is not None:
self.type = type
if device.is_mig_device():
if gpu_instance_id is not None and compute_instance_id is not None:
self._gpu_instance_id = (gpu_instance_id if gpu_instance_id != 0xFFFFFFFF else NA)
self._compute_instance_id = (compute_instance_id if compute_instance_id != 0xFFFFFFFF else NA)
elif device.is_mig_device():
self._gpu_instance_id = device.gpu_instance_id()
self._compute_instance_id = device.compute_instance_id()
else:
self._gpu_instance_id = self._compute_instance_id = NA
for util in ('sm', 'memory', 'encoder', 'decoder'):
if not hasattr(self, '_gpu_{}_utilization'.format(util)):
setattr(self, '_gpu_{}_utilization'.format(util), NA)
@ -548,6 +559,7 @@ class GpuProcess: # pylint: disable=too-many-instance-attributes,too-many-publi
- 'C': compute context
- 'G': graphics context
- 'C+G': both compute context and graphics context
- 'N/A': not applicable
"""
return self._type