feat(api/device): add methods to query PCI-e throughput

This commit is contained in:
Xuehai Pan 2023-08-04 21:00:33 +08:00
parent ef77b8b989
commit 2d479deecc
6 changed files with 112 additions and 2 deletions

View file

@ -190,7 +190,9 @@ good-names=i,
fg,
bg,
n,
ui
ui,
tx,
rx
# Good variable names regexes, separated by a comma. If names match any regex,
# they will always be accepted

View file

@ -1095,6 +1095,11 @@ Out[16]: PhysicalDeviceSnapshot(
memory_utilization=7, # in percentage (NOTE: this is the utilization rate of GPU memory bandwidth)
mig_mode='N/A',
name='GeForce RTX 2080 Ti',
pcie_rx_throughput=1000, # in KiB/s
pcie_rx_throughput_human='1000KiB/s',
pcie_throughput=ThroughputInfo(tx=1000, rx=1000), # in KiB/s
pcie_tx_throughput=1000, # in KiB/s
pcie_tx_throughput_human='1000KiB/s',
performance_state='P2',
persistence_mode='Disabled',
power_limit=250000, # in milliwatts (mW)

View file

@ -146,3 +146,6 @@ MPS
KMD
conf
Unallocated
KiB
tx
rx

View file

@ -182,6 +182,21 @@ class UtilizationRates(NamedTuple): # in percentage # pylint: disable=missing-c
decoder: int | NaType
class ThroughputInfo(NamedTuple):  # in KiB/s
    """A pair of throughput readings, in KiB/s, for the two transfer directions.

    Each field holds either an integer or the ``NA`` placeholder (see the field annotations).
    The long-form names :attr:`transmit` and :attr:`receive` are read-only aliases of the
    :attr:`tx` and :attr:`rx` fields.
    """

    # Transmit throughput.
    tx: int | NaType
    # Receive throughput.
    rx: int | NaType

    @property
    def receive(self) -> int | NaType:
        """Alias of :attr:`rx`."""
        return self.rx

    @property
    def transmit(self) -> int | NaType:
        """Alias of :attr:`tx`."""
        return self.tx
# Unique sentinel object. It is annotated as `str` (with the resulting type-checker error
# suppressed) -- presumably so it can be the default of `str`-typed parameters and be told
# apart from any real string; confirm against its use sites.
_VALUE_OMITTED: str = object() # type: ignore[assignment]
@ -1416,6 +1431,80 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
power_limit = f'{round(power_limit / 1000.0)}W'
return f'{power_usage} / {power_limit}'
def pcie_throughput(self) -> ThroughputInfo:  # in KiB/s
    """The current PCIe throughput (transmit and receive) in KiB/s.

    The two values are obtained from :meth:`pcie_tx_throughput` and
    :meth:`pcie_rx_throughput`, in that order. This function is querying a byte counter over
    a 20ms interval and thus is the PCIe throughput over that interval.

    Returns: ThroughputInfo(tx, rx)
        A named tuple with current PCIe throughput in KiB/s, the item could be
        :const:`nvitop.NA` when not applicable.
    """
    # Query transmit first, then receive -- same order as the tuple fields.
    transmit = self.pcie_tx_throughput()
    receive = self.pcie_rx_throughput()
    return ThroughputInfo(tx=transmit, rx=receive)
@memoize_when_activated
def pcie_tx_throughput(self) -> int | NaType:  # in KiB/s
    """The current PCIe transmit throughput in KiB/s.

    This function is querying a byte counter over a 20ms interval and thus is the PCIe
    throughput over that interval.

    Returns: Union[int, NaType]
        The current PCIe transmit throughput in KiB/s, or :const:`nvitop.NA` when not applicable.
    """
    # Select the *transmit* byte counter. Passing the RX counter here would make this method
    # report the receive throughput, i.e. always the same value as `pcie_rx_throughput()`.
    return libnvml.nvmlQuery(
        'nvmlDeviceGetPcieThroughput',
        self.handle,
        libnvml.NVML_PCIE_UTIL_TX_BYTES,
    )
@memoize_when_activated
def pcie_rx_throughput(self) -> int | NaType:  # in KiB/s
    """The current PCIe receive throughput in KiB/s.

    The value comes from ``nvmlDeviceGetPcieThroughput`` with the receive byte counter
    selected. This function is querying a byte counter over a 20ms interval and thus is the
    PCIe throughput over that interval.

    Returns: Union[int, NaType]
        The current PCIe receive throughput in KiB/s, or :const:`nvitop.NA` when not applicable.
    """
    # Counter selector for the receive direction.
    counter = libnvml.NVML_PCIE_UTIL_RX_BYTES
    return libnvml.nvmlQuery('nvmlDeviceGetPcieThroughput', self.handle, counter)
def pcie_tx_throughput_human(self) -> str | NaType:  # in human readable
    """The current PCIe transmit throughput in human readable format.

    Formats the result of :meth:`pcie_tx_throughput` as ``'<size>/s'``. This function is
    querying a byte counter over a 20ms interval and thus is the PCIe throughput over that
    interval.

    Returns: Union[str, NaType]
        The current PCIe transmit throughput in human readable format, or :const:`nvitop.NA`
        when not applicable.
    """
    kib_per_second = self.pcie_tx_throughput()
    if not libnvml.nvmlCheckReturn(kib_per_second, int):
        return NA

    # The raw reading is in KiB/s; shift by 10 bits to get bytes/s for `bytes2human`.
    bytes_per_second = kib_per_second << 10
    return f'{bytes2human(bytes_per_second)}/s'
def pcie_rx_throughput_human(self) -> str | NaType:  # in human readable
    """The current PCIe receive throughput in human readable format.

    Formats the result of :meth:`pcie_rx_throughput` as ``'<size>/s'``. This function is
    querying a byte counter over a 20ms interval and thus is the PCIe throughput over that
    interval.

    Returns: Union[str, NaType]
        The current PCIe receive throughput in human readable format, or :const:`nvitop.NA` when
        not applicable.
    """
    kib_per_second = self.pcie_rx_throughput()
    if not libnvml.nvmlCheckReturn(kib_per_second, int):
        return NA

    # The raw reading is in KiB/s; shift by 10 bits to get bytes/s for `bytes2human`.
    bytes_per_second = kib_per_second << 10
    return f'{bytes2human(bytes_per_second)}/s'
def display_active(self) -> str | NaType:
"""A flag that indicates whether a display is initialized on the GPU's (e.g. memory is allocated on the device for display).
@ -1758,6 +1847,11 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
'power_usage',
'power_limit',
'power_status',
'pcie_throughput',
'pcie_tx_throughput',
'pcie_rx_throughput',
'pcie_tx_throughput_human',
'pcie_rx_throughput_human',
'display_active',
'display_mode',
'current_driver_model',

View file

@ -196,6 +196,8 @@ NVML_COMPUTEMODE_DEFAULT: int = _pynvml.NVML_COMPUTEMODE_DEFAULT
NVML_COMPUTEMODE_EXCLUSIVE_THREAD: int = _pynvml.NVML_COMPUTEMODE_EXCLUSIVE_THREAD
NVML_COMPUTEMODE_PROHIBITED: int = _pynvml.NVML_COMPUTEMODE_PROHIBITED
NVML_COMPUTEMODE_EXCLUSIVE_PROCESS: int = _pynvml.NVML_COMPUTEMODE_EXCLUSIVE_PROCESS
NVML_PCIE_UTIL_TX_BYTES: int = _pynvml.NVML_PCIE_UTIL_TX_BYTES
NVML_PCIE_UTIL_RX_BYTES: int = _pynvml.NVML_PCIE_UTIL_RX_BYTES
# pylint: enable=no-member
# New members in `libnvml` #########################################################################

View file

@ -525,7 +525,7 @@ SIZE_PATTERN: re.Pattern = re.compile(
"""The regex pattern for human readable size."""
# pylint: disable-next=too-many-return-statements
# pylint: disable-next=too-many-return-statements,too-many-branches
def bytes2human(
b: int | float | NaType,
*,
@ -545,6 +545,10 @@ def bytes2human(
return f'{b}B'
if b < MiB and min_unit <= KiB:
return f'{round(b / KiB)}KiB'
if b <= 100 * MiB and min_unit <= MiB:
return f'{round(b / MiB, 2):.2f}MiB'
if b <= 1000 * MiB and min_unit <= MiB:
return f'{round(b / MiB, 1):.1f}MiB'
if b <= 20 * GiB and min_unit <= MiB:
return f'{round(b / MiB)}MiB'
if b < 100 * GiB and min_unit <= GiB: