Add mx-smi MetaX GPU backend

Author: Kyle (2026-04-29 20:13:04 +08:00)
parent a6761eb5c4
commit a306d69a36
5 changed files with 707 additions and 23 deletions

View file

@ -27,6 +27,7 @@ from nvitop.api import (
host,
libcuda,
libcudart,
libmxsmi,
libnvml,
process,
termcolor,
@ -46,6 +47,7 @@ for submodule in (
host,
libcuda,
libcudart,
libmxsmi,
libnvml,
process,
termcolor,

View file

@ -23,6 +23,7 @@ from nvitop.api import (
host,
libcuda,
libcudart,
libmxsmi,
libnvml,
process,
termcolor,
@ -69,6 +70,7 @@ __all__ = [ # noqa: RUF022
'NVMLError',
'nvmlCheckReturn',
'libnvml',
'libmxsmi',
'libcuda',
'libcudart',
# nvitop.api.device

View file

@ -117,7 +117,7 @@ import time
from collections import OrderedDict
from typing import TYPE_CHECKING, Any, ClassVar, Literal, NamedTuple, overload
from nvitop.api import host, libcuda, libcudart, libnvml
from nvitop.api import host, libcuda, libcudart, libmxsmi, libnvml
from nvitop.api.process import GpuProcess
from nvitop.api.utils import (
NA,
@ -240,6 +240,38 @@ _VALUE_OMITTED: str = ValueOmitted() # type: ignore[assignment]
del ValueOmitted
_ACTIVE_BACKEND: str | None = None
_ACTIVE_BACKEND_LOCK: threading.RLock = threading.RLock()
def _set_active_backend(backend: str) -> None:
global _ACTIVE_BACKEND # pylint: disable=global-statement
with _ACTIVE_BACKEND_LOCK:
_ACTIVE_BACKEND = backend
def _get_active_backend() -> str | None:
with _ACTIVE_BACKEND_LOCK:
return _ACTIVE_BACKEND
def _should_use_mxsmi_backend() -> bool:
return libmxsmi.is_forced() or _get_active_backend() == 'mx-smi'
@contextlib.contextmanager
def _nvml_probe() -> Generator[None]:
suppress_logs = libmxsmi.is_available()
logger_disabled = libnvml.LOGGER.disabled
if suppress_logs:
libnvml.LOGGER.disabled = True
try:
yield
finally:
libnvml.LOGGER.disabled = logger_disabled
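# `_nvml_probe()` is wrapped around the NVML calls below so that, on hosts
# where `mx-smi` is available, a failed NVML load stays silent instead of
# logging an error right before the mx-smi fallback takes over.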
class Device: # pylint: disable=too-many-instance-attributes,too-many-public-methods
"""Live class of the GPU devices, different from the device snapshots.
@ -333,9 +365,33 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
"""Test whether there are any devices and the NVML library is successfully loaded."""
try:
return cls.count() > 0
except libnvml.NVMLError:
except (libnvml.NVMLError, libmxsmi.MxSmiError):
return False
@staticmethod
def backend() -> str:
"""Return the active GPU query backend."""
active_backend = _get_active_backend()
if libmxsmi.is_forced():
return 'mx-smi'
if active_backend is not None:
return active_backend
try:
with _nvml_probe():
device_count = libnvml.nvmlQuery('nvmlDeviceGetCount', default=0)
if device_count > 0:
_set_active_backend('nvml')
return 'nvml'
except libnvml.NVMLError:
if libmxsmi.is_available():
_set_active_backend('mx-smi')
return 'mx-smi'
raise
if libmxsmi.is_available():
_set_active_backend('mx-smi')
return 'mx-smi'
return 'nvml'
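# e.g. Device.backend() returns 'mx-smi' on a MetaX-only host and 'nvml'
# when NVML reports at least one device; the answer is cached in
# _ACTIVE_BACKEND, so later calls skip the probe entirely.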
@staticmethod
def driver_version() -> str | NaType:
"""The version of the installed NVIDIA display driver. This is an alphanumeric string.
@ -355,7 +411,18 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
If RM detects a driver/library version mismatch, usually after an upgrade for NVIDIA
driver without reloading the kernel module.
"""
return libnvml.nvmlQuery('nvmlSystemGetDriverVersion')
if _should_use_mxsmi_backend():
return libmxsmi.driver_version()
try:
with _nvml_probe():
driver_version = libnvml.nvmlQuery('nvmlSystemGetDriverVersion')
except libnvml.NVMLError:
if libmxsmi.is_available():
_set_active_backend('mx-smi')
return libmxsmi.driver_version()
raise
_set_active_backend('nvml')
return driver_version
@staticmethod
def cuda_driver_version() -> str | NaType:
@ -375,7 +442,17 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
If RM detects a driver/library version mismatch, usually after an upgrade for NVIDIA
driver without reloading the kernel module.
"""
cuda_driver_version = libnvml.nvmlQuery('nvmlSystemGetCudaDriverVersion')
if _should_use_mxsmi_backend():
return libmxsmi.maca_version()
try:
with _nvml_probe():
cuda_driver_version = libnvml.nvmlQuery('nvmlSystemGetCudaDriverVersion')
except libnvml.NVMLError:
if libmxsmi.is_available():
_set_active_backend('mx-smi')
return libmxsmi.maca_version()
raise
_set_active_backend('nvml')
if libnvml.nvmlCheckReturn(cuda_driver_version, int):
major = cuda_driver_version // 1000
minor = (cuda_driver_version % 1000) // 10
@ -423,7 +500,22 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
If RM detects a driver/library version mismatch, usually after an upgrade for NVIDIA
driver without reloading the kernel module.
"""
return libnvml.nvmlQuery('nvmlDeviceGetCount', default=0)
if _should_use_mxsmi_backend():
return libmxsmi.device_count()
try:
with _nvml_probe():
count = libnvml.nvmlQuery('nvmlDeviceGetCount', default=0)
except libnvml.NVMLError:
if libmxsmi.is_available():
_set_active_backend('mx-smi')
return libmxsmi.device_count()
raise
if count == 0 and libmxsmi.is_available():
_set_active_backend('mx-smi')
return libmxsmi.device_count()
if count > 0:
_set_active_backend('nvml')
return count
@classmethod
def all(cls) -> list[PhysicalDevice]:
@ -700,36 +792,50 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
self._is_mig_device: bool | None = None
self._cuda_index: int | None = None
self._cuda_compute_capability: tuple[int, int] | NaType | None = None
self._backend: str = 'nvml'
self._handle: libnvml.c_nvmlDevice_t | None
if index is not None:
if _should_use_mxsmi_backend():
self._init_mxsmi(index=index, uuid=uuid, bus_id=bus_id)
elif index is not None:
self._nvml_index = index # type: ignore[assignment]
try:
self._handle = libnvml.nvmlQuery(
'nvmlDeviceGetHandleByIndex',
index,
ignore_errors=False,
)
with _nvml_probe():
self._handle = libnvml.nvmlQuery(
'nvmlDeviceGetHandleByIndex',
index,
ignore_errors=False,
)
except libnvml.NVMLError_GpuIsLost:
self._handle = None
self._name = 'ERROR: GPU is Lost'
except libnvml.NVMLError_Unknown:
self._handle = None
self._name = 'ERROR: Unknown'
except libnvml.NVMLError:
if libmxsmi.is_available():
_set_active_backend('mx-smi')
self._init_mxsmi(index=index)
else:
raise
else:
_set_active_backend('nvml')
else:
try:
if uuid is not None:
self._handle = libnvml.nvmlQuery(
'nvmlDeviceGetHandleByUUID',
uuid,
ignore_errors=False,
)
with _nvml_probe():
self._handle = libnvml.nvmlQuery(
'nvmlDeviceGetHandleByUUID',
uuid,
ignore_errors=False,
)
else:
self._handle = libnvml.nvmlQuery(
'nvmlDeviceGetHandleByPciBusId',
bus_id,
ignore_errors=False,
)
with _nvml_probe():
self._handle = libnvml.nvmlQuery(
'nvmlDeviceGetHandleByPciBusId',
bus_id,
ignore_errors=False,
)
except libnvml.NVMLError_GpuIsLost:
self._handle = None
self._nvml_index = NA # type: ignore[assignment]
@ -738,7 +844,14 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
self._handle = None
self._nvml_index = NA # type: ignore[assignment]
self._name = 'ERROR: Unknown'
except libnvml.NVMLError:
if libmxsmi.is_available():
_set_active_backend('mx-smi')
self._init_mxsmi(uuid=uuid, bus_id=bus_id)
else:
raise
else:
_set_active_backend('nvml')
self._nvml_index = libnvml.nvmlQuery('nvmlDeviceGetIndex', self._handle)
self._max_clock_infos: ClockInfos = ClockInfos(graphics=NA, sm=NA, memory=NA, video=NA)
@ -747,6 +860,36 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
self._ident: tuple[Hashable, str] = (self.index, self.uuid())
self._hash: int | None = None
def _init_mxsmi(
self,
*,
index: int | tuple[int, int] | bytes | None = None,
uuid: bytes | None = None,
bus_id: bytes | None = None,
) -> None:
"""Initialize this device from the MetaX ``mx-smi`` backend."""
if isinstance(index, tuple):
raise libnvml.NVMLError_NotSupported
try:
info = libmxsmi.get_device(index=index, uuid=uuid, bus_id=bus_id)
except libmxsmi.MxSmiDeviceNotFound as ex:
raise libnvml.NVMLError_NotFound from ex
_set_active_backend('mx-smi')
self._backend = 'mx-smi'
self._handle = None
self._nvml_index = info.index
self._name = info.name
self._uuid = info.uuid
self._bus_id = info.bus_id
self._memory_total = info.memory_total
def _is_mxsmi_device(self) -> bool:
return self._backend == 'mx-smi'
def _mxsmi_info(self) -> libmxsmi.DeviceInfo:
return libmxsmi.get_device(index=self.physical_index)
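# The mx-smi-backed properties below call _mxsmi_info() on every access;
# the snapshot cache in libmxsmi (0.25 s TTL) keeps this to at most one
# refresh, i.e. one `mx-smi -L` run plus one summary run, per TTL window.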
def __repr__(self) -> str:
"""Return a string representation of the device."""
return '{}(index={}, name={!r}, total_memory={})'.format( # noqa: UP032
@ -904,6 +1047,9 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=name
"""
if self._is_mxsmi_device():
self._name = self._mxsmi_info().name
return self._name
if self._handle is not None and self._name is NA:
self._name = libnvml.nvmlQuery('nvmlDeviceGetName', self._handle)
return self._name
@ -922,6 +1068,9 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=uuid
"""
if self._is_mxsmi_device():
self._uuid = self._mxsmi_info().uuid
return self._uuid
if self._handle is not None and self._uuid is NA:
self._uuid = libnvml.nvmlQuery('nvmlDeviceGetUUID', self._handle)
return self._uuid
@ -938,6 +1087,9 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=pci.bus_id
"""
if self._is_mxsmi_device():
self._bus_id = self._mxsmi_info().bus_id
return self._bus_id
if self._handle is not None and self._bus_id is NA:
self._bus_id = libnvml.nvmlQuery(
lambda handle: libnvml.nvmlDeviceGetPciInfo(handle).busId,
@ -959,6 +1111,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=serial
"""
if self._is_mxsmi_device():
return NA
if self._handle is not None:
return libnvml.nvmlQuery('nvmlDeviceGetSerial', self._handle)
return NA
@ -970,6 +1124,14 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: MemoryInfo(total, free, used, reserved)
A named tuple with memory information, the item could be :const:`nvitop.NA` when not applicable.
"""
if self._is_mxsmi_device():
info = self._mxsmi_info()
return MemoryInfo(
total=info.memory_total,
free=info.memory_free,
used=info.memory_used,
reserved=NA,
)
if self._handle is not None:
has_unified_memory = False
try:
@ -1179,6 +1341,15 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: UtilizationRates(gpu, memory, encoder, decoder)
A named tuple with GPU utilization rates (in percentage) for the device, the item could be :const:`nvitop.NA` when not applicable.
""" # pylint: disable=line-too-long
if self._is_mxsmi_device():
info = self._mxsmi_info()
return UtilizationRates(
gpu=info.gpu_utilization,
memory=info.memory_utilization,
encoder=NA,
decoder=NA,
)
gpu, memory, encoder, decoder = NA, NA, NA, NA
if self._handle is not None:
@ -1449,6 +1620,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=fan.speed
""" # pylint: disable=line-too-long
if self._is_mxsmi_device():
return self._mxsmi_info().fan_speed
if self._handle is not None:
return libnvml.nvmlQuery('nvmlDeviceGetFanSpeed', self._handle)
return NA
@ -1465,6 +1638,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=temperature.gpu
"""
if self._is_mxsmi_device():
return self._mxsmi_info().temperature
if self._handle is not None:
return libnvml.nvmlQuery(
'nvmlDeviceGetTemperature',
@ -1486,6 +1661,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
$(( "$(nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=power.draw)" * 1000 ))
"""
if self._is_mxsmi_device():
return self._mxsmi_info().power_usage
if self._handle is not None:
return libnvml.nvmlQuery('nvmlDeviceGetPowerUsage', self._handle)
return NA
@ -1507,6 +1684,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
$(( "$(nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=power.limit)" * 1000 ))
"""
if self._is_mxsmi_device():
return self._mxsmi_info().power_limit
if self._handle is not None:
return libnvml.nvmlQuery('nvmlDeviceGetPowerManagementLimit', self._handle)
return NA
@ -1547,6 +1726,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: Union[int, NaType]
The current PCIe transmit throughput in KiB/s, or :const:`nvitop.NA` when not applicable.
"""
if self._is_mxsmi_device():
return NA
if self._handle is not None:
return libnvml.nvmlQuery(
'nvmlDeviceGetPcieThroughput',
@ -1565,6 +1746,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: Union[int, NaType]
The current PCIe receive throughput in KiB/s, or :const:`nvitop.NA` when not applicable.
"""
if self._is_mxsmi_device():
return NA
if self._handle is not None:
return libnvml.nvmlQuery(
'nvmlDeviceGetPcieThroughput',
@ -2131,6 +2314,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=persistence_mode
""" # pylint: disable=line-too-long
if self._is_mxsmi_device():
return self._mxsmi_info().persistence_mode
if self._handle is not None:
return {
0: 'Disabled',
@ -2150,6 +2335,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=pstate
""" # pylint: disable=line-too-long
if self._is_mxsmi_device():
return self._mxsmi_info().performance_state
if self._handle is not None:
performance_state = libnvml.nvmlQuery('nvmlDeviceGetPerformanceState', self._handle)
if libnvml.nvmlCheckReturn(performance_state, int):
@ -2194,6 +2381,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=compute_mode
""" # pylint: disable=line-too-long
if self._is_mxsmi_device():
return 'Default'
if self._handle is not None:
return {
libnvml.NVML_COMPUTEMODE_DEFAULT: 'Default',
@ -2215,6 +2404,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=compute_cap
"""
if self._is_mxsmi_device():
return NA
if self._handle is not None:
if self._cuda_compute_capability is None:
self._cuda_compute_capability = libnvml.nvmlQuery(
@ -2226,6 +2417,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
def is_mig_device(self) -> bool:
"""Return whether or not the device is a MIG device."""
if self._is_mxsmi_device():
return False
if self._handle is not None:
if self._is_mig_device is None:
is_mig_device = libnvml.nvmlQuery(
@ -2253,6 +2446,8 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
nvidia-smi --id=<IDENTIFIER> --format=csv,noheader,nounits --query-gpu=mig.mode.current
"""
if self._is_mxsmi_device():
return NA
if self._handle is None:
return NA
if self.is_mig_device():
@ -2313,6 +2508,17 @@ class Device: # pylint: disable=too-many-instance-attributes,too-many-public-me
Returns: Dict[int, GpuProcess]
A dictionary mapping PID to GPU process instance.
"""
if self._is_mxsmi_device():
processes = {}
for process in libmxsmi.processes(self.physical_index):
processes[process.pid] = self.GPU_PROCESS_CLASS(
pid=process.pid,
device=self,
gpu_memory=process.used_memory,
type='C',
)
return processes
if self._handle is None:
return {}

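The probe-then-fallback shape above recurs in `backend()`, `driver_version()`, `cuda_driver_version()`, and `count()`. A condensed sketch of that shared control flow, using only names from this diff (`query` and `mxsmi_fallback` are hypothetical stand-ins for the per-method calls):

def _query_with_fallback(query, mxsmi_fallback):
    # Illustration only, not part of the diff.
    if _should_use_mxsmi_backend():  # forced via env var, or already active
        return mxsmi_fallback()
    try:
        with _nvml_probe():  # keep NVML quiet when mx-smi could take over
            result = query()
    except libnvml.NVMLError:
        if libmxsmi.is_available():  # NVML failed: switch to the MetaX backend
            _set_active_backend('mx-smi')
            return mxsmi_fallback()
        raise
    _set_active_backend('nvml')
    return result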
nvitop/api/libmxsmi.py Normal file
View file

@ -0,0 +1,467 @@
# This file is part of nvitop, the interactive NVIDIA-GPU process viewer.
#
# Copyright 2021-2025 Xuehai Pan. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for querying MetaX GPUs through ``mx-smi``."""
from __future__ import annotations
import os
import re
import shutil
import subprocess
import threading
import time
from dataclasses import dataclass, replace
from nvitop.api.utils import MiB, NA, NaType
__all__ = [
'DeviceInfo',
'MxSmiError',
'MxSmiDeviceNotFound',
'MxSmiNotFound',
'MxSmiSnapshot',
'ProcessInfo',
'clear_cache',
'device_count',
'driver_version',
'get_device',
'is_available',
'is_forced',
'maca_version',
'processes',
'snapshot',
]
@dataclass(frozen=True)
class DeviceInfo:
"""MetaX GPU device information collected from ``mx-smi``."""
index: int
name: str | NaType = NA
uuid: str | NaType = NA
bus_id: str | NaType = NA
state: str | NaType = NA
persistence_mode: str | NaType = NA
performance_state: str | NaType = NA
memory_total: int | NaType = NA
memory_used: int | NaType = NA
memory_free: int | NaType = NA
gpu_utilization: int | NaType = NA
memory_utilization: int | NaType = NA
temperature: int | NaType = NA
power_usage: int | NaType = NA
power_limit: int | NaType = NA
fan_speed: int | NaType = NA
@dataclass(frozen=True)
class ProcessInfo:
"""MetaX GPU process information collected from ``mx-smi``."""
gpu_index: int
pid: int
name: str | NaType = NA
used_memory: int | NaType = NA
@dataclass(frozen=True)
class MxSmiSnapshot:
"""A single ``mx-smi`` sample."""
devices: dict[int, DeviceInfo]
processes: list[ProcessInfo]
driver_version: str | NaType = NA
maca_version: str | NaType = NA
mxsmi_version: str | NaType = NA
class MxSmiError(RuntimeError):
"""Base exception for ``mx-smi`` query errors."""
class MxSmiNotFound(MxSmiError):
"""Raised when the ``mx-smi`` executable is not available."""
class MxSmiDeviceNotFound(MxSmiError):
"""Raised when a MetaX GPU device cannot be found."""
_BACKEND_ENVVAR = 'NVITOP_GPU_BACKEND'
_CACHE_TTL = 0.25
_CACHE_LOCK = threading.RLock()
_CACHE: MxSmiSnapshot | None = None
_CACHE_EXPIRES_AT = 0.0
_LIST_RE = re.compile(
r'^GPU#(?P<index>\d+)\s+'
r'(?P<name>.+?)\s+'
r'(?P<bus_id>[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F])\s+'
r'(?P<state>.*?)\s+'
r'\(UUID:\s*(?P<uuid>[^)]+)\)\s*$',
)
_MXSMI_VERSION_RE = re.compile(r'\bmx-smi\s+version:\s*(?P<version>\S+)', flags=re.IGNORECASE)
_DRIVER_VERSION_RE = re.compile(r'Kernel Mode Driver Version:\s*(?P<version>[^\s|]+)')
_MACA_VERSION_RE = re.compile(r'MACA Version:\s*(?P<version>[^\s|]+)')
_SUMMARY_FIRST_RE = re.compile(
r'^(?P<index>\d+)\s+(?P<name>.+?)\s+(?P<persistence>On|Off|Enable|Disable|Enabled|Disabled)\s*$',
)
_GPU_UTIL_RE = re.compile(r'(?P<util>\d+(?:\.\d+)?)\s*%')
_SUMMARY_SECOND_RE = re.compile(
r'^(?P<temperature>\d+(?:\.\d+)?)C\s+'
r'(?P<power_usage>\d+(?:\.\d+)?)W\s*/\s*'
r'(?P<power_limit>\d+(?:\.\d+)?)W\s+'
r'(?P<performance_state>\S+)',
)
_MEMORY_RE = re.compile(
r'(?P<used>\d+(?:\.\d+)?)\s*/\s*(?P<total>\d+(?:\.\d+)?)\s*MiB',
flags=re.IGNORECASE,
)
_PROCESS_RE = re.compile(
r'^\|\s*(?P<gpu_index>\d+)\s+'
r'(?P<pid>\d+)\s+'
r'(?P<name>.*?)\s+'
r'(?P<used_memory>\d+(?:\.\d+)?|N/A)\s*\|\s*$',
)
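# The patterns above target lines shaped like the following (fabricated
# samples that merely satisfy the regexes; real `mx-smi` output may differ):
#   `mx-smi -L` listing:
#     GPU#0  <name>  0000:3b:00.0  Active  (UUID: <uuid>)
#   summary table, two consecutive rows per device, three `|` columns each:
#     | 0  <name>  On            | 0000:3b:00.0      | 42%    |
#     | 31C  55.0W / 350.0W  P0  | 1024 / 65536 MiB  | Active |
#   process table:
#     | 0  12345  <name>  1024 |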
def is_forced() -> bool:
"""Return whether the MetaX backend was explicitly requested."""
backend = os.getenv(_BACKEND_ENVVAR, default='').strip().lower().replace('_', '-')
return backend in {'mx-smi', 'mxsmi', 'metax'}
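# e.g. `NVITOP_GPU_BACKEND=metax nvitop` forces the MetaX backend and skips
# the NVML probe entirely; 'mx-smi', 'mxsmi', and 'MX_SMI' are all
# equivalent after the lower-casing and underscore normalization above.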
def is_available() -> bool:
"""Return whether ``mx-smi`` can see at least one MetaX GPU."""
if shutil.which('mx-smi') is None:
return False
try:
return device_count() > 0
except MxSmiError:
return False
def device_count() -> int:
"""Return the number of MetaX GPUs visible to ``mx-smi``."""
return len(snapshot().devices)
def driver_version() -> str | NaType:
"""Return the MetaX kernel mode driver version."""
return snapshot().driver_version
def maca_version() -> str | NaType:
"""Return the MACA runtime version reported by ``mx-smi``."""
return snapshot().maca_version
def get_device(
*,
index: int | bytes | None = None,
uuid: str | bytes | None = None,
bus_id: str | bytes | None = None,
) -> DeviceInfo:
"""Return a MetaX device by index, UUID, or PCI bus ID."""
if sum(arg is not None for arg in (index, uuid, bus_id)) != 1:
raise TypeError('get_device() expects exactly one identifier.')
devices = snapshot().devices
if index is not None:
try:
return devices[int(index)]
except (KeyError, TypeError, ValueError) as ex:
raise MxSmiDeviceNotFound(f'MetaX GPU index {index!r} was not found.') from ex
identifier = _normalize_identifier(uuid if uuid is not None else bus_id)
for device in devices.values():
if identifier in {_normalize_identifier(device.uuid), _normalize_identifier(device.bus_id)}:
return device
raise MxSmiDeviceNotFound(f'MetaX GPU {identifier!r} was not found.')
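# Example lookups (illustrative; placeholder identifiers):
#     get_device(index=0)
#     get_device(uuid='gpu-...')           # matching is case-insensitive
#     get_device(bus_id=b'0000:3b:00.0')   # bytes identifiers are decoded first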
def processes(index: int) -> list[ProcessInfo]:
"""Return processes reported by ``mx-smi`` for the given GPU index."""
return [process for process in snapshot().processes if process.gpu_index == index]
def snapshot(*, ttl: float = _CACHE_TTL) -> MxSmiSnapshot:
"""Take or return a cached ``mx-smi`` snapshot."""
global _CACHE, _CACHE_EXPIRES_AT # pylint: disable=global-statement
now = time.monotonic()
with _CACHE_LOCK:
if _CACHE is not None and now < _CACHE_EXPIRES_AT:
return _CACHE
current = _take_snapshot()
with _CACHE_LOCK:
_CACHE = current
_CACHE_EXPIRES_AT = time.monotonic() + ttl
return _CACHE
def clear_cache() -> None:
"""Clear the cached ``mx-smi`` snapshot."""
global _CACHE, _CACHE_EXPIRES_AT # pylint: disable=global-statement
with _CACHE_LOCK:
_CACHE = None
_CACHE_EXPIRES_AT = 0.0
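# Illustrative cache behaviour (not part of the diff):
#     snapshot() is snapshot()   # True within the 0.25 s TTL window
#     clear_cache()              # next snapshot() runs `mx-smi` afresh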
def _take_snapshot() -> MxSmiSnapshot:
listed_devices, listed_mxsmi_version = _parse_list_output(_run_mxsmi('-L'))
summary = _parse_summary_output(_run_mxsmi())
devices = listed_devices.copy()
for index, device in summary.devices.items():
base = devices.get(index, DeviceInfo(index=index))
devices[index] = replace(
base,
name=device.name if device.name is not NA else base.name,
bus_id=device.bus_id if device.bus_id is not NA else base.bus_id,
state=device.state if device.state is not NA else base.state,
persistence_mode=(
device.persistence_mode
if device.persistence_mode is not NA
else base.persistence_mode
),
performance_state=(
device.performance_state
if device.performance_state is not NA
else base.performance_state
),
memory_total=device.memory_total,
memory_used=device.memory_used,
memory_free=device.memory_free,
gpu_utilization=device.gpu_utilization,
memory_utilization=device.memory_utilization,
temperature=device.temperature,
power_usage=device.power_usage,
power_limit=device.power_limit,
fan_speed=device.fan_speed,
)
return MxSmiSnapshot(
devices=devices,
processes=summary.processes,
driver_version=summary.driver_version,
maca_version=summary.maca_version,
mxsmi_version=summary.mxsmi_version if summary.mxsmi_version is not NA else listed_mxsmi_version,
)
def _run_mxsmi(*args: str) -> str:
executable = shutil.which('mx-smi')
if executable is None:
raise MxSmiNotFound('The `mx-smi` executable was not found.')
command = [executable, *args]
try:
completed = subprocess.run( # noqa: S603
command,
check=False,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
encoding='utf-8',
errors='replace',
timeout=10.0,
)
except (OSError, subprocess.SubprocessError) as ex:
raise MxSmiError(f'Failed to run `{_command_to_string(command)}`.') from ex
if completed.returncode != 0:
output = completed.stdout.strip()
message = f'`{_command_to_string(command)}` exited with status {completed.returncode}.'
if output:
message = f'{message}\n{output}'
raise MxSmiError(message)
return completed.stdout
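# Note that stderr is merged into stdout and a 10 s timeout is enforced, so a
# hung or failing `mx-smi` surfaces as MxSmiError instead of blocking callers
# indefinitely.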
def _parse_list_output(output: str) -> tuple[dict[int, DeviceInfo], str | NaType]:
devices: dict[int, DeviceInfo] = {}
mxsmi_version: str | NaType = NA
for line in output.splitlines():
version_match = _MXSMI_VERSION_RE.search(line)
if version_match is not None:
mxsmi_version = version_match.group('version')
continue
match = _LIST_RE.match(line.strip())
if match is None:
continue
index = int(match.group('index'))
devices[index] = DeviceInfo(
index=index,
name=match.group('name').strip(),
uuid=match.group('uuid').strip(),
bus_id=match.group('bus_id').strip(),
state=match.group('state').strip() or NA,
)
return devices, mxsmi_version
def _parse_summary_output(output: str) -> MxSmiSnapshot:
devices: dict[int, DeviceInfo] = {}
processes: list[ProcessInfo] = []
driver_version: str | NaType = NA
maca_version: str | NaType = NA
mxsmi_version: str | NaType = NA
lines = output.splitlines()
for lineno, line in enumerate(lines):
version_match = _MXSMI_VERSION_RE.search(line)
if version_match is not None:
mxsmi_version = version_match.group('version')
driver_match = _DRIVER_VERSION_RE.search(line)
if driver_match is not None:
driver_version = driver_match.group('version')
maca_match = _MACA_VERSION_RE.search(line)
if maca_match is not None:
maca_version = maca_match.group('version')
parts = _split_table_line(line)
if len(parts) != 3:
continue
first_match = _SUMMARY_FIRST_RE.match(parts[0])
if first_match is None:
continue
try:
next_parts = _split_table_line(lines[lineno + 1])
except IndexError:
continue
if len(next_parts) != 3:
continue
second_match = _SUMMARY_SECOND_RE.match(next_parts[0])
memory_match = _MEMORY_RE.search(next_parts[1])
if second_match is None or memory_match is None:
continue
index = int(first_match.group('index'))
memory_used = _mib_to_bytes(memory_match.group('used'))
memory_total = _mib_to_bytes(memory_match.group('total'))
memory_free = (
memory_total - memory_used
if isinstance(memory_total, int) and isinstance(memory_used, int)
else NA
)
devices[index] = DeviceInfo(
index=index,
name=first_match.group('name').strip(),
bus_id=parts[1].strip(),
state=next_parts[2].strip() or NA,
persistence_mode=_normalize_mode(first_match.group('persistence')),
performance_state=second_match.group('performance_state'),
memory_total=memory_total,
memory_used=memory_used,
memory_free=memory_free,
gpu_utilization=_percent_to_int(parts[2]),
temperature=round(float(second_match.group('temperature'))),
power_usage=_watts_to_milliwatts(second_match.group('power_usage')),
power_limit=_watts_to_milliwatts(second_match.group('power_limit')),
)
in_process_table = False
for line in lines:
if '| Process:' in line:
in_process_table = True
continue
if not in_process_table:
continue
if 'no process found' in line.lower():
continue
process_match = _PROCESS_RE.match(line)
if process_match is None:
continue
processes.append(
ProcessInfo(
gpu_index=int(process_match.group('gpu_index')),
pid=int(process_match.group('pid')),
name=process_match.group('name').strip() or NA,
used_memory=_mib_to_bytes(process_match.group('used_memory')),
),
)
return MxSmiSnapshot(
devices=devices,
processes=processes,
driver_version=driver_version,
maca_version=maca_version,
mxsmi_version=mxsmi_version,
)
def _split_table_line(line: str) -> list[str]:
if not line.startswith('|'):
return []
return [part.strip() for part in line.strip().strip('|').split('|')]
def _mib_to_bytes(value: str) -> int | NaType:
if value.upper() == 'N/A':
return NA
return round(float(value) * MiB)
def _watts_to_milliwatts(value: str) -> int:
return round(float(value) * 1000)
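# e.g. _mib_to_bytes('1024') == 1024 * MiB == 1073741824
#      _mib_to_bytes('N/A') is NA
#      _watts_to_milliwatts('55.0') == 55000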
def _percent_to_int(value: str) -> int | NaType:
match = _GPU_UTIL_RE.search(value)
if match is None:
return NA
return round(float(match.group('util')))
def _normalize_mode(value: str) -> str | NaType:
normalized = value.strip().lower()
if normalized in {'on', 'enable', 'enabled'}:
return 'Enabled'
if normalized in {'off', 'disable', 'disabled'}:
return 'Disabled'
return NA
def _normalize_identifier(value: str | bytes | NaType | None) -> str:
if isinstance(value, bytes):
value = value.decode('utf-8', errors='replace')
if value is None or value is NA:
return ''
return str(value).strip().lower()
def _command_to_string(command: list[str]) -> str:
return ' '.join(command)
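A quick smoke test of the new module, assuming `mx-smi` is on the `PATH` (a sketch, not part of the diff):

from nvitop.api import libmxsmi

if libmxsmi.is_available():  # runs `mx-smi` under the hood, briefly cached
    print('KMD driver:', libmxsmi.driver_version())
    print('MACA:', libmxsmi.maca_version())
    for info in libmxsmi.snapshot().devices.values():
        print(info.index, info.name, info.gpu_utilization, info.memory_used)
else:
    print('no MetaX GPU visible')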

View file

@ -79,8 +79,15 @@ class DevicePanel(BasePanel): # pylint: disable=too-many-instance-attributes
if self.device_count == 0:
self.height = self.full_height = self.compact_height = 6
self.backend: str = Device.backend()
self.driver_version: str = Device.driver_version()
self.cuda_driver_version: str = Device.cuda_driver_version()
self.driver_version_label: str = (
'KMD Version' if self.backend == 'mx-smi' else 'Driver Version'
)
self.cuda_driver_version_label: str = (
'MACA Version' if self.backend == 'mx-smi' else 'CUDA Driver Version'
)
self._snapshot_buffer: list[Snapshot] = []
self._snapshots: list[Snapshot] = []
@ -226,8 +233,8 @@ class DevicePanel(BasePanel): # pylint: disable=too-many-instance-attributes
version_infos = [
'NVITOP {}'.format(__version__.partition('+')[0]),
f'Driver Version: {self.driver_version}',
f'CUDA Driver Version: {self.cuda_driver_version}',
f'{self.driver_version_label}: {self.driver_version}',
f'{self.cuda_driver_version_label}: {self.cuda_driver_version}',
]
if sum(len(v) for v in version_infos) % 2 == 0:
version_infos[0] += ' '
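# With the MetaX backend active the header labels swap accordingly, e.g.:
#     backend == 'nvml'   -> 'Driver Version: ...', 'CUDA Driver Version: ...'
#     backend == 'mx-smi' -> 'KMD Version: ...',    'MACA Version: ...'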