feat(linter): mypy integration (#73)

2026-05-15 14:15:55 -06:00 · 2023-05-01 13:02:01 +08:00 · 2023-05-01 13:02:01 +08:00 · f0b055bfcf
commit f0b055bfcf
parent 2408735f54
22 changed files with 640 additions and 355 deletions
--- a/.flake8
+++ b/.flake8
@ -20,6 +20,7 @@ per-file-ignores =
    # F401: module imported but unused
    # intentionally unused imports
    __init__.py: F401
+    nvitop/api/host.py: F401
    # SIM113: use enumerate
    # false positive
    nvitop/gui/screens/main/process.py: SIM113
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@ -72,7 +72,7 @@ jobs:
          python -m venv venv &&
          (
            source venv/bin/activate &&
-            python -m pip install --upgrade pip setuptools pre-commit pylint[spelling]
+            python -m pip install --upgrade pip setuptools pre-commit pylint[spelling] mypy typing-extensions
            python -m pip install -r requirements.txt &&
            python -m pre_commit install --install-hooks &&
            python -m pre_commit run --all-files &&
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@ -69,7 +69,7 @@ jobs:

      - name: Install linters
        run: |
-          python -m pip install --upgrade pre-commit pylint[spelling]
+          python -m pip install --upgrade pre-commit pylint[spelling] mypy typing-extensions

      - name: pre-commit
        run: |
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -25,7 +25,7 @@ repos:
      - id: debug-statements
      - id: double-quote-string-fixer
  - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.262
+    rev: v0.0.263
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
@ -38,10 +38,10 @@ repos:
    hooks:
      - id: black
  - repo: https://github.com/asottile/pyupgrade
-    rev: v3.3.1
+    rev: v3.3.2
    hooks:
      - id: pyupgrade
-        args: [--py37-plus]
+        args: [--py37-plus] # sync with requires-python
  - repo: https://github.com/pycqa/flake8
    rev: 6.0.0
    hooks:
@ -59,8 +59,8 @@ repos:
  - repo: https://github.com/codespell-project/codespell
    rev: v2.2.4
    hooks:
-    - id: codespell
-      additional_dependencies: [".[toml]"]
+      - id: codespell
+        additional_dependencies: [".[toml]"]
  - repo: local
    hooks:
      - id: pylint
@ -80,3 +80,11 @@ repos:
            ^nvitop/callbacks/|
            ^docs/
          )
+  - repo: local
+    hooks:
+      - id: mypy
+        name: mypy
+        entry: mypy
+        language: system
+        types_or: [python, pyi]
+        require_serial: true
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Added

-
+- Add `mypy` integration and update type annotations by [@XuehaiPan](https://github.com/XuehaiPan) in [#73](https://github.com/XuehaiPan/nvitop/pull/73).

 ### Changed

@ -33,7 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Fixed

- Further isolate the `CUDA_VISIBLE_DEVICE` parser in a subprocess by [@XuehaiPan](https://github.com/XuehaiPan) in [#70](https://github.com/XuehaiPan/nvitop/pull/70).
+- Further isolate the `CUDA_VISIBLE_DEVICES` parser in a subprocess by [@XuehaiPan](https://github.com/XuehaiPan) in [#70](https://github.com/XuehaiPan/nvitop/pull/70).

 ------

--- a/README.md
+++ b/README.md
@ -686,6 +686,7 @@ for device in devices:
 ```python
 In [1]: from nvitop import take_snapshots, Device
   ...: import os
+   ...: os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
   ...: os.environ['CUDA_VISIBLE_DEVICES'] = '1,0'  # comma-separated integers or UUID strings

 In [2]: take_snapshots()  # equivalent to `take_snapshots(Device.all())`
@ -763,6 +764,7 @@ Please refer to section [Low-level APIs](#low-level-apis) for more information.
 ```python
 In [1]: from nvitop import ResourceMetricCollector, Device
   ...: import os
+   ...: os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
   ...: os.environ['CUDA_VISIBLE_DEVICES'] = '3,2,1,0'  # comma-separated integers or UUID strings

 In [2]: collector = ResourceMetricCollector()                                   # log all devices and descendant processes of the current process on the GPUs
@ -983,6 +985,7 @@ In [1]: from nvitop import (
   ...:     NA,
   ...: )
   ...: import os
+   ...: os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
   ...: os.environ['CUDA_VISIBLE_DEVICES'] = '9,8,7,6'  # comma-separated integers or UUID strings

 In [2]: Device.driver_version()
--- a/docs/source/spelling_wordlist.txt
+++ b/docs/source/spelling_wordlist.txt
@ -140,3 +140,4 @@ noqa
 uptime
 ot
 oT
+mypy
--- a/nvitop/api/collector.py
+++ b/nvitop/api/collector.py
@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 """Resource metrics collectors."""

 from __future__ import annotations
@ -26,7 +25,7 @@ import os
 import threading
 import time
 from collections import OrderedDict, defaultdict
-from typing import Callable, Hashable, Iterable, NamedTuple
+from typing import Callable, Generator, Iterable, NamedTuple, TypeVar
 from weakref import WeakSet

 from nvitop.api import host
@ -46,7 +45,10 @@ class SnapshotResult(NamedTuple):  # pylint: disable=missing-class-docstring
 timer = time.monotonic


-def _unique(iterable: Iterable[Hashable]) -> list[Hashable]:
+_T = TypeVar('_T')
+
+
+def _unique(iterable: Iterable[_T]) -> list[_T]:
    return list(OrderedDict.fromkeys(iterable).keys())


@ -78,6 +80,7 @@ def take_snapshots(
    Examples:
        >>> from nvitop import take_snapshots, Device
        >>> import os
+        >>> os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        >>> os.environ['CUDA_VISIBLE_DEVICES'] = '1,0'

        >>> take_snapshots()  # equivalent to `take_snapshots(Device.all())`
@ -145,8 +148,8 @@ def take_snapshots(
    if isinstance(gpu_processes, GpuProcess):
        gpu_processes = [gpu_processes]

-    if gpu_processes is not None:
-        if gpu_processes:  # is not False or is a non-empty list/tuple
+    if gpu_processes is not None and gpu_processes is not True:
+        if gpu_processes:  # is a non-empty list/tuple
            gpu_processes = list(gpu_processes)
            process_devices = _unique(process.device for process in gpu_processes)
            for device in process_devices:
@ -161,7 +164,7 @@ def take_snapshots(
        if devices is None:
            physical_devices = Device.all()
            devices = []
-            leaf_devices = []
+            leaf_devices: list[Device] = []
            for physical_device in physical_devices:
                devices.append(physical_device)
                mig_devices = physical_device.mig_devices()
@ -176,7 +179,7 @@ def take_snapshots(
            itertools.chain.from_iterable(device.processes().values() for device in leaf_devices),
        )

-    devices = [device.as_snapshot() for device in devices]
+    devices = [device.as_snapshot() for device in devices]  # type: ignore[union-attr]
    gpu_processes = GpuProcess.take_snapshots(gpu_processes, failsafe=True)

    return SnapshotResult(devices, gpu_processes)
@ -254,22 +257,22 @@ def collect_in_background(

    def target() -> None:
        if on_start is not None:
-            on_start(collector)
+            on_start(collector)  # type: ignore[arg-type]
        try:
-            with collector(tag):
+            with collector(tag):  # type: ignore[misc]
                try:
-                    next_snapshot = timer() + interval
-                    while on_collect(collector.collect()):
+                    next_snapshot = timer() + interval  # type: ignore[operator]
+                    while on_collect(collector.collect()):  # type: ignore[union-attr]
                        time.sleep(max(0.0, next_snapshot - timer()))
-                        next_snapshot += interval
+                        next_snapshot += interval  # type: ignore[operator]
                except KeyboardInterrupt:
                    pass
        finally:
            if on_stop is not None:
-                on_stop(collector)
+                on_stop(collector)  # type: ignore[arg-type]

    daemon = threading.Thread(target=target, name=tag, daemon=True)
-    daemon.collector = collector
+    daemon.collector = collector  # type: ignore[attr-defined]
    if start:
        daemon.start()
    return daemon
@ -304,6 +307,7 @@ class ResourceMetricCollector:  # pylint: disable=too-many-instance-attributes

    Examples:
        >>> import os
+        >>> os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        >>> os.environ['CUDA_VISIBLE_DEVICES'] = '3,2,1,0'

        >>> from nvitop import ResourceMetricCollector, Device
@ -404,13 +408,13 @@ class ResourceMetricCollector:  # pylint: disable=too-many-instance-attributes
        if devices is None:
            devices = Device.all()

-        root_pids = {os.getpid()} if root_pids is None else set(root_pids)
+        root_pids: set[int] = {os.getpid()} if root_pids is None else set(root_pids)

-        self.interval = interval
+        self.interval: float = interval

-        self.devices = list(devices)
-        self.all_devices = []
-        self.leaf_devices = []
+        self.devices: list[Device] = list(devices)
+        self.all_devices: list[Device] = []
+        self.leaf_devices: list[Device] = []
        for device in self.devices:
            self.all_devices.append(device)
            mig_devices = device.mig_devices()
@ -420,21 +424,23 @@ class ResourceMetricCollector:  # pylint: disable=too-many-instance-attributes
            else:
                self.leaf_devices.append(device)

-        self.root_pids = root_pids
-        self._positive_processes = WeakSet(HostProcess(pid) for pid in self.root_pids)
-        self._negative_processes = WeakSet()
+        self.root_pids: set[int] = root_pids
+        self._positive_processes: WeakSet[HostProcess] = WeakSet(
+            HostProcess(pid) for pid in self.root_pids
+        )
+        self._negative_processes: WeakSet[HostProcess] = WeakSet()

-        self._last_timestamp = timer() - 2.0 * self.interval
-        self._lock = threading.RLock()
-        self._metric_buffer = None
-        self._tags = set()
+        self._last_timestamp: float = timer() - 2.0 * self.interval
+        self._lock: threading.RLock = threading.RLock()
+        self._metric_buffer: _MetricBuffer | None = None
+        self._tags: set[str] = set()

-        self._daemon = threading.Thread(
+        self._daemon: threading.Thread = threading.Thread(
            name='gpu_metric_collector_daemon',
            target=self._target,
            daemon=True,
        )
-        self._daemon_running = threading.Event()
+        self._daemon_running: threading.Event = threading.Event()

    def activate(self, tag: str) -> ResourceMetricCollector:
        """Start a new metric collection with the given tag.
@ -500,7 +506,7 @@ class ResourceMetricCollector:  # pylint: disable=too-many-instance-attributes
                if buffer.tag == tag:
                    self._metric_buffer = buffer.prev
                    break
-                buffer = buffer.prev
+                buffer = buffer.prev  # type: ignore[assignment]

            if self._metric_buffer is None:
                self._daemon_running.clear()
@ -510,7 +516,7 @@ class ResourceMetricCollector:  # pylint: disable=too-many-instance-attributes
    stop = deactivate

    @contextlib.contextmanager
-    def context(self, tag: str) -> ResourceMetricCollector:
+    def context(self, tag: str) -> Generator[ResourceMetricCollector, None, None]:
        """A context manager for starting and stopping resource metric collection.

        Args:
@ -578,7 +584,7 @@ class ResourceMetricCollector:  # pylint: disable=too-many-instance-attributes
                buffer.clear()
                if buffer.tag == tag:
                    break
-                buffer = buffer.prev
+                buffer = buffer.prev  # type: ignore[assignment]

    def collect(self) -> dict[str, float]:
        """Get the average resource consumption during collection."""
@ -665,7 +671,7 @@ class ResourceMetricCollector:  # pylint: disable=too-many-instance-attributes
    def take_snapshots(self) -> SnapshotResult:
        """Take snapshots of the current resource metrics and update the metric buffer."""
        if len(self.root_pids) > 0:
-            all_gpu_processes = []
+            all_gpu_processes: list[GpuProcess] = []
            for device in self.leaf_devices:
                all_gpu_processes.extend(device.processes().values())

@ -685,7 +691,7 @@ class ResourceMetricCollector:  # pylint: disable=too-many-instance-attributes
                            positive = True
                            break
                        try:
-                            p = p.parent()
+                            p = p.parent()  # type: ignore[assignment]
                        except host.PsutilError:
                            break
                    if positive:
@ -700,8 +706,8 @@ class ResourceMetricCollector:  # pylint: disable=too-many-instance-attributes

        timestamp = timer()
        metrics = {}
-        devices = [device.as_snapshot() for device in self.all_devices]
-        gpu_processes = GpuProcess.take_snapshots(gpu_processes, failsafe=True)
+        device_snapshots = [device.as_snapshot() for device in self.all_devices]
+        gpu_process_snapshots = GpuProcess.take_snapshots(gpu_processes, failsafe=True)

        metrics.update(
            {
@ -722,23 +728,23 @@ class ResourceMetricCollector:  # pylint: disable=too-many-instance-attributes
            )

        device_identifiers = {}
-        for device in devices:
-            identifier = f'gpu:{device.index}'
-            if isinstance(device.real, CudaDevice):
-                identifier = f'cuda:{device.cuda_index} ({identifier})'
-            device_identifiers[device.real] = identifier
+        for device_snapshot in device_snapshots:
+            identifier = f'gpu:{device_snapshot.index}'
+            if isinstance(device_snapshot.real, CudaDevice):
+                identifier = f'cuda:{device_snapshot.cuda_index} ({identifier})'
+            device_identifiers[device_snapshot.real] = identifier

            for attr, name, unit in self.DEVICE_METRICS:
-                value = float(getattr(device, attr)) / unit
+                value = float(getattr(device_snapshot, attr)) / unit
                metrics[f'{identifier}/{name}'] = value

-        for process in gpu_processes:
-            device_identifier = device_identifiers[process.device]
-            identifier = f'pid:{process.pid}'
+        for process_snapshot in gpu_process_snapshots:
+            device_identifier = device_identifiers[process_snapshot.device]
+            identifier = f'pid:{process_snapshot.pid}'

            for attr, scope, name, unit in self.PROCESS_METRICS:
                scope = scope or device_identifier
-                value = float(getattr(process, attr)) / unit
+                value = float(getattr(process_snapshot, attr)) / unit
                metrics[f'{identifier}/{scope}/{name}'] = value

        with self._lock:
@ -746,7 +752,7 @@ class ResourceMetricCollector:  # pylint: disable=too-many-instance-attributes
                self._metric_buffer.add(metrics, timestamp=timestamp)
                self._last_timestamp = timestamp

-        return SnapshotResult(devices, gpu_processes)
+        return SnapshotResult(device_snapshots, gpu_process_snapshots)

    def _target(self) -> None:
        self._daemon_running.wait()
@ -762,17 +768,20 @@ class _MetricBuffer:  # pylint: disable=missing-class-docstring,missing-function
        collector: ResourceMetricCollector,
        prev: _MetricBuffer | None = None,
    ) -> None:
-        self.collector = collector
-        self.prev = prev
+        self.collector: ResourceMetricCollector = collector
+        self.prev: _MetricBuffer | None = prev

-        self.tag = tag
+        self.tag: str = tag
+        self.key_prefix: str
        if self.prev is not None:
            self.key_prefix = f'{self.prev.key_prefix}/{self.tag}'
        else:
            self.key_prefix = self.tag

        self.last_timestamp = self.start_timestamp = timer()
-        self.buffer = defaultdict(lambda: _StatisticsMaintainer(self.last_timestamp))
+        self.buffer: defaultdict[str, _StatisticsMaintainer] = defaultdict(
+            lambda: _StatisticsMaintainer(self.last_timestamp),
+        )

        self.len = 0

@ -817,13 +826,13 @@ class _MetricBuffer:  # pylint: disable=missing-class-docstring,missing-function

 class _StatisticsMaintainer:  # pylint: disable=missing-class-docstring,missing-function-docstring
    def __init__(self, timestamp: float) -> None:
-        self.start_timestamp = timestamp
-        self.last_timestamp = None
-        self.integral = None
-        self.last_value = None
-        self.min_value = None
-        self.max_value = None
-        self.has_nan = False
+        self.start_timestamp: float = timestamp
+        self.last_timestamp: float = math.nan
+        self.integral: float | None = None
+        self.last_value: float | None = None
+        self.min_value: float | None = None
+        self.max_value: float | None = None
+        self.has_nan: bool = False

    def add(self, value: float, timestamp: float | None = None) -> None:
        if timestamp is None:
@ -837,30 +846,32 @@ class _StatisticsMaintainer:  # pylint: disable=missing-class-docstring,missing-
            self.integral = value * (timestamp - self.start_timestamp)
            self.last_value = self.min_value = self.max_value = value
        else:
-            self.integral += (value + self.last_value) * (timestamp - self.last_timestamp) / 2.0
+            # pylint: disable-next=line-too-long
+            self.integral += (value + self.last_value) * (timestamp - self.last_timestamp) / 2.0  # type: ignore[operator]
            self.last_value = value
-            self.min_value = min(self.min_value, value)
-            self.max_value = max(self.max_value, value)
+            self.min_value = min(self.min_value, value)  # type: ignore[type-var]
+            self.max_value = max(self.max_value, value)  # type: ignore[type-var]

        self.last_timestamp = timestamp

    def mean(self) -> float:
+        if self.integral is None:
+            return math.nan
+
        if self.has_nan:
-            if self.integral is None:
-                return math.nan
            return self.integral / (self.last_timestamp - self.start_timestamp)

        timestamp = timer()
-        integral = self.integral + self.last_value * (timestamp - self.last_timestamp)
+        integral = self.integral + self.last_value * (timestamp - self.last_timestamp)  # type: ignore[operator]
        return integral / (timestamp - self.start_timestamp)

    def min(self) -> float:
-        if self.has_nan and self.min_value is None:
+        if self.min_value is None:
            return math.nan
        return self.min_value

    def max(self) -> float:
-        if self.has_nan and self.max_value is None:
+        if self.max_value is None:
            return math.nan
        return self.max_value

--- a/nvitop/api/device.py
+++ b/nvitop/api/device.py
@ -69,6 +69,7 @@ Examples:
    )

    >>> import os
+    >>> os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    >>> os.environ['CUDA_VISIBLE_DEVICES'] = '3,2,1,0'

    >>> CudaDevice.count()                     # number of NVIDIA GPUs visible to CUDA applications
@ -113,13 +114,17 @@ import sys
 import textwrap
 import threading
 from collections import OrderedDict
-from typing import Any, Callable, Iterable, NamedTuple
+from typing import TYPE_CHECKING, Any, Callable, Generator, Hashable, Iterable, NamedTuple, overload

 from nvitop.api import libcuda, libcudart, libnvml
 from nvitop.api.process import GpuProcess
 from nvitop.api.utils import NA, NaType, Snapshot, boolify, bytes2human, memoize_when_activated


+if TYPE_CHECKING:
+    from typing_extensions import Literal  # Python 3.8+
+
+
 __all__ = [
    'Device',
    'PhysicalDevice',
@ -158,7 +163,7 @@ class UtilizationRates(NamedTuple):  # in percentage # pylint: disable=missing-c
    decoder: int | NaType


-_VALUE_OMITTED = object()
+_VALUE_OMITTED: str = object()  # type: ignore[assignment]


 class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-methods
@ -226,7 +231,7 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
    # GPU UUID        : `GPU-<GPU-UUID>`
    # MIG UUID        : `MIG-GPU-<GPU-UUID>/<GPU instance ID>/<compute instance ID>`
    # MIG UUID (R470+): `MIG-<MIG-UUID>`
-    UUID_PATTERN = re.compile(
+    UUID_PATTERN: re.Pattern = re.compile(
        r"""^  # full match
        (?:(?P<MigMode>MIG)-)?                                 # prefix for MIG UUID
        (?:(?P<GpuUuid>GPU)-)?                                 # prefix for GPU UUID
@ -243,10 +248,12 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        flags=re.VERBOSE,
    )

-    GPU_PROCESS_CLASS = GpuProcess
-    cuda = None  # defined in below
+    GPU_PROCESS_CLASS: type[GpuProcess] = GpuProcess
+    cuda: type[CudaDevice] = None  # type: ignore[assignment] # defined below
    """Shortcut for class :class:`CudaDevice`."""

+    _nvml_index: int | tuple[int, int]
+
    @classmethod
    def is_available(cls) -> bool:
        """Test whether there are any devices and the NVML library is successfully loaded."""
@ -347,7 +354,7 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
    @classmethod
    def all(cls) -> list[PhysicalDevice]:
        """Return a list of all physical devices in the system."""
-        return cls.from_indices()
+        return cls.from_indices()  # type: ignore[return-value]

    @classmethod
    def from_indices(
@ -388,7 +395,7 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        if isinstance(indices, int):
            indices = [indices]

-        return list(map(cls, indices))
+        return list(map(cls, indices))  # type: ignore[arg-type]

    @staticmethod
    def from_cuda_visible_devices() -> list[CudaDevice]:
@ -408,8 +415,9 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        """  # pylint: disable=line-too-long
        visible_device_indices = Device.parse_cuda_visible_devices()

-        cuda_devices = []
-        for cuda_index, device_index in enumerate(visible_device_indices):
+        device_index: int | tuple[int, int]
+        cuda_devices: list[CudaDevice] = []
+        for cuda_index, device_index in enumerate(visible_device_indices):  # type: ignore[assignment]
            cuda_devices.append(CudaDevice(cuda_index, nvml_index=device_index))

        return cuda_devices
@ -552,7 +560,7 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        if cls is not Device:
            return super().__new__(cls)

-        match = None
+        match: re.Match | None = None
        if isinstance(index, str):
            match = cls.UUID_PATTERN.match(index)
            if match is not None:  # passed by UUID
@ -608,17 +616,17 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
            arg.encode() if isinstance(arg, str) else arg for arg in (index, uuid, bus_id)
        )

-        self._name = NA
-        self._uuid = NA
-        self._bus_id = NA
-        self._memory_total = NA
-        self._memory_total_human = NA
-        self._is_mig_device = None
-        self._cuda_index = None
-        self._cuda_compute_capability = None
+        self._name: str = NA
+        self._uuid: str = NA
+        self._bus_id: str = NA
+        self._memory_total: int | NaType = NA
+        self._memory_total_human: str = NA
+        self._is_mig_device: bool | None = None
+        self._cuda_index: int | None = None
+        self._cuda_compute_capability: tuple[int, int] | NaType | None = None

        if index is not None:
-            self._nvml_index = index
+            self._nvml_index = index  # type: ignore[assignment]
            try:
                self._handle = libnvml.nvmlQuery(
                    'nvmlDeviceGetHandleByIndex',
@ -647,21 +655,21 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
                    )
            except libnvml.NVMLError_GpuIsLost:
                self._handle = None
-                self._nvml_index = NA
+                self._nvml_index = NA  # type: ignore[assignment]
                self._name = 'ERROR: GPU is Lost'
            except libnvml.NVMLError_Unknown:
                self._handle = None
-                self._nvml_index = NA
+                self._nvml_index = NA  # type: ignore[assignment]
                self._name = 'ERROR: Unknown'
            else:
                self._nvml_index = libnvml.nvmlQuery('nvmlDeviceGetIndex', self._handle)

-        self._max_clock_infos = ClockInfos(graphics=NA, sm=NA, memory=NA, video=NA)
-        self._timestamp = 0
-        self._lock = threading.RLock()
+        self._max_clock_infos: ClockInfos = ClockInfos(graphics=NA, sm=NA, memory=NA, video=NA)
+        self._timestamp: int = 0
+        self._lock: threading.RLock = threading.RLock()

-        self._ident = (self.index, self.uuid())
-        self._hash = None
+        self._ident: tuple[Hashable, str] = (self.index, self.uuid())
+        self._hash: int | None = None

    def __repr__(self) -> str:
        """Return a string representation of the device."""
@ -709,7 +717,7 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
            (8, 6)
        """  # pylint: disable=line-too-long
        try:
-            return super().__getattr__(name)
+            return super().__getattr__(name)  # type: ignore[misc]
        except AttributeError:
            if name == '_cache':
                raise
@ -779,7 +787,7 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
            An int for the physical device index. For MIG devices, returns the index of the parent
            physical device.
        """
-        return self._nvml_index  # will be overridden in MigDevice
+        return self._nvml_index  # type: ignore[return-value] # will be overridden in MigDevice

    @property
    def handle(self) -> libnvml.c_nvmlDevice_t:
@ -800,7 +808,7 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        if self._cuda_index is None:
            visible_device_indices = self.parse_cuda_visible_devices()
            try:
-                self._cuda_index = visible_device_indices.index(self.index)
+                self._cuda_index = visible_device_indices.index(self.index)  # type: ignore[arg-type]
            except ValueError as ex:
                raise RuntimeError(
                    f'CUDA Error: Device(index={self.index}) is not visible to CUDA applications',
@ -1628,13 +1636,15 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        """
        return self.is_mig_device() or not self.is_mig_mode_enabled()

-    def to_leaf_devices(self) -> list[PhysicalDevice | MigDevice | CudaDevice]:
+    def to_leaf_devices(
+        self,
+    ) -> list[PhysicalDevice] | list[MigDevice] | list[CudaDevice] | list[CudaMigDevice]:
        """Return a list of leaf devices.

        Note that a CUDA device is always a leaf device.
        """
        if isinstance(self, CudaDevice) or self.is_leaf_device():
-            return [self]
+            return [self]  # type: ignore[return-value]
        return self.mig_devices()

    def processes(self) -> dict[int, GpuProcess]:
@ -1656,7 +1666,7 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
                else:
                    # Used GPU memory is `N/A` on Windows Display Driver Model (WDDM)
                    # or on MIG-enabled GPUs
-                    gpu_memory = NA
+                    gpu_memory = NA  # type: ignore[assignment]
                    found_na = True
                proc = processes[p.pid] = self.GPU_PROCESS_CLASS(
                    pid=p.pid,
@ -1740,7 +1750,7 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me

    # Modified from psutil (https://github.com/giampaolo/psutil)
    @contextlib.contextmanager
-    def oneshot(self) -> contextlib.AbstractContextManager:
+    def oneshot(self) -> Generator[None, None, None]:
        """A utility context manager which considerably speeds up the retrieval of multiple device information at the same time.

        Internally different device info (e.g. memory_info, utilization_rates, ...) may be fetched
@ -1781,22 +1791,22 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
                yield
            else:
                try:
-                    self.memory_info.cache_activate(self)
-                    self.bar1_memory_info.cache_activate(self)
-                    self.utilization_rates.cache_activate(self)
-                    self.clock_infos.cache_activate(self)
-                    self.max_clock_infos.cache_activate(self)
-                    self.power_usage.cache_activate(self)
-                    self.power_limit.cache_activate(self)
+                    self.memory_info.cache_activate(self)  # type: ignore[attr-defined]
+                    self.bar1_memory_info.cache_activate(self)  # type: ignore[attr-defined]
+                    self.utilization_rates.cache_activate(self)  # type: ignore[attr-defined]
+                    self.clock_infos.cache_activate(self)  # type: ignore[attr-defined]
+                    self.max_clock_infos.cache_activate(self)  # type: ignore[attr-defined]
+                    self.power_usage.cache_activate(self)  # type: ignore[attr-defined]
+                    self.power_limit.cache_activate(self)  # type: ignore[attr-defined]
                    yield
                finally:
-                    self.memory_info.cache_deactivate(self)
-                    self.bar1_memory_info.cache_deactivate(self)
-                    self.utilization_rates.cache_deactivate(self)
-                    self.clock_infos.cache_deactivate(self)
-                    self.max_clock_infos.cache_deactivate(self)
-                    self.power_usage.cache_deactivate(self)
-                    self.power_limit.cache_deactivate(self)
+                    self.memory_info.cache_deactivate(self)  # type: ignore[attr-defined]
+                    self.bar1_memory_info.cache_deactivate(self)  # type: ignore[attr-defined]
+                    self.utilization_rates.cache_deactivate(self)  # type: ignore[attr-defined]
+                    self.clock_infos.cache_deactivate(self)  # type: ignore[attr-defined]
+                    self.max_clock_infos.cache_deactivate(self)  # type: ignore[attr-defined]
+                    self.power_usage.cache_deactivate(self)  # type: ignore[attr-defined]
+                    self.power_limit.cache_deactivate(self)  # type: ignore[attr-defined]


 class PhysicalDevice(Device):
@ -1805,6 +1815,10 @@ class PhysicalDevice(Device):
    This is the real GPU installed in the system.
    """

+    _nvml_index: int
+    index: int
+    nvml_index: int
+
    @property
    def physical_index(self) -> int:
        """Zero based index of the GPU. Can change at each boot.
@ -1864,13 +1878,16 @@ class PhysicalDevice(Device):
 class MigDevice(Device):  # pylint: disable=too-many-instance-attributes
    """Class for MIG devices."""

+    _nvml_index: tuple[int, int]
+    nvml_index: tuple[int, int]
+
    @classmethod
    def count(cls) -> int:
        """The total number of MIG devices aggregated over all physical devices."""
        return len(cls.all())

    @classmethod
-    def all(cls) -> list[MigDevice]:
+    def all(cls) -> list[MigDevice]:  # type: ignore[override]
        """Return a list of MIG devices aggregated over all physical devices."""
        mig_devices = []
        for device in PhysicalDevice.all():
@ -1878,7 +1895,7 @@ class MigDevice(Device):  # pylint: disable=too-many-instance-attributes
        return mig_devices

    @classmethod
-    def from_indices(  # pylint: disable=signature-differs
+    def from_indices(  # type: ignore[override] # pylint: disable=signature-differs
        cls,
        indices: Iterable[tuple[int, int]],
    ) -> list[MigDevice]:
@ -1929,19 +1946,19 @@ class MigDevice(Device):  # pylint: disable=too-many-instance-attributes

        index, uuid = (arg.encode() if isinstance(arg, str) else arg for arg in (index, uuid))

-        self._name = NA
-        self._uuid = NA
-        self._bus_id = NA
-        self._memory_total = NA
-        self._memory_total_human = NA
-        self._gpu_instance_id = NA
-        self._compute_instance_id = NA
-        self._is_mig_device = True
-        self._cuda_index = None
-        self._cuda_compute_capability = None
+        self._name: str = NA
+        self._uuid: str = NA
+        self._bus_id: str = NA
+        self._memory_total: int | NaType = NA
+        self._memory_total_human: str = NA
+        self._gpu_instance_id: int | NaType = NA
+        self._compute_instance_id: int | NaType = NA
+        self._is_mig_device: bool = True
+        self._cuda_index: int | None = None
+        self._cuda_compute_capability: tuple[int, int] | NaType | None = None

        if index is not None:
-            self._nvml_index = index
+            self._nvml_index = index  # type: ignore[assignment]
            self._handle = None

            parent = _get_global_physical_device()
@ -2047,7 +2064,7 @@ class MigDevice(Device):  # pylint: disable=too-many-instance-attributes
        The attributes are defined in :attr:`SNAPSHOT_KEYS`.
        """
        snapshot = super().as_snapshot()
-        snapshot.mig_index = self.mig_index
+        snapshot.mig_index = self.mig_index  # type: ignore[attr-defined]

        return snapshot

@ -2074,6 +2091,7 @@ class CudaDevice(Device):

    Examples:
        >>> import os
+        >>> os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        >>> os.environ['CUDA_VISIBLE_DEVICES'] = '3,2,1,0'

        >>> CudaDevice.count()                     # number of NVIDIA GPUs visible to CUDA applications
@ -2123,6 +2141,10 @@ class CudaDevice(Device):
            If the index is out of range for the given ``CUDA_VISIBLE_DEVICES`` environment variable.
    """  # pylint: disable=line-too-long

+    _nvml_index: int
+    index: int
+    nvml_index: int
+
    @classmethod
    def is_available(cls) -> bool:
        """Test whether there are any CUDA-capable devices available."""
@ -2137,7 +2159,7 @@ class CudaDevice(Device):
            return 0

    @classmethod
-    def all(cls) -> list[CudaDevice]:
+    def all(cls) -> list[CudaDevice]:  # type: ignore[override]
        """All CUDA visible devices.

        Note:
@ -2146,7 +2168,7 @@ class CudaDevice(Device):
        return cls.from_indices()

    @classmethod
-    def from_indices(
+    def from_indices(  # type: ignore[override]
        cls,
        indices: int | Iterable[int] | None = None,
    ) -> list[CudaDevice]:
@ -2184,7 +2206,7 @@ class CudaDevice(Device):
        *,
        nvml_index: int | tuple[int, int] | None = None,
        uuid: str | None = None,
-    ) -> Device:
+    ) -> CudaDevice:
        """Create a new instance of CudaDevice.

        The type of the result is determined by the given argument.
@ -2209,16 +2231,22 @@ class CudaDevice(Device):
            RuntimeError:
                If the index is out of range for the given ``CUDA_VISIBLE_DEVICES`` environment variable.
        """
+        if nvml_index is not None and uuid is not None:
+            raise TypeError(
+                f'CudaDevice(cuda_index=None, nvml_index=None, uuid=None) takes 1 non-None arguments '
+                f'but (cuda_index, nvml_index, uuid) = {(cuda_index, nvml_index, uuid)!r} were given',
+            )
+
        if cuda_index is not None and nvml_index is None and uuid is None:
            cuda_visible_devices = cls.parse_cuda_visible_devices()
            if not isinstance(cuda_index, int) or not 0 <= cuda_index < len(cuda_visible_devices):
                raise RuntimeError(f'CUDA Error: invalid device ordinal: {cuda_index!r}.')
            nvml_index = cuda_visible_devices[cuda_index]

-        if not isinstance(nvml_index, int) or is_mig_device_uuid(uuid):
-            return super().__new__(CudaMigDevice, index=nvml_index, uuid=uuid)
+        if (nvml_index is not None and not isinstance(nvml_index, int)) or is_mig_device_uuid(uuid):
+            return super().__new__(CudaMigDevice, index=nvml_index, uuid=uuid)  # type: ignore[return-value]

-        return super().__new__(cls, index=nvml_index, uuid=uuid)
+        return super().__new__(cls, index=nvml_index, uuid=uuid)  # type: ignore[return-value]

    def __init__(
        self,
@ -2251,13 +2279,13 @@ class CudaDevice(Device):
                raise RuntimeError(f'CUDA Error: invalid device ordinal: {cuda_index!r}.')
            nvml_index = cuda_visible_devices[cuda_index]

-        super().__init__(index=nvml_index, uuid=uuid)
+        super().__init__(index=nvml_index, uuid=uuid)  # type: ignore[arg-type]

        if cuda_index is None:
            cuda_index = super().cuda_index
-        self._cuda_index = cuda_index
+        self._cuda_index: int = cuda_index

-        self._ident = ((self._cuda_index, self.index), self.uuid())
+        self._ident: tuple[Hashable, str] = ((self._cuda_index, self.index), self.uuid())

    def __repr__(self) -> str:
        """Return a string representation of the CUDA device."""
@ -2279,7 +2307,7 @@ class CudaDevice(Device):
        The attributes are defined in :attr:`SNAPSHOT_KEYS`.
        """
        snapshot = super().as_snapshot()
-        snapshot.cuda_index = self.cuda_index
+        snapshot.cuda_index = self.cuda_index  # type: ignore[attr-defined]

        return snapshot

@ -2288,9 +2316,13 @@ Device.cuda = CudaDevice
 """Shortcut for class :class:`CudaDevice`."""


-class CudaMigDevice(CudaDevice, MigDevice):
+class CudaMigDevice(CudaDevice, MigDevice):  # type: ignore[misc]
    """Class for CUDA devices that are MIG devices."""

+    _nvml_index: tuple[int, int]  # type: ignore[assignment]
+    index: tuple[int, int]  # type: ignore[assignment]
+    nvml_index: tuple[int, int]  # type: ignore[assignment]
+

 def is_mig_device_uuid(uuid: str | None) -> bool:
    """Return :data:`True` if the argument is a MIG device UUID, otherwise, return :data:`False`."""
@ -2327,6 +2359,7 @@ def parse_cuda_visible_devices(

    Examples:
        >>> import os
+        >>> os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        >>> os.environ['CUDA_VISIBLE_DEVICES'] = '6,5'
        >>> parse_cuda_visible_devices()        # parse the `CUDA_VISIBLE_DEVICES` environment variable to NVML indices
        [6, 5]
@ -2383,6 +2416,7 @@ def normalize_cuda_visible_devices(cuda_visible_devices: str | None = _VALUE_OMI

    Examples:
        >>> import os
+        >>> os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        >>> os.environ['CUDA_VISIBLE_DEVICES'] = '6,5'
        >>> normalize_cuda_visible_devices()        # normalize the `CUDA_VISIBLE_DEVICES` environment variable to UUID strings
        'GPU-849d5a8d-610e-eeea-1fd4-81ff44a23794,GPU-18ef14e9-dec6-1d7e-1284-3010c6ce98b1'
@ -2420,7 +2454,7 @@ def normalize_cuda_visible_devices(cuda_visible_devices: str | None = _VALUE_OMI


 class _PhysicalDeviceAttrs(NamedTuple):
-    index: int
+    index: int  # type: ignore[assignment]
    name: str
    uuid: str
    support_mig_mode: bool
@ -2460,7 +2494,7 @@ def _does_any_device_support_mig_mode(uuids: Iterable[str] | None = None) -> boo


@contextlib.contextmanager
-def _global_physical_device(device: PhysicalDevice) -> PhysicalDevice:
+def _global_physical_device(device: PhysicalDevice) -> Generator[PhysicalDevice, None, None]:
    global _GLOBAL_PHYSICAL_DEVICE  # pylint: disable=global-statement

    with _GLOBAL_PHYSICAL_DEVICE_LOCK:
@ -2473,13 +2507,29 @@ def _global_physical_device(device: PhysicalDevice) -> PhysicalDevice:

 def _get_global_physical_device() -> PhysicalDevice:
    with _GLOBAL_PHYSICAL_DEVICE_LOCK:
-        return _GLOBAL_PHYSICAL_DEVICE
+        return _GLOBAL_PHYSICAL_DEVICE  # type: ignore[return-value]
+
+
+@overload
+def _parse_cuda_visible_devices(
+    cuda_visible_devices: str | None,
+    format: Literal['index'],  # pylint: disable=redefined-builtin
+) -> list[int] | list[tuple[int, int]]:
+    ...
+
+
+@overload
+def _parse_cuda_visible_devices(
+    cuda_visible_devices: str | None,
+    format: Literal['uuid'],  # pylint: disable=redefined-builtin
+) -> list[str]:
+    ...


@functools.lru_cache()
 def _parse_cuda_visible_devices(  # pylint: disable=too-many-branches,too-many-statements
    cuda_visible_devices: str | None = None,
-    format: str = 'index',  # pylint: disable=redefined-builtin
+    format: Literal['index', 'uuid'] = 'index',  # pylint: disable=redefined-builtin
 ) -> list[int] | list[tuple[int, int]] | list[str]:
    """The underlying implementation for :meth:`parse_cuda_visible_devices`. The result will be cached."""
    assert format in ('index', 'uuid')
@ -2487,7 +2537,7 @@ def _parse_cuda_visible_devices(  # pylint: disable=too-many-branches,too-many-s
    try:
        physical_device_attrs = _get_all_physical_device_attrs()
    except libnvml.NVMLError:
-        return []
+        return []  # type: ignore[return-value]
    gpu_uuids = set(physical_device_attrs)

    try:
@ -2532,6 +2582,10 @@ def _parse_cuda_visible_devices(  # pylint: disable=too-many-branches,too-many-s
    if cuda_visible_devices is None:
        cuda_visible_devices = ','.join(physical_device_attrs.keys())

+    devices: list[Device] = []
+    presented: set[str] = set()
+    use_integer_identifiers: bool | None = None
+
    def from_index_or_uuid(index_or_uuid: int | str) -> Device:
        nonlocal use_integer_identifiers

@ -2562,12 +2616,9 @@ def _parse_cuda_visible_devices(  # pylint: disable=too-many-branches,too-many-s
            identifier = identifier[:offset]
        return identifier

-    devices = []
-    presented = set()
-    use_integer_identifiers = None
    for identifier in map(strip_identifier, cuda_visible_devices.split(',')):
        if identifier in presented:
-            return []  # duplicate identifiers found
+            return []  # type: ignore[return-value] # duplicate identifiers found

        try:
            device = from_index_or_uuid(identifier)
@ -2600,7 +2651,7 @@ def _parse_cuda_visible_devices(  # pylint: disable=too-many-branches,too-many-s

    if format == 'uuid':
        return [device.uuid() for device in devices]
-    return [device.index for device in devices]
+    return [device.index for device in devices]  # type: ignore[return-value]


 def _parse_cuda_visible_devices_to_uuids(
@ -2655,7 +2706,7 @@ def _parse_cuda_visible_devices_to_uuids(


 def _cuda_visible_devices_parser(
-    cuda_visible_devices: str,
+    cuda_visible_devices: str | None,
    queue: mp.SimpleQueue,
    verbose: bool = True,
 ) -> None:
--- a/nvitop/api/host.py
+++ b/nvitop/api/host.py
@ -43,13 +43,21 @@ __all__ = [name for name in _psutil.__all__ if not name.startswith('_')] + [
 __all__[__all__.index('Error')] = 'PsutilError'


-PsutilError = Error  # make alias # noqa: F405
-del Error  # noqa: F821 # pylint: disable=undefined-variable
+PsutilError = Error = _psutil.Error  # make alias
+del Error


 cpu_percent = _psutil.cpu_percent
 virtual_memory = _psutil.virtual_memory
 swap_memory = _psutil.swap_memory
+Process = _psutil.Process
+NoSuchProcess = _psutil.NoSuchProcess
+ZombieProcess = _psutil.ZombieProcess
+AccessDenied = _psutil.AccessDenied
+POSIX = _psutil.POSIX
+WINDOWS = _psutil.WINDOWS
+LINUX = _psutil.LINUX
+MACOS = _psutil.MACOS


 if hasattr(_psutil, 'getloadavg'):
@ -60,7 +68,7 @@ if hasattr(_psutil, 'getloadavg'):

 else:

-    def load_average() -> None:
+    def load_average() -> None:  # type: ignore[misc]
        """Get the system load average."""
        return

@ -95,7 +103,7 @@ def reverse_ppid_map() -> dict[int, list[int]]:  # pylint: disable=function-rede
    return tree


-if LINUX:  # noqa: F405
+if LINUX:
    WSL = _os.getenv('WSL_DISTRO_NAME', default=None)
    if WSL is not None and WSL == '':
        WSL = 'WSL'
--- a/nvitop/api/libcuda.py
+++ b/nvitop/api/libcuda.py
@ -26,7 +26,13 @@ import platform as _platform
 import string as _string
 import sys as _sys
 import threading as _threading
+from typing import TYPE_CHECKING as _TYPE_CHECKING
 from typing import Any as _Any
+from typing import Callable as _Callable
+
+
+if _TYPE_CHECKING:
+    from typing_extensions import TypeAlias as _TypeAlias  # Python 3.10+


 # pylint: disable-next=missing-class-docstring,too-few-public-methods
@ -34,9 +40,9 @@ class _struct_c_CUdevice_t(_ctypes.Structure):
    pass  # opaque handle


-_c_CUdevice_t = _ctypes.POINTER(_struct_c_CUdevice_t)
+_c_CUdevice_t: _TypeAlias = _ctypes.POINTER(_struct_c_CUdevice_t)  # type: ignore[valid-type]

-_CUresult_t = _ctypes.c_uint
+_CUresult_t: _TypeAlias = _ctypes.c_uint

 # Error codes #
 # pylint: disable=line-too-long
@ -215,8 +221,8 @@ CUDA_ERROR_UNKNOWN = 999
 class CUDAError(Exception):
    """Base exception class for CUDA driver query errors."""

-    _value_class_mapping = {}
-    _errcode_to_string = {  # List of currently known error codes
+    _value_class_mapping: dict[int, type[CUDAError]] = {}
+    _errcode_to_string: dict[int, str] = {  # List of currently known error codes
        CUDA_ERROR_NOT_INITIALIZED:                'Initialization error.',
        CUDA_ERROR_NOT_FOUND:                      'Named symbol not found.',
        CUDA_ERROR_INVALID_VALUE:                  'Invalid argument.',
@ -227,7 +233,8 @@ class CUDAError(Exception):
        CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: 'Forward compatibility was attempted on non supported Hardware.',
        CUDA_ERROR_INVALID_CONTEXT:                'Invalid device context.',
    }  # fmt:skip
-    _errcode_to_name = {}
+    _errcode_to_name: dict[int, str] = {}
+    value: int

    def __new__(cls, value: int) -> CUDAError:
        """Map value to a proper subclass of :class:`CUDAError`."""
@ -295,8 +302,8 @@ def _extract_cuda_errors_as_classes() -> None:
        class_name = f'CUDAError_{pascal_case}'
        err_val = getattr(this_module, err_name)

-        def gen_new(value):
-            def new(cls):
+        def gen_new(value: int) -> _Callable[[type[CUDAError]], CUDAError]:
+            def new(cls: type[CUDAError]) -> CUDAError:
                return CUDAError.__new__(cls, value)

            return new
@ -317,6 +324,24 @@ def _extract_cuda_errors_as_classes() -> None:
        CUDAError._errcode_to_name[err_val] = err_name


+# Add explicit references to appease linters
+class __CUDAError(CUDAError):
+    value: int
+
+    def __new__(cls) -> CUDAError:  # type: ignore[misc,empty-body]
+        ...
+
+
+CUDAError_NotInitialized: type[__CUDAError]
+CUDAError_NotFound: type[__CUDAError]
+CUDAError_InvalidValue: type[__CUDAError]
+CUDAError_NoDevice: type[__CUDAError]
+CUDAError_InvalidDevice: type[__CUDAError]
+CUDAError_SystemDriverMismatch: type[__CUDAError]
+CUDAError_Deinitialized: type[__CUDAError]
+CUDAError_CompatNotSupportedOnDevice: type[__CUDAError]
+CUDAError_InvalidContext: type[__CUDAError]
+
 _extract_cuda_errors_as_classes()
 del _extract_cuda_errors_as_classes

@ -328,14 +353,14 @@ def _cudaCheckReturn(ret: _Any) -> _Any:


 # Function access #
-__cudaLib = None
-__initialized = False
-__libLoadLock = _threading.Lock()
+__cudaLib: _ctypes.CDLL | None = None
+__initialized: bool = False
+__libLoadLock: _threading.Lock = _threading.Lock()
 # Function pointers are cached to prevent unnecessary libLoadLock locking
-__cudaGetFunctionPointer_cache = {}
+__cudaGetFunctionPointer_cache: dict[str, _ctypes._CFuncPtr] = {}  # type: ignore[name-defined]


-def __cudaGetFunctionPointer(name: str) -> _ctypes._CFuncPtr:
+def __cudaGetFunctionPointer(name: str) -> _ctypes._CFuncPtr:  # type: ignore[name-defined]
    """Get the function pointer from the CUDA driver library.

    Raises:
@ -658,11 +683,10 @@ def cuDeviceGetUuid(device: _c_CUdevice_t) -> str:
    except CUDAError_NotFound:  # noqa: F821 # pylint: disable=undefined-variable
        fn = __cudaGetFunctionPointer('cuDeviceGetUuid')

-    ubyte_array = _ctypes.c_ubyte * 16
-    uuid = ubyte_array()
+    uuid = _ctypes.create_string_buffer(16)
    ret = fn(uuid, device)
    _cudaCheckReturn(ret)
-    uuid = ''.join(map('{:02x}'.format, uuid))
+    uuid = ''.join(map('{:02x}'.format, uuid.value))
    return '-'.join((uuid[:8], uuid[8:12], uuid[12:16], uuid[16:20], uuid[20:32]))


@ -682,11 +706,10 @@ def cuDeviceGetUuid_v2(device: _c_CUdevice_t) -> str:
    """
    fn = __cudaGetFunctionPointer('cuDeviceGetUuid_v2')

-    ubyte_array = _ctypes.c_ubyte * 16
-    uuid = ubyte_array()
+    uuid = _ctypes.create_string_buffer(16)
    ret = fn(uuid, device)
    _cudaCheckReturn(ret)
-    uuid = ''.join(map('{:0x}'.format, uuid.value))
+    uuid = ''.join(map('{:02x}'.format, uuid.value))
    return '-'.join((uuid[:8], uuid[8:12], uuid[12:16], uuid[16:20], uuid[20:32]))


--- a/nvitop/api/libcudart.py
+++ b/nvitop/api/libcudart.py
@ -27,6 +27,7 @@ import platform as _platform
 import sys as _sys
 import threading as _threading
 from typing import Any as _Any
+from typing import Callable as _Callable


 _cudaError_t = _ctypes.c_int
@ -266,8 +267,8 @@ cudaErrorUnknown = 999
 class cudaError(Exception):
    """Base exception class for CUDA driver query errors."""

-    _value_class_mapping = {}
-    _errcode_to_string = {  # List of currently known error codes
+    _value_class_mapping: dict[int, type[cudaError]] = {}
+    _errcode_to_string: dict[int, str] = {  # List of currently known error codes
        cudaErrorInitializationError:        'Initialization error.',
        cudaErrorSymbolNotFound:             'Named symbol not found.',
        cudaErrorInvalidValue:               'Invalid argument.',
@ -278,7 +279,8 @@ class cudaError(Exception):
        cudaErrorCompatNotSupportedOnDevice: 'Forward compatibility was attempted on non supported Hardware.',
        cudaErrorDeviceUninitialized:        'Invalid device context.',
    }  # fmt:skip
-    _errcode_to_name = {}
+    _errcode_to_name: dict[int, str] = {}
+    value: int

    def __new__(cls, value: int) -> cudaError:
        """Map value to a proper subclass of :class:`cudaError`."""
@ -349,8 +351,8 @@ def _extract_cuda_errors_as_classes() -> None:
        class_name = err_name.replace('cudaError', 'cudaError_')
        err_val = getattr(this_module, err_name)

-        def gen_new(value):
-            def new(cls):
+        def gen_new(value: int) -> _Callable[[type[cudaError]], cudaError]:
+            def new(cls: type[cudaError]) -> cudaError:
                return cudaError.__new__(cls, value)

            return new
@ -371,6 +373,24 @@ def _extract_cuda_errors_as_classes() -> None:
        cudaError._errcode_to_name[err_val] = err_name


+# Add explicit references to appease linters
+class __cudaError(cudaError):
+    value: int
+
+    def __new__(cls) -> cudaError:  # type: ignore[misc,empty-body]
+        ...
+
+
+cudaError_InitializationError: type[__cudaError]
+cudaError_SymbolNotFound: type[__cudaError]
+cudaError_InvalidValue: type[__cudaError]
+cudaError_NoDevice: type[__cudaError]
+cudaError_InvalidDevice: type[__cudaError]
+cudaError_SystemDriverMismatch: type[__cudaError]
+cudaError_CudartUnloading: type[__cudaError]
+cudaError_CompatNotSupportedOnDevice: type[__cudaError]
+cudaError_DeviceUninitialized: type[__cudaError]
+
 _extract_cuda_errors_as_classes()
 del _extract_cuda_errors_as_classes

@ -382,13 +402,13 @@ def _cudaCheckReturn(ret: _Any) -> _Any:


 # Function access #
-__cudaLib = None
-__libLoadLock = _threading.Lock()
+__cudaLib: _ctypes.CDLL | None = None
+__libLoadLock: _threading.Lock = _threading.Lock()
 # Function pointers are cached to prevent unnecessary libLoadLock locking
-__cudaGetFunctionPointer_cache = {}
+__cudaGetFunctionPointer_cache: dict[str, _ctypes._CFuncPtr] = {}  # type: ignore[name-defined]


-def __cudaGetFunctionPointer(name: str) -> _ctypes._CFuncPtr:
+def __cudaGetFunctionPointer(name: str) -> _ctypes._CFuncPtr:  # type: ignore[name-defined]
    """Get the function pointer from the CUDA Runtime library.

    Raises:
--- a/nvitop/api/libnvml.py
+++ b/nvitop/api/libnvml.py
@ -31,6 +31,7 @@ import sys as _sys
 import threading as _threading
 from types import FunctionType as _FunctionType
 from types import ModuleType as _ModuleType
+from typing import TYPE_CHECKING as _TYPE_CHECKING
 from typing import Any as _Any
 from typing import Callable as _Callable

@ -38,11 +39,16 @@ from typing import Callable as _Callable
 # https://pypi.org/project/nvidia-ml-py
 import pynvml as _pynvml
 from pynvml import *  # noqa: F403 # pylint: disable=wildcard-import,unused-wildcard-import
+from pynvml import nvmlDeviceGetPciInfo  # appease mypy # noqa: F401 # pylint: disable=unused-import

 from nvitop.api.utils import NA
 from nvitop.api.utils import colored as __colored


+if _TYPE_CHECKING:
+    from typing_extensions import TypeAlias as _TypeAlias  # Python 3.10+
+
+
 __all__ = [  # will be updated below
    'NA',
    'nvmlCheckReturn',
@ -63,10 +69,10 @@ if not callable(getattr(_pynvml, 'nvmlInitWithFlags', None)):

 # Members from `pynvml` ############################################################################

-NVMLError = _pynvml.NVMLError
+NVMLError: type[_pynvml.NVMLError] = _pynvml.NVMLError
 NVMLError.__doc__ = """Base exception class for NVML query errors."""
 NVMLError.__new__.__doc__ = """Map value to a proper subclass of :class:`NVMLError`."""
-nvmlExceptionClass = _pynvml.nvmlExceptionClass
+nvmlExceptionClass: _Callable[[int], type[_pynvml.NVMLError]] = _pynvml.nvmlExceptionClass
 nvmlExceptionClass.__doc__ = """Map value to a proper subclass of :class:`NVMLError`."""

 # Load members from module `pynvml` and register them in `__all__` and globals.
@ -161,26 +167,40 @@ del (
    _sphinx_doc,
 )

+
 # 5. Add explicit references to appease linters
 # pylint: disable=no-member
-c_nvmlDevice_t = _pynvml.c_nvmlDevice_t
-NVMLError_FunctionNotFound = _pynvml.NVMLError_FunctionNotFound
-NVMLError_GpuIsLost = _pynvml.NVMLError_GpuIsLost
-NVMLError_InvalidArgument = _pynvml.NVMLError_InvalidArgument
-NVMLError_LibraryNotFound = _pynvml.NVMLError_LibraryNotFound
-NVMLError_NoPermission = _pynvml.NVMLError_NoPermission
-NVMLError_NotFound = _pynvml.NVMLError_NotFound
-NVMLError_NotSupported = _pynvml.NVMLError_NotSupported
-NVMLError_Unknown = _pynvml.NVMLError_Unknown
+c_nvmlDevice_t: _TypeAlias = _pynvml.c_nvmlDevice_t
+NVMLError_FunctionNotFound: _TypeAlias = _pynvml.NVMLError_FunctionNotFound
+NVMLError_GpuIsLost: _TypeAlias = _pynvml.NVMLError_GpuIsLost
+NVMLError_InvalidArgument: _TypeAlias = _pynvml.NVMLError_InvalidArgument
+NVMLError_LibraryNotFound: _TypeAlias = _pynvml.NVMLError_LibraryNotFound
+NVMLError_NoPermission: _TypeAlias = _pynvml.NVMLError_NoPermission
+NVMLError_NotFound: _TypeAlias = _pynvml.NVMLError_NotFound
+NVMLError_NotSupported: _TypeAlias = _pynvml.NVMLError_NotSupported
+NVMLError_Unknown: _TypeAlias = _pynvml.NVMLError_Unknown
+NVML_CLOCK_GRAPHICS: int = _pynvml.NVML_CLOCK_GRAPHICS
+NVML_CLOCK_SM: int = _pynvml.NVML_CLOCK_SM
+NVML_CLOCK_MEM: int = _pynvml.NVML_CLOCK_MEM
+NVML_CLOCK_VIDEO: int = _pynvml.NVML_CLOCK_VIDEO
+NVML_TEMPERATURE_GPU: int = _pynvml.NVML_TEMPERATURE_GPU
+NVML_DRIVER_WDDM: int = _pynvml.NVML_DRIVER_WDDM
+NVML_DRIVER_WDM: int = _pynvml.NVML_DRIVER_WDM
+NVML_MEMORY_ERROR_TYPE_UNCORRECTED: int = _pynvml.NVML_MEMORY_ERROR_TYPE_UNCORRECTED
+NVML_VOLATILE_ECC: int = _pynvml.NVML_VOLATILE_ECC
+NVML_COMPUTEMODE_DEFAULT: int = _pynvml.NVML_COMPUTEMODE_DEFAULT
+NVML_COMPUTEMODE_EXCLUSIVE_THREAD: int = _pynvml.NVML_COMPUTEMODE_EXCLUSIVE_THREAD
+NVML_COMPUTEMODE_PROHIBITED: int = _pynvml.NVML_COMPUTEMODE_PROHIBITED
+NVML_COMPUTEMODE_EXCLUSIVE_PROCESS: int = _pynvml.NVML_COMPUTEMODE_EXCLUSIVE_PROCESS
 # pylint: enable=no-member

 # New members in `libnvml` #########################################################################

-__flags = []
-__initialized = False
-__lock = _threading.Lock()
+__flags: list[int] = []
+__initialized: bool = False
+__lock: _threading.Lock = _threading.Lock()

-LOGGER = _logging.getLogger(__name__)
+LOGGER: _logging.Logger = _logging.getLogger(__name__)
 try:
    LOGGER.setLevel(_os.getenv('LOGLEVEL', default='WARNING').upper())
 except (ValueError, TypeError):
@ -197,9 +217,9 @@ if not LOGGER.hasHandlers() and LOGGER.isEnabledFor(_logging.DEBUG):
    LOGGER.addHandler(_file_handler)
    del _formatter, _stream_handler, _file_handler

-UNKNOWN_FUNCTIONS = {}
-UNKNOWN_FUNCTIONS_CACHE_SIZE = 1024
-VERSIONED_PATTERN = _re.compile(r'^(?P<name>\w+)(?P<suffix>_v(\d)+)$')
+UNKNOWN_FUNCTIONS: dict[str, tuple[_Callable | str, NVMLError_FunctionNotFound]] = {}
+UNKNOWN_FUNCTIONS_CACHE_SIZE: int = 1024
+VERSIONED_PATTERN: _re.Pattern = _re.compile(r'^(?P<name>\w+)(?P<suffix>_v(\d)+)$')


 def _lazy_init() -> None:
@ -389,10 +409,14 @@ def nvmlQuery(
            except AttributeError as e1:
                raise NVMLError_FunctionNotFound from e1

-        retval = func(*args, **kwargs)
+        retval = func(*args, **kwargs)  # type: ignore[operator]
    except NVMLError_FunctionNotFound as e2:
        if not ignore_function_not_found:
-            identifier = _inspect.getsource(func) if func.__name__ == '<lambda>' else repr(func)
+            identifier = (
+                func
+                if isinstance(func, str)
+                else (_inspect.getsource(func) if func.__name__ == '<lambda>' else repr(func))
+            )
            with __lock:
                if (
                    identifier not in UNKNOWN_FUNCTIONS
@ -431,7 +455,7 @@ def nvmlCheckReturn(


 # Patch layers for backward compatibility ##########################################################
-__patched_backward_compatibility_layers = False
+__patched_backward_compatibility_layers: bool = False


 def __patch_backward_compatibility_layers() -> None:
@ -441,9 +465,9 @@ def __patch_backward_compatibility_layers() -> None:
        return

    function_name_mapping_lock = _threading.Lock()
-    function_name_mapping = {}
+    function_name_mapping: dict[str, str] = {}

-    def function_mapping_update(mapping):
+    def function_mapping_update(mapping: dict[str, str]) -> dict[str, str]:
        with function_name_mapping_lock:
            mapping = dict(mapping)
            for name, mapped_name in function_name_mapping.items():
@ -452,10 +476,12 @@ def __patch_backward_compatibility_layers() -> None:
            function_name_mapping.update(mapping)
        return mapping

-    def with_mapped_function_name():
-        def wrapper(nvmlGetFunctionPointer):
+    def with_mapped_function_name() -> None:
+        def wrapper(
+            nvmlGetFunctionPointer: _Callable[[str], _ctypes._CFuncPtr],  # type: ignore[name-defined]
+        ) -> _Callable[[str], _ctypes._CFuncPtr]:  # type: ignore[name-defined]
            @_functools.wraps(nvmlGetFunctionPointer)
-            def wrapped(name):
+            def wrapped(name: str) -> _ctypes._CFuncPtr:  # type: ignore[name-defined]
                mapped_name = function_name_mapping.get(name, name)
                return nvmlGetFunctionPointer(mapped_name)

@ -467,10 +493,18 @@ def __patch_backward_compatibility_layers() -> None:
            ),
        )

-    def patch_function_pointers_when_fail(names, callback):
-        def wrapper(nvmlGetFunctionPointer):
+    def patch_function_pointers_when_fail(
+        names: set[str],
+        callback: _Callable[[str, set[str], Exception, _ModuleType, _ModuleType], str],
+    ) -> _Callable[  # type: ignore[name-defined]
+        [_Callable[[str], _ctypes._CFuncPtr]],
+        _Callable[[str], _ctypes._CFuncPtr],
+    ]:
+        def wrapper(
+            nvmlGetFunctionPointer: _Callable[[str], _ctypes._CFuncPtr],  # type: ignore[name-defined]
+        ) -> _Callable[[str], _ctypes._CFuncPtr]:  # type: ignore[name-defined]
            @_functools.wraps(nvmlGetFunctionPointer)
-            def wrapped(name):
+            def wrapped(name: str) -> _ctypes._CFuncPtr:  # type: ignore[name-defined]
                try:
                    return nvmlGetFunctionPointer(name)
                except NVMLError_FunctionNotFound as ex:
@ -483,12 +517,12 @@ def __patch_backward_compatibility_layers() -> None:

        return wrapper

-    def patch_process_info():
+    def patch_process_info() -> None:
        # pylint: disable-next=protected-access,no-member
        PrintableStructure = _pynvml._PrintableStructure

        # pylint: disable-next=missing-class-docstring,too-few-public-methods
-        class c_nvmlProcessInfo_v1_t(PrintableStructure):
+        class c_nvmlProcessInfo_v1_t(PrintableStructure):  # type: ignore[misc,valid-type]
            _fields_ = [
                ('pid', _ctypes.c_uint),
                ('usedGpuMemory', _ctypes.c_ulonglong),
@ -498,7 +532,7 @@ def __patch_backward_compatibility_layers() -> None:
            }

        # pylint: disable-next=missing-class-docstring,too-few-public-methods
-        class c_nvmlProcessInfo_v2_t(PrintableStructure):
+        class c_nvmlProcessInfo_v2_t(PrintableStructure):  # type: ignore[misc,valid-type]
            _fields_ = [
                ('pid', _ctypes.c_uint),
                ('usedGpuMemory', _ctypes.c_ulonglong),
@ -521,12 +555,12 @@ def __patch_backward_compatibility_layers() -> None:
        }

        def patch_process_info_callback(
-            name,
-            names,  # pylint: disable=unused-argument
-            exception,
-            pynvml,
-            modself,
-        ):
+            name: str,
+            names: set[str],  # pylint: disable=unused-argument
+            exception: Exception,
+            pynvml: _ModuleType,
+            modself: _ModuleType,
+        ) -> str:
            if name in nvmlDeviceGetRunningProcesses_v3_v2:
                mapping = nvmlDeviceGetRunningProcesses_v3_v2
                struct_type = c_nvmlProcessInfo_v2_t
@ -570,16 +604,20 @@ def __patch_backward_compatibility_layers() -> None:
    __patched_backward_compatibility_layers = True


-_pynvml_installation_corrupted = not callable(getattr(_pynvml, '_nvmlGetFunctionPointer', None))
+_pynvml_installation_corrupted: bool = not callable(
+    getattr(_pynvml, '_nvmlGetFunctionPointer', None),
+)

 if not _pynvml_installation_corrupted:
    __patch_backward_compatibility_layers()
 del __patch_backward_compatibility_layers


-_pynvml_memory_v2_available = hasattr(_pynvml, 'nvmlMemory_v2')
-_pynvml_get_memory_info_v2_available = _pynvml_memory_v2_available
-_driver_get_memory_info_v2_available = None if not _pynvml_installation_corrupted else False
+_pynvml_memory_v2_available: bool = hasattr(_pynvml, 'nvmlMemory_v2')
+_pynvml_get_memory_info_v2_available: bool = _pynvml_memory_v2_available
+_driver_get_memory_info_v2_available: bool | None = (
+    None if not _pynvml_installation_corrupted else False
+)


 # pylint: disable-next=function-redefined,too-many-branches
--- a/nvitop/api/process.py
+++ b/nvitop/api/process.py
@ -27,7 +27,7 @@ import os
 import threading
 from abc import ABCMeta
 from types import FunctionType
-from typing import TYPE_CHECKING, Any, Callable, Iterable
+from typing import TYPE_CHECKING, Any, Callable, Generator, Iterable
 from weakref import WeakValueDictionary

 from nvitop.api import host, libnvml
@ -181,8 +181,14 @@ class HostProcess(host.Process, metaclass=ABCMeta):
        )
    """

-    INSTANCE_LOCK = threading.RLock()
-    INSTANCES = WeakValueDictionary()
+    INSTANCE_LOCK: threading.RLock = threading.RLock()
+    INSTANCES: WeakValueDictionary[int, HostProcess] = WeakValueDictionary()
+
+    _pid: int
+    _super_gone: bool
+    _username: str | None
+    _ident: tuple
+    _lock: threading.RLock

    def __new__(cls, pid: int | None = None) -> HostProcess:
        """Return the cached instance of :class:`HostProcess`."""
@ -378,7 +384,7 @@ class HostProcess(host.Process, metaclass=ABCMeta):
        return [HostProcess(child.pid) for child in super().children(recursive)]

    @contextlib.contextmanager
-    def oneshot(self) -> contextlib.AbstractContextManager:
+    def oneshot(self) -> Generator[None, None, None]:
        """A utility context manager which considerably speeds up the retrieval of multiple process information at the same time.

        Internally different process info (e.g. name, ppid, uids, gids, ...) may be fetched by using
@ -405,12 +411,12 @@ class HostProcess(host.Process, metaclass=ABCMeta):
                with super().oneshot():
                    # pylint: disable=no-member
                    try:
-                        self.cmdline.cache_activate(self)
-                        self.running_time.cache_activate(self)
+                        self.cmdline.cache_activate(self)  # type: ignore[attr-defined]
+                        self.running_time.cache_activate(self)  # type: ignore[attr-defined]
                        yield
                    finally:
-                        self.cmdline.cache_deactivate(self)
-                        self.running_time.cache_deactivate(self)
+                        self.cmdline.cache_deactivate(self)  # type: ignore[attr-defined]
+                        self.running_time.cache_deactivate(self)  # type: ignore[attr-defined]

    def as_snapshot(
        self,
@ -441,13 +447,20 @@ class GpuProcess:  # pylint: disable=too-many-instance-attributes,too-many-publi
    representing the same PID on the host but different GPU devices are different.
    """

-    INSTANCE_LOCK = threading.RLock()
-    INSTANCES = WeakValueDictionary()
+    INSTANCE_LOCK: threading.RLock = threading.RLock()
+    INSTANCES: WeakValueDictionary[tuple[int, Device], GpuProcess] = WeakValueDictionary()
+
+    _pid: int
+    _host: HostProcess
+    _device: Device
+    _username: str | None
+    _ident: tuple
+    _hash: int | None

    # pylint: disable-next=too-many-arguments
    def __new__(
        cls,
-        pid: int,
+        pid: int | None,
        device: Device,
        # pylint: disable=unused-argument
        gpu_memory: int | NaType | None = None,
@ -485,7 +498,7 @@ class GpuProcess:  # pylint: disable=too-many-instance-attributes,too-many-publi
    # pylint: disable-next=too-many-arguments
    def __init__(
        self,
-        pid: int,  # pylint: disable=unused-argument
+        pid: int | None,  # pylint: disable=unused-argument
        device: Device,
        gpu_memory: int | NaType | None = None,
        gpu_instance_id: int | NaType | None = None,
@ -553,7 +566,7 @@ class GpuProcess:  # pylint: disable=too-many-instance-attributes,too-many-publi
                If the user do not have read privilege to the process' status file.
        """
        try:
-            return super().__getattr__(name)
+            return super().__getattr__(name)  # type: ignore[misc]
        except AttributeError:
            if name == '_cache':
                raise
@ -627,7 +640,7 @@ class GpuProcess:  # pylint: disable=too-many-instance-attributes,too-many-publi
        memory_total = self.device.memory_total()
        gpu_memory_percent = NA
        if libnvml.nvmlCheckReturn(memory_used, int) and libnvml.nvmlCheckReturn(memory_total, int):
-            gpu_memory_percent = round(100.0 * memory_used / memory_total, 1)
+            gpu_memory_percent = round(100.0 * memory_used / memory_total, 1)  # type: ignore[assignment]
        self._gpu_memory_percent = gpu_memory_percent

    def set_gpu_utilization(
@ -651,7 +664,7 @@ class GpuProcess:  # pylint: disable=too-many-instance-attributes,too-many-publi
    def update_gpu_status(self) -> int | NaType:
        """Update the GPU consumption status from a new NVML query."""
        self.set_gpu_memory(NA)
-        self.set_gpu_utilization(NA, NA, NA, NA)
+        self.set_gpu_utilization(NA, NA, NA, NA)  # type: ignore[arg-type]
        self.device.processes()
        return self.gpu_memory()

@ -989,8 +1002,10 @@ class GpuProcess:  # pylint: disable=too-many-instance-attributes,too-many-publi
        If *failsafe* is :data:`True`, then if any method fails, the fallback value in
        :func:`auto_garbage_clean` will be used.
        """
-        cache = {}
-        context = cls.failsafe if failsafe else contextlib.nullcontext
+        cache: dict[int, Snapshot] = {}
+        context: Callable[[], contextlib.AbstractContextManager[None]] = (
+            cls.failsafe if failsafe else contextlib.nullcontext  # type: ignore[assignment]
+        )
        with context():
            return [
                process.as_snapshot(host_process_snapshot_cache=cache) for process in gpu_processes
@ -998,7 +1013,7 @@ class GpuProcess:  # pylint: disable=too-many-instance-attributes,too-many-publi

    @classmethod
    @contextlib.contextmanager
-    def failsafe(cls) -> contextlib.AbstractContextManager:
+    def failsafe(cls) -> Generator[None, None, None]:
        """A context manager that enables fallback values for methods that fail.

        Examples:
--- a/nvitop/api/utils.py
+++ b/nvitop/api/utils.py
@ -27,7 +27,7 @@ import os
 import re
 import sys
 import time
-from typing import Any, Callable, Iterable
+from typing import Any, Callable, Generator, Iterable, Iterator

 from psutil import WINDOWS

@ -70,12 +70,15 @@ except ImportError:
        text: str,
        color: str | None = None,
        on_color: str | None = None,
-        attrs: Iterable[str] = None,
+        attrs: Iterable[str] | None = None,
+        *,
+        no_color: bool | None = None,
+        force_color: bool | None = None,
    ) -> str:
        return text


-COLOR = sys.stdout.isatty()
+COLOR: bool = sys.stdout.isatty()


 def set_color(value: bool) -> None:
@ -94,7 +97,7 @@ def colored(
    text: str,
    color: str | None = None,
    on_color: str | None = None,
-    attrs: Iterable[str] = None,
+    attrs: Iterable[str] | None = None,
 ) -> str:
    """Colorize text with ANSI color escape codes.

@ -178,7 +181,7 @@ class NaType(str):
        """
        return math.nan

-    def __add__(self, other: object) -> str | float:
+    def __add__(self, other: object) -> str | float:  # type: ignore[override]
        """Return :data:`math.nan` if the operand is a number or uses string concatenation if the operand is a string (``NA + other``).

        A special case is when the operand is :const:`nvitop.NA` itself, the result is
@ -193,9 +196,11 @@ class NaType(str):
        >>> NA + 1.0
        nan
        """  # pylint: disable=line-too-long
-        if isinstance(other, (int, float)) or other is NA:
+        if isinstance(other, (int, float)):
            return float(self) + other
-        return super().__add__(other)
+        if other is NA:
+            return float(self)
+        return super().__add__(other)  # type: ignore[operator]

    def __radd__(self, other: object) -> str | float:
        """Return :data:`math.nan` if the operand is a number or uses string concatenation if the operand is a string (``other + NA``).
@ -223,8 +228,10 @@ class NaType(str):
        >>> NA + 1.0
        nan
        """
-        if isinstance(other, (int, float)) or other is NA:
+        if isinstance(other, (int, float)):
            return float(self) - other
+        if other is NA:
+            return float(self)
        return NotImplemented

    def __rsub__(self, other: object) -> float:
@ -241,7 +248,7 @@ class NaType(str):
            return other - float(self)
        return NotImplemented

-    def __mul__(self, other: object) -> float:
+    def __mul__(self, other: object) -> float:  # type: ignore[override]
        """Return :data:`math.nan` if the operand is a number (``NA * other``).

        A special case is when the operand is :const:`nvitop.NA` itself, the result is also :data:`math.nan`.
@ -253,11 +260,13 @@ class NaType(str):
        >>> NA * NA
        nan
        """
-        if isinstance(other, (int, float)) or other is NA:
+        if isinstance(other, (int, float)):
            return float(self) * other
+        if other is NA:
+            return float(self)
        return NotImplemented

-    def __rmul__(self, other: object) -> float:
+    def __rmul__(self, other: object) -> float:  # type: ignore[override]
        """Return :data:`math.nan` if the operand is a number (``other * NA``).

        >>> 1024 * NA
@ -280,9 +289,13 @@ class NaType(str):
        ZeroDivisionError: float division by zero
        >>> NA / 0.0
        ZeroDivisionError: float division by zero
+        >>> NA / NA
+        nan
        """
        if isinstance(other, (int, float)):
            return float(self) / other
+        if other is NA:
+            return float(self)
        return NotImplemented

    def __rtruediv__(self, other: object) -> float:
@ -308,9 +321,13 @@ class NaType(str):
        ZeroDivisionError: float division by zero
        >>> NA / 0.0
        ZeroDivisionError: float division by zero
+        >>> NA // NA
+        nan
        """
        if isinstance(other, (int, float)):
            return float(self) // other
+        if other is NA:
+            return float(self)
        return NotImplemented

    def __rfloordiv__(self, other: object) -> float:
@ -325,7 +342,7 @@ class NaType(str):
            return other // float(self)
        return NotImplemented

-    def __mod__(self, other: object) -> float:
+    def __mod__(self, other: object) -> float:  # type: ignore[override]
        """Return :data:`math.nan` if the operand is a number (``NA % other``).

        >>> NA % 1024
@ -339,6 +356,8 @@ class NaType(str):
        """
        if isinstance(other, (int, float)):
            return float(self) % other
+        if other is NA:
+            return float(self)
        return NotImplemented

    def __rmod__(self, other: object) -> float:
@ -421,25 +440,25 @@ class NaType(str):
        """The :const:`nvitop.NA` is always greater than any number, or uses the dictionary order for string."""
        if isinstance(x, (int, float)):
            return False
-        return super().__lt__(x)
+        return super().__lt__(x)  # type: ignore[operator]

    def __le__(self, x: object) -> bool:
        """The :const:`nvitop.NA` is always greater than any number, or uses the dictionary order for string."""
        if isinstance(x, (int, float)):
            return False
-        return super().__le__(x)
+        return super().__le__(x)  # type: ignore[operator]

    def __gt__(self, x: object) -> bool:
        """The :const:`nvitop.NA` is always greater than any number, or uses the dictionary order for string."""
        if isinstance(x, (int, float)):
            return True
-        return super().__gt__(x)
+        return super().__gt__(x)  # type: ignore[operator]

    def __ge__(self, x: object) -> bool:
        """The :const:`nvitop.NA` is always greater than any number, or uses the dictionary order for string."""
        if isinstance(x, (int, float)):
            return True
-        return super().__ge__(x)
+        return super().__ge__(x)  # type: ignore[operator]

    def __format__(self, format_spec: str) -> str:
        """Format :const:`nvitop.NA` according to ``format_spec``."""
@ -459,22 +478,22 @@ NA.__doc__ = """The singleton instance of :class:`NaType`. The actual value is :

 NotApplicable = NA

-KiB = 1 << 10
+KiB: int = 1 << 10
 """Kibibyte (1024)"""

-MiB = 1 << 20
+MiB: int = 1 << 20
 """Mebibyte (1024 * 1024)"""

-GiB = 1 << 30
+GiB: int = 1 << 30
 """Gibibyte (1024 * 1024 * 1024)"""

-TiB = 1 << 40
+TiB: int = 1 << 40
 """Tebibyte (1024 * 1024 * 1024 * 1024)"""

-PiB = 1 << 50
+PiB: int = 1 << 50
 """Pebibyte (1024 * 1024 * 1024 * 1024 * 1024)"""

-SIZE_UNITS = {
+SIZE_UNITS: dict[str | None, int] = {
    None: 1,
    '': 1,
    'B': 1,
@ -487,10 +506,10 @@ SIZE_UNITS = {
    'MB': 1000**2,
    'GB': 1000**3,
    'TB': 1000**4,
-    'PB': 1000**4,
+    'PB': 1000**5,
 }
 """Units of storage and memory measurements."""
-SIZE_PATTERN = re.compile(
+SIZE_PATTERN: re.Pattern = re.compile(
    r'^\s*\+?\s*(?P<size>\d+(?:\.\d+)?)\s*(?P<unit>[KMGTP]i?B?|B?)\s*$',
    flags=re.IGNORECASE,
 )
@ -651,7 +670,7 @@ class Snapshot:
        If the attribute is not defined, fetches from the original object and makes a function call.
        """
        try:
-            return super().__getattr__(name)
+            return super().__getattr__(name)  # type: ignore[misc]
        except AttributeError:
            attribute = getattr(self.real, name)
            if callable(attribute):
@ -671,17 +690,15 @@ class Snapshot:
        """Support ``snapshot['name'] = value`` syntax."""
        setattr(self, name, value)

-    def __iter__(self) -> Iterable[str]:
+    def __iter__(self) -> Iterator[str]:
        """Support ``for name in snapshot`` syntax and ``*`` tuple unpack ``[*snapshot]`` syntax."""

-        def gen() -> str:
-            for name in self.__dict__:
-                if name not in ('real', 'timestamp'):
-                    yield name
+        def gen() -> Generator[str, None, None]:
+            yield from (name for name in self.__dict__ if name not in ('real', 'timestamp'))

        return gen()

-    def keys(self) -> Iterable[str]:
+    def keys(self) -> Iterator[str]:
        # pylint: disable-next=line-too-long
        """Support ``**`` dictionary unpack ``{**snapshot}`` / ``dict(**snapshot)`` syntax and ``dict(snapshot)`` dictionary conversion."""
        return iter(self)
@ -730,6 +747,6 @@ def memoize_when_activated(method: Callable[[Any], Any]) -> Callable[[Any], Any]
        except AttributeError:
            pass

-    wrapped.cache_activate = cache_activate
-    wrapped.cache_deactivate = cache_deactivate
+    wrapped.cache_activate = cache_activate  # type: ignore[attr-defined]
+    wrapped.cache_deactivate = cache_deactivate  # type: ignore[attr-defined]
    return wrapped
--- a/nvitop/callbacks/keras.py
+++ b/nvitop/callbacks/keras.py
@ -57,8 +57,7 @@ class GpuStatsLogger(Callback):  # pylint: disable=too-many-instance-attributes
        ValueError:
            If NVIDIA driver is not installed, or the `gpus` argument does not match available devices.

-    Example::
-
+    Examples:
        >>> from tensorflow.python.keras.utils.multi_gpu_utils import multi_gpu_model
        >>> from tensorflow.python.keras.callbacks import TensorBoard
        >>> from nvitop.callbacks.keras import GpuStatsLogger
--- a/nvitop/callbacks/pytorch_lightning.py
+++ b/nvitop/callbacks/pytorch_lightning.py
@ -58,8 +58,7 @@ class GpuStatsLogger(Callback):  # pylint: disable=too-many-instance-attributes
        MisconfigurationException:
            If NVIDIA driver is not installed, not running on GPUs, or ``Trainer`` has no logger.

-    Example::
-
+    Examples:
        >>> from pytorch_lightning import Trainer
        >>> from nvitop.callbacks.pytorch_lightning import GpuStatsLogger
        >>> gpu_stats = GpuStatsLogger()
--- a/nvitop/cli.py
+++ b/nvitop/cli.py
@ -237,29 +237,31 @@ def parse_arguments() -> argparse.Namespace:
        args.user.append(USERNAME)
    if args.gpu_util_thresh is None:
        try:
-            gpu_util_thresh = os.getenv('NVITOP_GPU_UTILIZATION_THRESHOLDS', None)
-            gpu_util_thresh = list(map(int, gpu_util_thresh.split(',')))[:2]
+            gpu_util_thresh = list(
+                map(int, os.getenv('NVITOP_GPU_UTILIZATION_THRESHOLDS', '').split(',')),
+            )[:2]
            if (
                len(gpu_util_thresh) != 2
                or min(gpu_util_thresh) <= 0
                or max(gpu_util_thresh) >= 100
            ):
                raise ValueError
-        except (ValueError, AttributeError):
+        except ValueError:
            pass
        else:
            args.gpu_util_thresh = gpu_util_thresh
    if args.mem_util_thresh is None:
        try:
-            mem_util_thresh = os.getenv('NVITOP_MEMORY_UTILIZATION_THRESHOLDS', None)
-            mem_util_thresh = list(map(int, mem_util_thresh.split(',')))[:2]
+            mem_util_thresh = list(
+                map(int, os.getenv('NVITOP_MEMORY_UTILIZATION_THRESHOLDS', '').split(',')),
+            )[:2]
            if (
                len(mem_util_thresh) != 2
                or min(mem_util_thresh) <= 0
                or max(mem_util_thresh) >= 100
            ):
                raise ValueError
-        except (ValueError, AttributeError):
+        except ValueError:
            pass
        else:
            args.mem_util_thresh = mem_util_thresh
@ -268,7 +270,7 @@ def parse_arguments() -> argparse.Namespace:


 # pylint: disable-next=too-many-branches,too-many-statements,too-many-locals
-def main() -> None:
+def main() -> int:
    """Main function for ``nvitop`` CLI."""
    args = parse_arguments()

@ -307,9 +309,9 @@ def main() -> None:
        return 1

    if args.gpu_util_thresh is not None:
-        Device.GPU_UTILIZATION_THRESHOLDS = tuple(sorted(args.gpu_util_thresh))
+        Device.GPU_UTILIZATION_THRESHOLDS = tuple(sorted(args.gpu_util_thresh))  # type: ignore[assignment]
    if args.mem_util_thresh is not None:
-        Device.MEMORY_UTILIZATION_THRESHOLDS = tuple(sorted(args.mem_util_thresh))
+        Device.MEMORY_UTILIZATION_THRESHOLDS = tuple(sorted(args.mem_util_thresh))  # type: ignore[assignment]

    if args.only is not None:
        indices = set(args.only)
@ -325,8 +327,8 @@ def main() -> None:
            for index in Device.parse_cuda_visible_devices()
        }
    else:
-        indices = range(device_count)
-    devices = Device.from_indices(sorted(set(indices)))
+        indices = set(range(device_count))
+    devices = Device.from_indices(sorted(indices))

    filters = []
    if args.compute:
@ -366,12 +368,17 @@ def main() -> None:
        ui = UI(devices, filters, ascii=args.ascii)
        if not sys.stdout.isatty():
            parent = HostProcess().parent()
-            grandparent = parent.parent() if parent is not None else None
-            if grandparent is not None and parent.name() == 'sh' and grandparent.name() == 'watch':
-                messages.append(
-                    'HINT: You are running `nvitop` under `watch` command. '
-                    'Please try `nvitop -m` directly.',
-                )
+            if parent is not None:
+                grandparent = parent.parent()
+                if (
+                    grandparent is not None
+                    and parent.name() == 'sh'
+                    and grandparent.name() == 'watch'
+                ):
+                    messages.append(
+                        'HINT: You are running `nvitop` under `watch` command. '
+                        'Please try `nvitop -m` directly.',
+                    )

    ui.print()
    ui.destroy()
@ -383,7 +390,7 @@ def main() -> None:
            else 'ERROR: A FunctionNotFound error occurred while calling:',
        ]
        unknown_function_messages.extend(
-            f'    nvmlQuery({func.__name__!r}, *args, **kwargs)'
+            f'    nvmlQuery({(func.__name__ if not isinstance(func, str) else func)!r}, *args, **kwargs)'
            for func, _ in libnvml.UNKNOWN_FUNCTIONS.values()
        )
        unknown_function_messages.append(
--- a/nvitop/gui/library/device.py
+++ b/nvitop/gui/library/device.py
@ -8,7 +8,7 @@ from cachetools.func import ttl_cache
 from nvitop.api import NA
 from nvitop.api import MigDevice as MigDeviceBase
 from nvitop.api import PhysicalDevice as DeviceBase
-from nvitop.api import Snapshot, libnvml, utilization2string
+from nvitop.api import libnvml, utilization2string
 from nvitop.gui.library.process import GpuProcess


@ -75,7 +75,7 @@ class Device(DeviceBase):
        return self._snapshot

    @property
-    def snapshot(self) -> Snapshot:
+    def snapshot(self):
        if self._snapshot is None:
            self.as_snapshot()
        return self._snapshot
--- a/nvitop/select.py
+++ b/nvitop/select.py
@ -62,12 +62,16 @@ import math
 import os
 import sys
 import warnings
-from typing import Any, Iterable
+from typing import TYPE_CHECKING, Any, Callable, Iterable, Sequence, overload

-from nvitop.api import Device, GpuProcess, colored, human2bytes, libnvml
+from nvitop.api import Device, GpuProcess, Snapshot, colored, human2bytes, libnvml
 from nvitop.version import __version__


+if TYPE_CHECKING:
+    from typing_extensions import Literal  # Python 3.8+
+
+
 __all__ = ['select_devices']

 try:
@ -78,10 +82,70 @@ except ModuleNotFoundError:
 TTY = sys.stdout.isatty()


+@overload
+def select_devices(
+    devices: Iterable[Device] | None,
+    *,
+    format: Literal['index'],  # pylint: disable=redefined-builtin
+    force_index: bool,
+    min_count: int,
+    max_count: int | None,
+    min_free_memory: int | str | None,
+    min_total_memory: int | str | None,
+    max_gpu_utilization: int | None,
+    max_memory_utilization: int | None,
+    tolerance: int,
+    free_accounts: list[str] | None,
+    sort: bool,
+    **kwargs: Any,
+) -> list[int] | list[tuple[int, int]]:
+    ...
+
+
+@overload
+def select_devices(
+    devices: Iterable[Device] | None,
+    *,
+    format: Literal['uuid'],  # pylint: disable=redefined-builtin
+    force_index: bool,
+    min_count: int,
+    max_count: int | None,
+    min_free_memory: int | str | None,
+    min_total_memory: int | str | None,
+    max_gpu_utilization: int | None,
+    max_memory_utilization: int | None,
+    tolerance: int,
+    free_accounts: list[str] | None,
+    sort: bool,
+    **kwargs: Any,
+) -> list[str]:  # the 'uuid' format yields strings, unlike the 'index' overload
+    ...
+
+
+@overload
+def select_devices(
+    devices: Iterable[Device] | None,
+    *,
+    format: Literal['device'],  # pylint: disable=redefined-builtin
+    force_index: bool,
+    min_count: int,
+    max_count: int | None,
+    min_free_memory: int | str | None,
+    min_total_memory: int | str | None,
+    max_gpu_utilization: int | None,
+    max_memory_utilization: int | None,
+    tolerance: int,
+    free_accounts: list[str] | None,
+    sort: bool,
+    **kwargs: Any,
+) -> list[Device]:
+    ...
+
+
 def select_devices(  # pylint: disable=too-many-branches,too-many-statements,too-many-locals,unused-argument
    devices: Iterable[Device] | None = None,
    *,
-    format: str = 'index',  # pylint: disable=redefined-builtin
+    format: Literal['index', 'uuid', 'device'] = 'index',  # pylint: disable=redefined-builtin
    force_index: bool = False,
    min_count: int = 0,
    max_count: int | None = None,
@ -90,10 +154,10 @@ def select_devices(  # pylint: disable=too-many-branches,too-many-statements,too
    max_gpu_utilization: int | None = None,  # in percentage
    max_memory_utilization: int | None = None,  # in percentage
    tolerance: int = 0,  # in percentage
-    free_accounts: list[str] = None,
+    free_accounts: list[str] | None = None,
    sort: bool = True,
    **kwargs: Any,
-) -> list[int] | list[tuple[int, int]] | list[str]:
+) -> list[int] | list[tuple[int, int]] | list[str] | list[Device]:
    """Select a subset of devices satisfying the specified criteria.

    Note:
@ -151,7 +215,7 @@ def select_devices(  # pylint: disable=too-many-branches,too-many-statements,too

    if max_count is not None:
        if max_count == 0:
-            return []
+            return []  # type: ignore[return-value]
        assert max_count >= min_count >= 0

    free_accounts = set(free_accounts or [])
@ -164,11 +228,11 @@ def select_devices(  # pylint: disable=too-many-branches,too-many-statements,too
    if isinstance(min_total_memory, str):
        min_total_memory = human2bytes(min_total_memory)

-    available_devices = []
+    available_devices: list[Snapshot] = []
    for device in devices:
        available_devices.extend(dev.as_snapshot() for dev in device.to_leaf_devices())
    for device in available_devices:
-        device.loosen_constraints = 0
+        device.loosen_constraints = 0  # type: ignore[attr-defined]

    if len(free_accounts) > 0:
        with GpuProcess.failsafe():
@ -177,61 +241,53 @@ def select_devices(  # pylint: disable=too-many-branches,too-many-statements,too
                for process in device.real.processes().values():
                    if process.username() in free_accounts:
                        as_free_memory += process.gpu_memory()
-                device.memory_free += as_free_memory
-                device.memory_used -= as_free_memory
+                device.memory_free += as_free_memory  # type: ignore[attr-defined]
+                device.memory_used -= as_free_memory  # type: ignore[attr-defined]
+
+    def filter_func(
+        criteria: Callable[[Snapshot], bool],
+        original_criteria: Callable[[Snapshot], bool],
+    ) -> Callable[[Snapshot], bool]:
+        def wrapped(device: Snapshot) -> bool:
+            device.loosen_constraints += int(not original_criteria(device))  # type: ignore[attr-defined]
+            return criteria(device)
+
+        return wrapped

    if min_free_memory is not None:
        loosen_min_free_memory = min_free_memory * (1.0 - tolerance)
-        available_devices = filter(
-            lambda device: (
-                device.memory_free >= loosen_min_free_memory,
-                setattr(
-                    device,
-                    'loosen_constraints',
-                    device.loosen_constraints + int(not device.memory_free >= min_free_memory),
-                ),
-            )[0],
+        available_devices = filter(  # type: ignore[assignment]
+            filter_func(
+                lambda device: device.memory_free >= loosen_min_free_memory,
+                lambda device: device.memory_free >= min_free_memory,
+            ),
            available_devices,
        )
    if min_total_memory is not None:
        loosen_min_total_memory = min_total_memory * (1.0 - tolerance)
-        available_devices = filter(
-            lambda device: (
-                device.memory_total >= loosen_min_total_memory,
-                setattr(
-                    device,
-                    'loosen_constraints',
-                    device.loosen_constraints + int(not device.memory_total >= min_total_memory),
-                ),
-            )[0],
+        available_devices = filter(  # type: ignore[assignment]
+            filter_func(
+                lambda device: device.memory_total >= loosen_min_total_memory,
+                lambda device: device.memory_total >= min_total_memory,
+            ),
            available_devices,
        )
    if max_gpu_utilization is not None:
        loosen_max_gpu_utilization = max_gpu_utilization + 100.0 * tolerance
-        available_devices = filter(
-            lambda device: (
-                device.gpu_utilization <= loosen_max_gpu_utilization,
-                setattr(
-                    device,
-                    'loosen_constraints',
-                    device.loosen_constraints
-                    + int(not device.gpu_utilization <= max_gpu_utilization),
-                ),
-            )[0],
+        available_devices = filter(  # type: ignore[assignment]
+            filter_func(
+                lambda device: device.gpu_utilization <= loosen_max_gpu_utilization,
+                lambda device: device.gpu_utilization <= max_gpu_utilization,
+            ),
            available_devices,
        )
    if max_memory_utilization is not None:
        loosen_max_memory_utilization = max_memory_utilization + 100.0 * tolerance
-        available_devices = filter(
-            lambda device: (
-                device.memory_utilization <= loosen_max_memory_utilization,
-                setattr(
-                    device,
-                    'loosen_constraints',
-                    device.loosen_constraints
-                    + int(not device.memory_utilization <= max_memory_utilization),
-                ),
-            )[0],
+        available_devices = filter(  # type: ignore[assignment]
+            filter_func(
+                lambda device: device.memory_utilization <= loosen_max_memory_utilization,
+                lambda device: device.memory_utilization <= max_memory_utilization,
+            ),
            available_devices,
        )

@ -486,10 +542,11 @@ def parse_arguments() -> argparse.Namespace:
    return args


-def main() -> None:
+def main() -> int:
    """Main function for ``nvisel`` CLI."""
    args = parse_arguments()

+    devices: Sequence[Device]
    try:
        if hasattr(args, 'inherit'):
            if args.inherit is not None:
--- a/pyproject.toml
+++ b/pyproject.toml
@ -82,6 +82,31 @@ line_length = 100
 lines_after_imports = 2
 multi_line_output = 3

+[tool.mypy]
+# Sync with requires-python
+python_version = 3.8  # deliberately newer than the py37 target: appease mypy for syntax errors in numpy stubs
+pretty = true
+show_error_codes = true
+show_error_context = true
+show_traceback = true
+allow_redefinition = true
+check_untyped_defs = true
+disallow_incomplete_defs = false
+disallow_untyped_defs = false
+ignore_missing_imports = true
+no_implicit_optional = true
+strict_equality = true
+strict_optional = true
+warn_no_return = true
+warn_redundant_casts = true
+warn_unreachable = true
+warn_unused_configs = true
+warn_unused_ignores = true
+
+[[tool.mypy.overrides]]
+module = ['nvitop.callbacks.*', 'nvitop.gui.*']
+ignore_errors= true
+
 [tool.pydocstyle]
 convention = "google"
 match-dir = '^(?!(gui|callbacks|docs))[^\.].*'
@ -90,6 +115,7 @@ match-dir = '^(?!(gui|callbacks|docs))[^\.].*'
 ignore-words = "docs/source/spelling_wordlist.txt"

 [tool.ruff]
+# Sync with requires-python
 target-version = "py37"
 line-length = 100
 show-source = true
@ -145,7 +171,6 @@ ignore = [
 ]
 "nvitop/api/lib*.py" = [
    "N",     # pep8-naming
-    "ANN",   # flake8-annotations
 ]
 "nvitop/callbacks/*.py" = [
    "ANN",   # flake8-annotations
--- a/setup.py
+++ b/setup.py
@ -51,6 +51,8 @@ try:
                'black >= 22.6.0',
                'isort',
                'pylint[spelling] >= 2.16.0',
+                'mypy',
+                'typing-extensions',
                'pre-commit',
            ],
            'cuda10': ['nvidia-ml-py == 11.450.51'],