feat(exporter): add Prometheus exporter (#92)

2026-05-15 14:15:55 -06:00 · 2023-08-27 01:37:04 +08:00 · 2023-08-27 01:37:04 +08:00 · daf72c7bf3
commit daf72c7bf3
parent 9ff3ec3400
24 changed files with 1475 additions and 37 deletions
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@ -72,15 +72,22 @@ jobs:
          python -m venv venv &&
          (
            source venv/bin/activate &&
-            python -m pip install --upgrade pip setuptools pre-commit pylint[spelling] mypy typing-extensions
+            python -m pip install --upgrade pip setuptools pre-commit pylint[spelling] mypy typing-extensions &&
            python -m pip install -r requirements.txt &&
+            python -m pip install -r nvitop-exporter/requirements.txt &&
            python -m pre_commit install --install-hooks &&
            python -m pre_commit run --all-files &&
            python -c 'import nvitop' &&
            python -m nvitop --version &&
            python -m nvitop --help &&
            python -m nvitop.select --version &&
-            python -m nvitop.select --help
+            python -m nvitop.select --help &&
+            (
+              cd nvitop-exporter &&
+              python -c 'import nvitop_exporter' &&
+              python -m nvitop_exporter --version &&
+              python -m nvitop_exporter --help
+            )
          )

      - name: Test docker build
@ -92,12 +99,17 @@ jobs:
        if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch'
        run: |
          sed -i -E 's/^__release__\s*=.*$/__release__ = True/' nvitop/version.py
+          sed -i -E 's/^__release__\s*=.*$/__release__ = True/' nvitop-exporter/nvitop_exporter/version.py

      - name: Print version
-        run: python setup.py --version
+        run: |
+          python setup.py --version
+          python nvitop-exporter/setup.py --version

      - name: Build sdist and wheels
-        run: python -m build
+        run: |
+          python -m build --outdir dist .
+          python -m build --outdir dist nvitop-exporter

      - name: List built sdist and wheels
        run: ls -lh dist/
@ -135,15 +147,23 @@ jobs:
        if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch'
        run: |
          sed -i -E 's/^__release__\s*=.*$/__release__ = True/' nvitop/version.py
+          sed -i -E 's/^__release__\s*=.*$/__release__ = True/' nvitop-exporter/nvitop_exporter/version.py

      - name: Print version
-        run: python setup.py --version
+        run: |
+          python setup.py --version
+          python nvitop-exporter/setup.py --version

      - name: Check consistency between the package version and release tag
        if: startsWith(github.ref, 'refs/tags/')
        run: |
-          PACKAGE_VER="v$(python setup.py --version)"
          RELEASE_TAG="${GITHUB_REF#refs/*/}"
+          PACKAGE_VER="v$(python setup.py --version)"
+          if [[ "${PACKAGE_VER}" != "${RELEASE_TAG}" ]]; then
+            echo "package ver. (${PACKAGE_VER}) != release tag. (${RELEASE_TAG})"
+            exit 1
+          fi
+          PACKAGE_VER="v$(python nvitop-exporter/setup.py --version)"
          if [[ "${PACKAGE_VER}" != "${RELEASE_TAG}" ]]; then
            echo "package ver. (${PACKAGE_VER}) != release tag. (${RELEASE_TAG})"
            exit 1
@ -163,10 +183,10 @@ jobs:
        with:
          user: __token__
          password: ${{ secrets.TESTPYPI_UPLOAD_TOKEN }}
-          repository_url: https://test.pypi.org/legacy/
+          repository-url: https://test.pypi.org/legacy/
          verbose: true
-          print_hash: true
-          skip_existing: true
+          print-hash: true
+          skip-existing: true

      - name: Publish to PyPI
        if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch'
@ -175,5 +195,5 @@ jobs:
          user: __token__
          password: ${{ secrets.PYPI_UPLOAD_TOKEN }}
          verbose: true
-          print_hash: true
-          skip_existing: true
+          print-hash: true
+          skip-existing: true
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@ -40,6 +40,10 @@ jobs:
      - name: Check syntax (Python 3.7)
        run: |
          "${{ steps.py37.outputs.python-path }}" -m compileall nvitop
+          (
+            cd nvitop-exporter &&
+            "${{ steps.py37.outputs.python-path }}" -m compileall nvitop_exporter
+          )

      - name: Upgrade pip
        run: |
@ -67,6 +71,29 @@ jobs:
          "${{ steps.py37.outputs.python-path }}" -m nvitop.select --version
          "${{ steps.py37.outputs.python-path }}" -m nvitop.select --help

+      - name: Install dependencies for nvitop-exporter
+        run: |
+          python -m pip install -r nvitop-exporter/requirements.txt
+
+      - name: Import tests for nvitop-exporter
+        run: |
+          (
+            cd nvitop-exporter &&
+            python -c 'import nvitop_exporter' &&
+            python -m nvitop_exporter --version &&
+            python -m nvitop_exporter --help
+          )
+
+      - name: Import tests for nvitop-exporter (Python 3.7)
+        run: |
+          (
+            cd nvitop-exporter &&
+            "${{ steps.py37.outputs.python-path }}" -m pip install -r requirements.txt &&
+            "${{ steps.py37.outputs.python-path }}" -c 'import nvitop_exporter' &&
+            "${{ steps.py37.outputs.python-path }}" -m nvitop_exporter --version &&
+            "${{ steps.py37.outputs.python-path }}" -m nvitop_exporter --help
+          )
+
      - name: Install linters
        run: |
          python -m pip install --upgrade pre-commit pylint[spelling] mypy typing-extensions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -88,3 +88,7 @@ repos:
        language: system
        types_or: [python, pyi]
        require_serial: true
+        exclude: |
+          (?x)(
+            ^nvitop-exporter/setup.py$
+          )
--- a/.pylintrc
+++ b/.pylintrc
@ -421,7 +421,8 @@ confidence=HIGH,
 # no Warning level messages displayed, use "--disable=all --enable=classes
 # --disable=W".
 disable=consider-using-f-string,
-        duplicate-code
+        duplicate-code,
+        wrong-import-order

 # Enable the message, report, category or checker with the given id(s). You can
 # either give multiple identifier separated by comma (,) or put this option
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Added

+- Add Prometheus exporter by [@XuehaiPan](https://github.com/XuehaiPan) in [#92](https://github.com/XuehaiPan/nvitop/pull/92).
 - Add device APIs to query PCIe and NVLink throughput by [@XuehaiPan](https://github.com/XuehaiPan) in [#87](https://github.com/XuehaiPan/nvitop/pull/87).

 ### Changed
--- a/docs/source/spelling_wordlist.txt
+++ b/docs/source/spelling_wordlist.txt
@ -151,3 +151,5 @@ tx
 rx
 ThroughputInfo
 pytorch
+api
+utils
--- a/nvitop-exporter/LICENSE
+++ b/nvitop-exporter/LICENSE
@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2021-2023 Xuehai Pan. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/nvitop-exporter/MANIFEST.in
+++ b/nvitop-exporter/MANIFEST.in
@ -0,0 +1 @@
+include LICENSE
--- a/nvitop-exporter/README.md
+++ b/nvitop-exporter/README.md
@ -0,0 +1,11 @@
+# nvitop-exporter
+
+Prometheus exporter built on top of `nvitop`.
+
+## Installation
+
+Install from PyPI:
+
+```bash
+pip3 install --upgrade nvitop-exporter
+```
--- a/nvitop-exporter/nvitop_exporter/__init__.py
+++ b/nvitop-exporter/nvitop_exporter/__init__.py
@ -0,0 +1,24 @@
+# This file is part of nvitop, the interactive NVIDIA-GPU process viewer.
+#
+# Copyright 2021-2023 Xuehai Pan. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Prometheus exporter built on top of ``nvitop``."""
+
+from nvitop_exporter.exporter import PrometheusExporter
+from nvitop_exporter.utils import get_ip_address
+from nvitop_exporter.version import __version__
+
+
+__all__ = ['PrometheusExporter', 'get_ip_address']
--- a/nvitop-exporter/nvitop_exporter/__main__.py
+++ b/nvitop-exporter/nvitop_exporter/__main__.py
@ -0,0 +1,25 @@
+# This file is part of nvitop, the interactive NVIDIA-GPU process viewer.
+#
+# Copyright 2021-2023 Xuehai Pan. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Prometheus exporter built on top of ``nvitop``."""
+
+import sys
+
+from nvitop_exporter.cli import main
+
+
+# When executed as the main module, run the CLI and use the value returned by
+# ``main()`` as the process exit status.
+if __name__ == '__main__':
+    sys.exit(main())
--- a/nvitop-exporter/nvitop_exporter/cli.py
+++ b/nvitop-exporter/nvitop_exporter/cli.py
@ -0,0 +1,240 @@
+# This file is part of nvitop, the interactive NVIDIA-GPU process viewer.
+#
+# Copyright 2021-2023 Xuehai Pan. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Prometheus exporter built on top of ``nvitop``."""
+
+from __future__ import annotations
+
+import argparse
+import sys
+from typing import TextIO
+
+from prometheus_client import start_wsgi_server
+
+import nvitop
+from nvitop import Device, colored, libnvml
+from nvitop_exporter.exporter import PrometheusExporter
+from nvitop_exporter.utils import get_ip_address
+from nvitop_exporter.version import __version__
+
+
+def cprint(text: str = '', *, file: TextIO | None = None) -> None:
+    """Print colored text to a file."""
+    for prefix, color in (
+        ('INFO: ', 'yellow'),
+        ('WARNING: ', 'yellow'),
+        ('ERROR: ', 'red'),
+        ('NVML ERROR: ', 'red'),
+    ):
+        if text.startswith(prefix):
+            text = text.replace(
+                prefix.rstrip(),
+                colored(prefix.rstrip(), color=color, attrs=('bold',)),
+                1,
+            )
+    print(text, file=file)
+
+
+def parse_arguments() -> argparse.Namespace:
+    """Parse command-line arguments for ``nvitop-exporter``."""
+
+    def posfloat(argstring: str) -> float:
+        num = float(argstring)
+        if num <= 0:
+            raise ValueError
+        return num
+
+    posfloat.__name__ = 'positive float'
+
+    parser = argparse.ArgumentParser(
+        prog='nvitop-exporter',
+        description='Prometheus exporter built on top of `nvitop`.',
+        formatter_class=argparse.RawTextHelpFormatter,
+        add_help=False,
+    )
+    parser.add_argument(
+        '--help',
+        '-h',
+        dest='help',
+        action='help',
+        default=argparse.SUPPRESS,
+        help='Show this help message and exit.',
+    )
+    parser.add_argument(
+        '--version',
+        '-V',
+        dest='version',
+        action='version',
+        version=f'%(prog)s {__version__} (nvitop {nvitop.__version__})',
+        help="Show %(prog)s's version number and exit.",
+    )
+
+    parser.add_argument(
+        '--hostname',
+        '--host',
+        '-H',
+        dest='hostname',
+        type=str,
+        default=get_ip_address(),
+        metavar='HOSTNAME',
+        help='Hostname to display in the exporter. (default: %(default)s)',
+    )
+    parser.add_argument(
+        '--bind-address',
+        '--bind',
+        '-B',
+        dest='bind_address',
+        type=str,
+        default='127.0.0.1',
+        metavar='ADDRESS',
+        help='Local address to bind to. (default: %(default)s)',
+    )
+    parser.add_argument(
+        '--port',
+        '-p',
+        type=int,
+        default=8000,
+        help='Port to listen on. (default: %(default)d)',
+    )
+    parser.add_argument(
+        '--interval',
+        dest='interval',
+        type=posfloat,
+        default=1.0,
+        metavar='SEC',
+        help='Interval between updates in seconds. (default: %(default)s)',
+    )
+
+    args = parser.parse_args()
+    if args.interval < 0.25:
+        parser.error(
+            f'the interval {args.interval:0.2g}s is too short, which may cause performance issues. '
+            f'Expected 1/4 or higher.',
+        )
+
+    return args
+
+
+def main() -> int:  # pylint: disable=too-many-locals,too-many-statements
+    """Main function for ``nvitop-exporter`` CLI."""
+    args = parse_arguments()
+
+    try:
+        device_count = Device.count()
+    except libnvml.NVMLError_LibraryNotFound:
+        return 1
+    except libnvml.NVMLError as ex:
+        cprint(f'NVML ERROR: {ex}', file=sys.stderr)
+        return 1
+
+    if device_count == 0:
+        cprint('NVML ERROR: No NVIDIA devices found.', file=sys.stderr)
+        return 1
+
+    physical_devices = Device.from_indices(range(device_count))
+    mig_devices = []
+    for device in physical_devices:
+        mig_devices.extend(device.mig_devices())
+    cprint(
+        'INFO: Found {}{}.'.format(
+            colored(str(device_count), color='green', attrs=('bold',)),
+            (
+                ' physical device(s) and {} MIG device(s)'.format(
+                    colored(str(len(mig_devices)), color='blue', attrs=('bold',)),
+                )
+                if mig_devices
+                else ' device(s)'
+            ),
+        ),
+        file=sys.stderr,
+    )
+
+    devices = sorted(
+        physical_devices + mig_devices,  # type: ignore[operator]
+        key=lambda d: (d.index,) if isinstance(d.index, int) else d.index,
+    )
+    for device in devices:
+        name = device.name()
+        uuid = device.uuid()
+        if device.is_mig_device():
+            name = name.rpartition(' ')[-1]
+            cprint(
+                f'INFO:   MIG {name:<11} Device {device.mig_index:>2d}: (UUID: {uuid})',
+                file=sys.stderr,
+            )
+        else:
+            cprint(f'INFO: GPU {device.index}: {name} (UUID: {uuid})', file=sys.stderr)
+
+    exporter = PrometheusExporter(devices, hostname=args.hostname, interval=args.interval)
+
+    try:
+        start_wsgi_server(port=args.port, addr=args.bind_address)
+    except OSError as ex:
+        if 'address already in use' in str(ex).lower():
+            cprint(
+                (
+                    'ERROR: Address {} is already in use. '
+                    'Please specify a different port via `--port <PORT>`.'
+                ).format(
+                    colored(
+                        f'http://{args.bind_address}:{args.port}',
+                        color='blue',
+                        attrs=('bold', 'underline'),
+                    ),
+                ),
+                file=sys.stderr,
+            )
+        elif 'cannot assign requested address' in str(ex).lower():
+            cprint(
+                (
+                    'ERROR: Cannot assign requested address at {}. '
+                    'Please specify a different address via `--bind-address <ADDRESS>`.'
+                ).format(
+                    colored(
+                        f'http://{args.bind_address}:{args.port}',
+                        color='blue',
+                        attrs=('bold', 'underline'),
+                    ),
+                ),
+                file=sys.stderr,
+            )
+        else:
+            cprint(f'ERROR: {ex}', file=sys.stderr)
+        return 1
+
+    cprint(
+        'INFO: Start the exporter on {} at {}.'.format(
+            colored(args.hostname, color='magenta', attrs=('bold',)),
+            colored(
+                f'http://{args.bind_address}:{args.port}/metrics',
+                color='green',
+                attrs=('bold', 'underline'),
+            ),
+        ),
+        file=sys.stderr,
+    )
+
+    try:
+        exporter.collect()
+    except KeyboardInterrupt:
+        cprint(file=sys.stderr)
+        cprint('INFO: Interrupted by user.', file=sys.stderr)
+
+    return 0
+
+
+# Allow running this module directly as a script; the value returned by ``main()``
+# becomes the process exit status.
+if __name__ == '__main__':
+    sys.exit(main())
--- a/nvitop-exporter/nvitop_exporter/exporter.py
+++ b/nvitop-exporter/nvitop_exporter/exporter.py
@ -0,0 +1,608 @@
+# This file is part of nvitop, the interactive NVIDIA-GPU process viewer.
+#
+# Copyright 2021-2023 Xuehai Pan. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Prometheus exporter built on top of ``nvitop``."""
+
+from __future__ import annotations
+
+import math
+import time
+from typing import Sequence
+
+from prometheus_client import REGISTRY, CollectorRegistry, Gauge, Info
+
+from nvitop import Device, MiB, MigDevice, PhysicalDevice, host
+from nvitop.api.process import GpuProcess
+from nvitop_exporter.utils import get_ip_address
+
+
+class PrometheusExporter:  # pylint: disable=too-many-instance-attributes
+    """Prometheus exporter built on top of ``nvitop``."""
+
+    def __init__(  # pylint: disable=too-many-statements
+        self,
+        devices: Sequence[Device],
+        hostname: str | None = None,
+        *,
+        registry: CollectorRegistry = REGISTRY,
+        interval: float = 1.0,
+    ) -> None:
+        """Validate the devices and register every metric with the registry.
+
+        Args:
+            devices: A list or tuple of ``PhysicalDevice`` / ``MigDevice`` instances to monitor.
+            hostname: Value of the ``hostname`` label; when falsy, the result of
+                ``get_ip_address()`` is used instead.
+            registry: The ``CollectorRegistry`` that all metrics are registered with.
+            interval: Time in seconds between two consecutive updates in ``collect()``.
+
+        Raises:
+            TypeError: If ``devices`` is not a list or tuple, or if any of its items is
+                neither a ``PhysicalDevice`` nor a ``MigDevice``.
+        """
+        if isinstance(devices, (list, tuple)):
+            devices = list(devices)
+        else:
+            raise TypeError(f'Expected a list or tuple of devices, got {type(devices)}')
+
+        for device in devices:
+            if isinstance(device, (PhysicalDevice, MigDevice)):
+                continue
+            raise TypeError(f'Expected a PhysicalDevice or MigDevice, got {type(device)}')
+
+        self.devices = devices
+        self.hostname = hostname or get_ip_address()
+        self.registry = registry
+        self.interval = interval
+
+        self.info = Info(
+            'nvitop',
+            documentation='NVITOP.',
+            labelnames=['hostname'],
+            registry=self.registry,
+        )
+        info_child = self.info.labels(hostname=self.hostname)
+        info_child.info(
+            {
+                'device_count': str(Device.count()),
+                'driver_version': Device.driver_version(),
+                'cuda_driver_version': Device.cuda_driver_version(),
+            },
+        )
+
+        def new_gauge(
+            name: str,
+            documentation: str,
+            *,
+            unit: str,
+            labelnames: tuple[str, ...],
+        ) -> Gauge:
+            """Create a gauge registered with this exporter's registry."""
+            return Gauge(
+                name=name,
+                documentation=documentation,
+                unit=unit,
+                labelnames=list(labelnames),
+                registry=self.registry,
+            )
+
+        # Label sets shared by many of the gauges below.
+        host_labels = ('hostname',)
+        gpu_labels = ('hostname', 'index', 'devicename', 'uuid')
+        process_labels = (*gpu_labels, 'pid', 'username')
+
+        # Host metrics: uptime, CPU, memory, swap and load average
+        self.host_uptime = new_gauge(
+            'host_uptime',
+            'Host uptime (s).',
+            unit='Second',
+            labelnames=host_labels,
+        )
+        self.host_cpu_percent = new_gauge(
+            'host_cpu_percent',
+            'Host CPU percent (%).',
+            unit='Percentage',
+            labelnames=host_labels,
+        )
+        self.host_virtual_memory_total = new_gauge(
+            'host_virtual_memory_total',
+            'Host virtual memory total (MiB).',
+            unit='MiB',
+            labelnames=host_labels,
+        )
+        self.host_virtual_memory_used = new_gauge(
+            'host_virtual_memory_used',
+            'Host virtual memory used (MiB).',
+            unit='MiB',
+            labelnames=host_labels,
+        )
+        self.host_virtual_memory_free = new_gauge(
+            'host_virtual_memory_free',
+            'Host virtual memory free (MiB).',
+            unit='MiB',
+            labelnames=host_labels,
+        )
+        self.host_virtual_memory_percent = new_gauge(
+            'host_virtual_memory_percent',
+            'Host virtual memory percent (%).',
+            unit='Percentage',
+            labelnames=host_labels,
+        )
+        self.host_swap_memory_total = new_gauge(
+            'host_swap_memory_total',
+            'Host swap total (MiB).',
+            unit='MiB',
+            labelnames=host_labels,
+        )
+        self.host_swap_memory_used = new_gauge(
+            'host_swap_memory_used',
+            'Host swap used (MiB).',
+            unit='MiB',
+            labelnames=host_labels,
+        )
+        self.host_swap_memory_free = new_gauge(
+            'host_swap_memory_free',
+            'Host swap free (MiB).',
+            unit='MiB',
+            labelnames=host_labels,
+        )
+        self.host_swap_memory_percent = new_gauge(
+            'host_swap_memory_percent',
+            'Host swap percent (%).',
+            unit='Percentage',
+            labelnames=host_labels,
+        )
+        self.host_load_average_1m = new_gauge(
+            'host_load_average_1m',
+            'Host load average for the last minute.',
+            unit='Percentage',
+            labelnames=host_labels,
+        )
+        self.host_load_average_5m = new_gauge(
+            'host_load_average_5m',
+            'Host load average for the last 5 minutes.',
+            unit='Percentage',
+            labelnames=host_labels,
+        )
+        self.host_load_average_15m = new_gauge(
+            'host_load_average_15m',
+            'Host load average for the last 15 minutes.',
+            unit='Percentage',
+            labelnames=host_labels,
+        )
+
+        # Host metrics: network I/O per interface
+        self.host_net_io_tx_data = new_gauge(
+            'host_net_io_tx_data',
+            'Host network I/O transmitted data (MiB).',
+            unit='MiB',
+            labelnames=('hostname', 'interface'),
+        )
+        self.host_net_io_rx_data = new_gauge(
+            'host_net_io_rx_data',
+            'Host network I/O received data (MiB).',
+            unit='MiB',
+            labelnames=('hostname', 'interface'),
+        )
+        self.host_net_io_tx_packets = new_gauge(
+            'host_net_io_tx_packets',
+            'Host network I/O transmitted packets.',
+            unit='Packet',
+            labelnames=('hostname', 'interface'),
+        )
+        self.host_net_io_rx_packets = new_gauge(
+            'host_net_io_rx_packets',
+            'Host network I/O received packets.',
+            unit='Packet',
+            labelnames=('hostname', 'interface'),
+        )
+
+        # Host metrics: disk I/O per partition and disk usage per mountpoint
+        self.host_disk_io_read_data = new_gauge(
+            'host_disk_io_read_data',
+            'Host disk I/O read data (MiB).',
+            unit='MiB',
+            labelnames=('hostname', 'partition'),
+        )
+        self.host_disk_io_write_data = new_gauge(
+            'host_disk_io_write_data',
+            'Host disk I/O write data (MiB).',
+            unit='MiB',
+            labelnames=('hostname', 'partition'),
+        )
+        self.host_disk_usage_total = new_gauge(
+            'host_disk_usage_total',
+            'Host disk usage total (MiB).',
+            unit='MiB',
+            labelnames=('hostname', 'mountpoint'),
+        )
+        self.host_disk_usage_used = new_gauge(
+            'host_disk_usage_used',
+            'Host disk usage used (MiB).',
+            unit='MiB',
+            labelnames=('hostname', 'mountpoint'),
+        )
+        self.host_disk_usage_free = new_gauge(
+            'host_disk_usage_free',
+            'Host disk usage free (MiB).',
+            unit='MiB',
+            labelnames=('hostname', 'mountpoint'),
+        )
+        self.host_disk_usage_percent = new_gauge(
+            'host_disk_usage_percent',
+            'Host disk usage percent (%).',
+            unit='Percentage',
+            labelnames=('hostname', 'mountpoint'),
+        )
+
+        # GPU metrics: one series per device
+        self.gpu_utilization = new_gauge(
+            'gpu_utilization',
+            'GPU utilization (%).',
+            unit='Percentage',
+            labelnames=gpu_labels,
+        )
+        self.gpu_memory_utilization = new_gauge(
+            'gpu_memory_utilization',
+            'GPU memory utilization (%).',
+            unit='Percentage',
+            labelnames=gpu_labels,
+        )
+        self.gpu_encoder_utilization = new_gauge(
+            'gpu_encoder_utilization',
+            'GPU encoder utilization (%).',
+            unit='Percentage',
+            labelnames=gpu_labels,
+        )
+        self.gpu_decoder_utilization = new_gauge(
+            'gpu_decoder_utilization',
+            'GPU decoder utilization (%).',
+            unit='Percentage',
+            labelnames=gpu_labels,
+        )
+        self.gpu_memory_total = new_gauge(
+            'gpu_memory_total',
+            'GPU memory total (MiB).',
+            unit='MiB',
+            labelnames=gpu_labels,
+        )
+        self.gpu_memory_used = new_gauge(
+            'gpu_memory_used',
+            'GPU memory used (MiB).',
+            unit='MiB',
+            labelnames=gpu_labels,
+        )
+        self.gpu_memory_free = new_gauge(
+            'gpu_memory_free',
+            'GPU memory free (MiB).',
+            unit='MiB',
+            labelnames=gpu_labels,
+        )
+        self.gpu_memory_percent = new_gauge(
+            'gpu_memory_percent',
+            'GPU memory percent (%).',
+            unit='Percentage',
+            labelnames=gpu_labels,
+        )
+        self.gpu_clock_sm = new_gauge(
+            'gpu_clock_sm',
+            'GPU SM clock (MHz).',
+            unit='MHz',
+            labelnames=gpu_labels,
+        )
+        self.gpu_clock_memory = new_gauge(
+            'gpu_clock_memory',
+            'GPU memory clock (MHz).',
+            unit='MHz',
+            labelnames=gpu_labels,
+        )
+        self.gpu_clock_graphics = new_gauge(
+            'gpu_clock_graphics',
+            'GPU graphics clock (MHz).',
+            unit='MHz',
+            labelnames=gpu_labels,
+        )
+        self.gpu_clock_video = new_gauge(
+            'gpu_clock_video',
+            'GPU video clock (MHz).',
+            unit='MHz',
+            labelnames=gpu_labels,
+        )
+        self.gpu_power_usage = new_gauge(
+            'gpu_power_usage',
+            'GPU power usage (W).',
+            unit='W',
+            labelnames=gpu_labels,
+        )
+        self.gpu_power_limit = new_gauge(
+            'gpu_power_limit',
+            'GPU power limit (W).',
+            unit='W',
+            labelnames=gpu_labels,
+        )
+        self.gpu_temperature = new_gauge(
+            'gpu_temperature',
+            'GPU temperature (C).',
+            unit='C',
+            labelnames=gpu_labels,
+        )
+        self.gpu_fan_speed = new_gauge(
+            'gpu_fan_speed',
+            'GPU fan speed (%).',
+            unit='Percentage',
+            labelnames=gpu_labels,
+        )
+        self.gpu_pcie_tx_throughput = new_gauge(
+            'gpu_pcie_tx_throughput',
+            'GPU PCIe transmit throughput (MiB/s).',
+            unit='MiBps',
+            labelnames=gpu_labels,
+        )
+        self.gpu_pcie_rx_throughput = new_gauge(
+            'gpu_pcie_rx_throughput',
+            'GPU PCIe receive throughput (MiB/s).',
+            unit='MiBps',
+            labelnames=gpu_labels,
+        )
+        self.gpu_nvlink_mean_tx_throughput = new_gauge(
+            'gpu_nvlink_mean_tx_throughput',
+            'GPU mean NVLink transmit throughput (MiB/s).',
+            unit='MiBps',
+            labelnames=gpu_labels,
+        )
+        self.gpu_nvlink_mean_rx_throughput = new_gauge(
+            'gpu_nvlink_mean_rx_throughput',
+            'GPU mean NVLink receive throughput (MiB/s).',
+            unit='MiBps',
+            labelnames=gpu_labels,
+        )
+
+        # GPU metrics: one series per device and NVLink
+        self.gpu_nvlink_tx_throughput = new_gauge(
+            'gpu_nvlink_tx_throughput',
+            'GPU NVLink transmit throughput (MiB/s).',
+            unit='MiBps',
+            labelnames=(*gpu_labels, 'link'),
+        )
+        self.gpu_nvlink_rx_throughput = new_gauge(
+            'gpu_nvlink_rx_throughput',
+            'GPU NVLink receive throughput (MiB/s).',
+            unit='MiBps',
+            labelnames=(*gpu_labels, 'link'),
+        )
+
+        # Process metrics: one series per device, PID and username
+        self.process_running_time = new_gauge(
+            'process_running_time',
+            'Process running time (s).',
+            unit='Second',
+            labelnames=process_labels,
+        )
+        self.process_cpu_percent = new_gauge(
+            'process_cpu_percent',
+            'Process CPU percent (%).',
+            unit='Percentage',
+            labelnames=process_labels,
+        )
+        self.process_rss_memory = new_gauge(
+            'process_rss_memory',
+            'Process memory resident set size (MiB).',
+            unit='MiB',
+            labelnames=process_labels,
+        )
+        self.process_memory_percent = new_gauge(
+            'process_memory_percent',
+            'Process memory percent (%).',
+            unit='Percentage',
+            labelnames=process_labels,
+        )
+        self.process_gpu_memory = new_gauge(
+            'process_gpu_memory',
+            'Process GPU memory (MiB).',
+            unit='MiB',
+            labelnames=process_labels,
+        )
+        self.process_gpu_sm_utilization = new_gauge(
+            'process_gpu_sm_utilization',
+            'Process GPU SM utilization (%).',
+            unit='Percentage',
+            labelnames=process_labels,
+        )
+        self.process_gpu_memory_utilization = new_gauge(
+            'process_gpu_memory_utilization',
+            'Process GPU memory utilization (%).',
+            unit='Percentage',
+            labelnames=process_labels,
+        )
+        self.process_gpu_encoder_utilization = new_gauge(
+            'process_gpu_encoder_utilization',
+            'Process GPU encoder utilization (%).',
+            unit='Percentage',
+            labelnames=process_labels,
+        )
+        self.process_gpu_decoder_utilization = new_gauge(
+            'process_gpu_decoder_utilization',
+            'Process GPU decoder utilization (%).',
+            unit='Percentage',
+            labelnames=process_labels,
+        )
+
+    def collect(self) -> None:
+        """Refresh the host and device metrics in an endless loop.
+
+        Each cycle is scheduled to take ``self.interval`` seconds in total. The loop has
+        no exit condition of its own; it ends only when an exception propagates out.
+        """
+        while True:
+            deadline = time.monotonic() + self.interval
+
+            self.update_host()
+            for device in self.devices:
+                self.update_device(device)
+
+            # Sleep for whatever is left of this cycle; never for a negative duration.
+            remaining = deadline - time.monotonic()
+            time.sleep(max(0.0, remaining))
+
+    def update_host(self) -> None:
+        """Sample the host once and publish the values on the ``host_*`` gauges.
+
+        Covers uptime, CPU and memory usage, load average, per-interface network
+        counters, per-disk I/O counters and per-mountpoint disk usage. Byte counts are
+        divided by ``MiB`` before being published.
+        """
+        loads = host.load_average()
+        if loads is None:
+            # No load average available: publish zeros instead.
+            loads = (0.0, 0.0, 0.0)  # type: ignore[unreachable]
+        memory = host.virtual_memory()
+        swap = host.swap_memory()
+        nic_counters = host.net_io_counters(pernic=True)  # type: ignore[attr-defined]
+        disk_counters = host.disk_io_counters(perdisk=True)  # type: ignore[attr-defined]
+
+        host_samples = (
+            (self.host_uptime, host.uptime()),
+            (self.host_cpu_percent, host.cpu_percent()),
+            (self.host_virtual_memory_total, memory.total / MiB),
+            (self.host_virtual_memory_used, memory.used / MiB),
+            (self.host_virtual_memory_free, memory.free / MiB),
+            (self.host_virtual_memory_percent, memory.percent),
+            (self.host_swap_memory_total, swap.total / MiB),
+            (self.host_swap_memory_used, swap.used / MiB),
+            (self.host_swap_memory_free, swap.free / MiB),
+            (self.host_swap_memory_percent, swap.percent),
+            (self.host_load_average_1m, loads[0]),
+            (self.host_load_average_5m, loads[1]),
+            (self.host_load_average_15m, loads[2]),
+        )
+        for metric, sample in host_samples:
+            metric.labels(self.hostname).set(sample)
+
+        # Network counters: one series per interface.
+        for interface, counters in nic_counters.items():
+            labels = {'hostname': self.hostname, 'interface': interface}
+            self.host_net_io_tx_data.labels(**labels).set(counters.bytes_sent / MiB)
+            self.host_net_io_rx_data.labels(**labels).set(counters.bytes_recv / MiB)
+            self.host_net_io_tx_packets.labels(**labels).set(counters.packets_sent)
+            self.host_net_io_rx_packets.labels(**labels).set(counters.packets_recv)
+
+        # Disk I/O counters: one series per entry reported by ``disk_io_counters``.
+        for disk, counters in disk_counters.items():
+            labels = {'hostname': self.hostname, 'partition': disk}
+            self.host_disk_io_read_data.labels(**labels).set(counters.read_bytes / MiB)
+            self.host_disk_io_write_data.labels(**labels).set(counters.write_bytes / MiB)
+
+        # Disk usage: one series per mountpoint; mountpoints that cannot be queried are skipped.
+        for partition in host.disk_partitions():  # type: ignore[attr-defined]
+            mountpoint = partition.mountpoint
+            try:
+                usage = host.disk_usage(mountpoint)  # type: ignore[attr-defined]
+            except (OSError, host.PsutilError):
+                continue
+            labels = {'hostname': self.hostname, 'mountpoint': mountpoint}
+            self.host_disk_usage_total.labels(**labels).set(usage.total / MiB)
+            self.host_disk_usage_used.labels(**labels).set(usage.used / MiB)
+            self.host_disk_usage_free.labels(**labels).set(usage.free / MiB)
+            self.host_disk_usage_percent.labels(**labels).set(usage.percent)
+
+    def update_device(self, device: Device) -> None:
+        """Update metrics for a single device and for the processes running on it.
+
+        Besides publishing fresh samples, this removes the per-process series of
+        processes that were listed in the previous update of this device but are no
+        longer listed now. Without that, the series of an exited process would stay in
+        the registry with its last values and the number of label sets would only grow.
+
+        Args:
+            device: The device to sample.
+        """
+        index = (
+            str(device.index) if isinstance(device.index, int) else ':'.join(map(str, device.index))
+        )
+        name = device.name()
+        uuid = device.uuid()
+
+        with device.oneshot():
+            for gauge, value in (
+                (self.gpu_utilization, float(device.gpu_utilization())),
+                (self.gpu_memory_utilization, float(device.memory_utilization())),
+                (self.gpu_encoder_utilization, float(device.encoder_utilization())),
+                (self.gpu_decoder_utilization, float(device.decoder_utilization())),
+                (self.gpu_memory_total, device.memory_total() / MiB),
+                (self.gpu_memory_used, device.memory_used() / MiB),
+                (self.gpu_memory_free, device.memory_free() / MiB),
+                (self.gpu_memory_percent, float(device.memory_percent())),
+                (self.gpu_clock_sm, float(device.clock_infos().sm)),
+                (self.gpu_clock_memory, float(device.clock_infos().memory)),
+                (self.gpu_clock_graphics, float(device.clock_infos().graphics)),
+                (self.gpu_clock_video, float(device.clock_infos().video)),
+                (self.gpu_power_usage, device.power_usage() / 1000.0),
+                (self.gpu_power_limit, device.power_limit() / 1000.0),
+                (self.gpu_temperature, float(device.temperature())),
+                (self.gpu_fan_speed, float(device.fan_speed())),
+                (self.gpu_pcie_tx_throughput, device.pcie_tx_throughput() / 1024.0),
+                (self.gpu_pcie_rx_throughput, device.pcie_rx_throughput() / 1024.0),
+                (self.gpu_nvlink_mean_tx_throughput, device.nvlink_mean_tx_throughput() / 1024.0),
+                (self.gpu_nvlink_mean_rx_throughput, device.nvlink_mean_rx_throughput() / 1024.0),
+            ):
+                gauge.labels(
+                    hostname=self.hostname,
+                    index=index,
+                    devicename=name,
+                    uuid=uuid,
+                ).set(value)
+
+            for gauge, nvlink_throughput in (
+                (self.gpu_nvlink_tx_throughput, device.nvlink_tx_throughput()),
+                (self.gpu_nvlink_rx_throughput, device.nvlink_rx_throughput()),
+            ):
+                for link, throughput in enumerate(nvlink_throughput):
+                    gauge.labels(
+                        hostname=self.hostname,
+                        index=index,
+                        devicename=name,
+                        uuid=uuid,
+                        link=link,
+                    ).set(throughput / 1024.0)
+
+        # Bookkeeping for stale-series removal: device UUID -> ``(pid, username)`` pairs
+        # published in the previous update. It is created lazily in the instance dict so
+        # that this method does not depend on any extra setup in ``__init__``.
+        tracked: dict[str, set[tuple[int, str]]] = vars(self).setdefault(
+            '_tracked_processes',
+            {},
+        )
+        previous = tracked.get(uuid, set())
+        current: set[tuple[int, str]] = set()
+
+        with GpuProcess.failsafe():
+            for pid, process in device.processes().items():
+                with process.oneshot():
+                    username = process.username()
+                    running_time = process.running_time()
+                    current.add((pid, username))
+                    for gauge, value in (
+                        (
+                            self.process_running_time,
+                            running_time.total_seconds() if running_time else math.nan,
+                        ),
+                        (self.process_cpu_percent, process.cpu_percent()),
+                        (self.process_rss_memory, process.host_memory() / MiB),
+                        (self.process_memory_percent, float(process.memory_percent())),
+                        (self.process_gpu_memory, process.gpu_memory() / MiB),
+                        (
+                            self.process_gpu_sm_utilization,
+                            float(process.gpu_sm_utilization()),
+                        ),
+                        (
+                            self.process_gpu_memory_utilization,
+                            float(process.gpu_memory_utilization()),
+                        ),
+                        (
+                            self.process_gpu_encoder_utilization,
+                            float(process.gpu_encoder_utilization()),
+                        ),
+                        (
+                            self.process_gpu_decoder_utilization,
+                            float(process.gpu_decoder_utilization()),
+                        ),
+                    ):
+                        gauge.labels(
+                            hostname=self.hostname,
+                            index=index,
+                            devicename=name,
+                            uuid=uuid,
+                            pid=pid,
+                            username=username,
+                        ).set(value)
+        tracked[uuid] = current
+
+        # Drop the series of processes that disappeared since the previous update.
+        for pid, username in previous - current:
+            for gauge in (
+                self.process_running_time,
+                self.process_cpu_percent,
+                self.process_rss_memory,
+                self.process_memory_percent,
+                self.process_gpu_memory,
+                self.process_gpu_sm_utilization,
+                self.process_gpu_memory_utilization,
+                self.process_gpu_encoder_utilization,
+                self.process_gpu_decoder_utilization,
+            ):
+                try:
+                    # ``remove()`` takes the label values positionally, in ``labelnames`` order.
+                    gauge.remove(self.hostname, index, name, uuid, pid, username)
+                except KeyError:
+                    # The label set is already gone; nothing to clean up.
+                    pass
--- a/nvitop-exporter/nvitop_exporter/utils.py
+++ b/nvitop-exporter/nvitop_exporter/utils.py
@ -0,0 +1,38 @@
+# This file is part of nvitop, the interactive NVIDIA-GPU process viewer.
+#
+# Copyright 2021-2023 Xuehai Pan. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utility functions for ``nvitop-exporter``."""
+
+import socket
+
+
+__all__ = ['get_ip_address']
+
+
+# Reference: https://stackoverflow.com/a/28950776
+def get_ip_address() -> str:
+    """Return a local IPv4 address of this machine, or ``'127.0.0.1'`` on failure.
+
+    A UDP socket is connected to a fixed private address and the socket's own address
+    is then read back with ``getsockname()``. Any exception raised while connecting or
+    reading the address results in the loopback fallback.
+    """
+    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
+        sock.settimeout(0.0)
+        try:
+            # Doesn't even have to be reachable
+            sock.connect(('10.254.254.254', 1))
+            return sock.getsockname()[0]
+        except Exception:  # noqa: BLE001 # pylint: disable=broad-except
+            return '127.0.0.1'
--- a/nvitop-exporter/nvitop_exporter/version.py
+++ b/nvitop-exporter/nvitop_exporter/version.py
@ -0,0 +1,54 @@
+# This file is part of nvitop, the interactive NVIDIA-GPU process viewer.
+#
+# Copyright 2021-2023 Xuehai Pan. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Prometheus exporter built on top of ``nvitop``."""
+
+__version__ = '1.3.0'
+__license__ = 'Apache-2.0'
+__author__ = __maintainer__ = 'Xuehai Pan'
+__email__ = 'XuehaiPan@pku.edu.cn'
+# When false, the block below tries to replace ``__version__`` with a value derived
+# from the output of ``git describe``.
+__release__ = False
+
+if not __release__:
+    import os
+    import subprocess
+
+    try:
+        # Run ``git describe`` in this file's directory and reshape its output. For an
+        # output shaped like ``v1.2.3-4-gabcdef0`` the chain below yields
+        # ``('1.2.3', '.dev', '4+gabcdef0')``. When the reshaped string contains no
+        # ``.dev``, ``sep`` and ``suffix`` are empty strings.
+        prefix, sep, suffix = (
+            subprocess.check_output(
+                ['git', 'describe', '--abbrev=7'],  # noqa: S603,S607
+                cwd=os.path.dirname(os.path.abspath(__file__)),
+                stderr=subprocess.DEVNULL,
+                text=True,
+            )
+            .strip()
+            .lstrip('v')
+            .replace('-', '.dev', 1)
+            .replace('-', '+', 1)
+            .partition('.dev')
+        )
+        if sep:
+            # Increment the last dot-separated component of the prefix by one, e.g.
+            # ``1.2.3`` -> ``1.2.4``, then re-attach the ``.dev`` part.
+            version_prefix, dot, version_tail = prefix.rpartition('.')
+            prefix = f'{version_prefix}{dot}{int(version_tail) + 1}'
+            __version__ = sep.join((prefix, suffix))
+            del version_prefix, dot, version_tail
+        else:
+            # No ``.dev`` part was produced: use the described version as is.
+            __version__ = prefix
+        # Delete the temporaries so that they do not remain as module attributes.
+        del prefix, sep, suffix
+    except (OSError, subprocess.CalledProcessError):
+        # ``git`` could not be run or exited with an error: keep the hard-coded version.
+        pass
+
+    del os, subprocess
--- a/nvitop-exporter/pyproject.toml
+++ b/nvitop-exporter/pyproject.toml
@ -0,0 +1,83 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "nvitop-exporter"
+description = "Prometheus exporter built on top of `nvitop`."
+readme = "README.md"
+requires-python = ">= 3.7"
+authors = [{ name = "Xuehai Pan", email = "XuehaiPan@pku.edu.cn" }]
+license = { text = "Apache License, Version 2.0 (Apache-2.0)" }
+keywords = [
+    "nvidia",
+    "nvidia-smi",
+    "NVIDIA",
+    "NVML",
+    "CUDA",
+    "GPU",
+    "top",
+    "monitoring",
+    "prometheus",
+    "Prometheus",
+    "grafana",
+    "Grafana",
+]
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "License :: OSI Approved :: Apache Software License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.7",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Operating System :: Microsoft :: Windows",
+    "Operating System :: POSIX :: Linux",
+    "Environment :: GPU",
+    "Environment :: GPU :: NVIDIA CUDA",
+    "Intended Audience :: Developers",
+    "Intended Audience :: End Users/Desktop",
+    "Intended Audience :: System Administrators",
+    "Topic :: System :: Hardware",
+    "Topic :: System :: Monitoring",
+    "Topic :: System :: Systems Administration",
+    "Topic :: Utilities",
+]
+dependencies = [
+    # Sync with nvitop/version.py and requirements.txt
+    "nvitop == 1.3.0",
+    "prometheus-client >= 0.4.0",
+]
+dynamic = ["version"]
+
+[project.scripts]
+nvitop-exporter = "nvitop_exporter.cli:main"
+
+[project.urls]
+Homepage = "https://github.com/XuehaiPan/nvitop"
+Repository = "https://github.com/XuehaiPan/nvitop"
+Documentation = "https://nvitop.readthedocs.io"
+"Bug Report" = "https://github.com/XuehaiPan/nvitop/issues"
+
+[tool.setuptools.packages.find]
+include = ["nvitop_exporter", "nvitop_exporter.*"]
+
+[tool.black]
+safe = true
+line-length = 100
+skip-string-normalization = true
+target-version = ["py37", "py38", "py39", "py310", "py311"]
+
+[tool.isort]
+atomic = true
+profile = "black"
+src_paths = ["nvitop_exporter"]
+known_first_party = ["nvitop", "nvitop_exporter"]
+indent = 4
+line_length = 100
+lines_after_imports = 2
+multi_line_output = 3
+
+[tool.ruff]
+extend = "../pyproject.toml"
--- a/nvitop-exporter/requirements.txt
+++ b/nvitop-exporter/requirements.txt
@ -0,0 +1,2 @@
+nvitop  # NOTE(review): unpinned here, but the pyproject dependencies pin "nvitop == 1.3.0" — confirm
+prometheus-client >= 0.4.0
--- a/nvitop-exporter/setup.py
+++ b/nvitop-exporter/setup.py
@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+
+"""Setup script for ``nvitop-exporter``."""
+
+import pathlib
+import re
+import sys
+
+from setuptools import setup
+
+
+HERE = pathlib.Path(__file__).absolute().parent  # directory that contains this setup.py
+VERSION_FILE = HERE / 'nvitop_exporter' / 'version.py'
+
+sys.path.insert(0, str(VERSION_FILE.parent))  # lets the bare ``import version`` below resolve
+# pylint: disable-next=import-error,wrong-import-position
+import version  # noqa
+
+
+VERSION_CONTENT = None  # backup of version.py text; stays None when there is nothing to restore
+
+try:
+    if not version.__release__:  # non-release: write the imported __version__ into version.py
+        try:
+            VERSION_CONTENT = VERSION_FILE.read_text(encoding='utf-8')  # back up before rewriting
+            VERSION_FILE.write_text(
+                data=re.sub(
+                    r"""__version__\s*=\s*('[^']+'|"[^"]+")""",  # a quoted __version__ assignment
+                    f'__version__ = {version.__version__!r}',
+                    string=VERSION_CONTENT,
+                ),
+                encoding='utf-8',
+            )
+        except OSError:  # file I/O failed: continue with setup() anyway
+            VERSION_CONTENT = None  # disables the restore in ``finally``
+
+    setup(
+        name='nvitop-exporter',
+        version=version.__version__,
+    )
+finally:  # runs whether or not setup() raised
+    if VERSION_CONTENT is not None:  # a backup exists, so put the original text back
+        with VERSION_FILE.open(mode='wt', encoding='utf-8', newline='') as file:
+            file.write(VERSION_CONTENT)  # newline='' above: written without newline translation
--- a/nvitop/api/init.py
+++ b/nvitop/api/init.py
@ -29,18 +29,37 @@ from nvitop.api.device import (
 )
 from nvitop.api.libnvml import NVMLError, nvmlCheckReturn
 from nvitop.api.process import GpuProcess, HostProcess, command_join
-from nvitop.api.utils import *  # noqa: F403
+from nvitop.api.utils import (  # explicitly export these to appease mypy
+    NA,
+    SIZE_UNITS,
+    UINT_MAX,
+    ULONGLONG_MAX,
+    GiB,
+    KiB,
+    MiB,
+    NaType,
+    NotApplicable,
+    NotApplicableType,
+    PiB,
+    Snapshot,
+    TiB,
+    boolify,
+    bytes2human,
+    colored,
+    human2bytes,
+    set_color,
+    timedelta2human,
+    utilization2string,
+)


 __all__ = [
-    'take_snapshots',
-    'collect_in_background',
-    'ResourceMetricCollector',
-    'libnvml',
-    'nvmlCheckReturn',
    'NVMLError',
+    'nvmlCheckReturn',
+    'libnvml',
    'libcuda',
    'libcudart',
+    # nvitop.api.device
    'Device',
    'PhysicalDevice',
    'MigDevice',
@ -48,9 +67,34 @@ __all__ = [
    'CudaMigDevice',
    'parse_cuda_visible_devices',
    'normalize_cuda_visible_devices',
+    # nvitop.api.process
    'host',
    'HostProcess',
    'GpuProcess',
    'command_join',
-    *utils.__all__,
+    # nvitop.api.collector
+    'take_snapshots',
+    'collect_in_background',
+    'ResourceMetricCollector',
+    # nvitop.api.utils
+    'NA',
+    'NaType',
+    'NotApplicable',
+    'NotApplicableType',
+    'UINT_MAX',
+    'ULONGLONG_MAX',
+    'KiB',
+    'MiB',
+    'GiB',
+    'TiB',
+    'PiB',
+    'SIZE_UNITS',
+    'bytes2human',
+    'human2bytes',
+    'timedelta2human',
+    'utilization2string',
+    'colored',
+    'set_color',
+    'boolify',
+    'Snapshot',
 ]
--- a/nvitop/api/device.py
+++ b/nvitop/api/device.py
@ -1154,7 +1154,7 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me

    gpu_percent = gpu_utilization  # in percentage

-    def memory_utilization(self) -> float | NaType:  # in percentage
+    def memory_utilization(self) -> int | NaType:  # in percentage
        """Percent of time over the past sample period during which global (device) memory was being read or written.

        The sample period may be between 1 second and 1/6 second depending on the product.
@ -1170,7 +1170,7 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        """  # pylint: disable=line-too-long
        return self.utilization_rates().memory

-    def encoder_utilization(self) -> float | NaType:  # in percentage
+    def encoder_utilization(self) -> int | NaType:  # in percentage
        """The encoder utilization rate  in percentage.

        Returns: Union[int, NaType]
@ -1178,7 +1178,7 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
        """
        return self.utilization_rates().encoder

-    def decoder_utilization(self) -> float | NaType:  # in percentage\
+    def decoder_utilization(self) -> int | NaType:  # in percentage
        """The decoder utilization rate  in percentage.

        Returns: Union[int, NaType]
@ -2120,8 +2120,8 @@ class Device:  # pylint: disable=too-many-instance-attributes,too-many-public-me
                self.handle,
                # Only utilization samples that were recorded after this timestamp will be returned.
                # The CPU timestamp, i.e. absolute Unix epoch timestamp (in microseconds), is used.
-                # Here we use the timestamp 1/4 second ago to ensure the record buffer is not empty.
-                time.time_ns() // 1000 - 250_000,
+                # Here we use the timestamp 1 second ago to ensure the record buffer is not empty.
+                time.time_ns() // 1000 - 1000_000,
                default=(),
            )
            for s in sorted(samples, key=lambda s: s.timeStamp):
--- a/nvitop/api/utils.py
+++ b/nvitop/api/utils.py
@ -730,10 +730,11 @@ def memoize_when_activated(method: Method) -> Method:
    """

    @functools.wraps(method)
-    def wrapped(self, *args, **kwargs):  # noqa: ANN001,ANN002,ANN003,ANN202
+    def wrapped(self: object, *args: Any, **kwargs: Any) -> Any:
        try:
            # case 1: we previously entered oneshot() ctx
-            ret = self._cache[method]  # pylint: disable=protected-access
+            # pylint: disable-next=protected-access
+            ret = self._cache[method]  # type: ignore[attr-defined]
        except AttributeError:
            # case 2: we never entered oneshot() ctx
            return method(self, *args, **kwargs)
@ -742,25 +743,28 @@ def memoize_when_activated(method: Method) -> Method:
            # for this entry yet
            ret = method(self, *args, **kwargs)
            try:
-                self._cache[method] = ret  # pylint: disable=protected-access
+                # pylint: disable-next=protected-access
+                self._cache[method] = ret  # type: ignore[attr-defined]
            except AttributeError:
                # multi-threading race condition, see:
                # https://github.com/giampaolo/psutil/issues/1948
                pass
        return ret

-    def cache_activate(self):  # noqa: ANN001,ANN202
+    def cache_activate(self: object) -> None:
        """Activate cache.

        Expects an instance. Cache will be stored as a "_cache" instance attribute.
        """
        if not hasattr(self, '_cache'):
-            self._cache = {}  # pylint: disable=protected-access
+            # pylint: disable-next=protected-access
+            self._cache = {}  # type: ignore[attr-defined]

-    def cache_deactivate(self):  # noqa: ANN001,ANN202
+    def cache_deactivate(self: object) -> None:
        """Deactivate and clear cache."""
        try:
-            del self._cache  # pylint: disable=protected-access
+            # pylint: disable-next=protected-access
+            del self._cache  # type: ignore[attr-defined]
        except AttributeError:
            pass

--- a/nvitop/cli.py
+++ b/nvitop/cli.py
@ -24,7 +24,7 @@ NVITOP_MONITOR_MODE = set(

 # pylint: disable=too-many-branches,too-many-statements
 def parse_arguments() -> argparse.Namespace:
-    """Parse command-line arguments for ``nvtiop``."""
+    """Parse command-line arguments for ``nvitop``."""
    coloring_rules = '{} < th1 %% <= {} < th2 %% <= {}'.format(
        colored('light', 'green'),
        colored('moderate', 'yellow'),
--- a/nvitop/version.py
+++ b/nvitop/version.py
@ -17,7 +17,7 @@
 """An interactive NVIDIA-GPU process viewer and beyond, the one-stop solution for GPU process management."""

 __version__ = '1.2.0'
-__license__ = 'GPLv3'
+__license__ = 'GPL-3.0-only AND Apache-2.0'
 __author__ = __maintainer__ = 'Xuehai Pan'
 __email__ = 'XuehaiPan@pku.edu.cn'
 __release__ = False
--- a/pyproject.toml
+++ b/pyproject.toml
@ -76,7 +76,8 @@ target-version = ["py37", "py38", "py39", "py310", "py311"]
 [tool.isort]
 atomic = true
 profile = "black"
-src_paths = ["nvitop"]
+src_paths = ["nvitop", "nvitop-exporter/nvitop_exporter"]
+known_first_party = ["nvitop", "nvitop_exporter"]
 indent = 4
 line_length = 100
 lines_after_imports = 2
@ -85,14 +86,16 @@ multi_line_output = 3
 [tool.mypy]
 # Sync with requires-python
 python_version = 3.8  # appease mypy for syntax errors in numpy stubs
+mypy_path = [".", "nvitop-exporter"]
+exclude = ["nvitop-exporter/setup.py"]
 pretty = true
 show_error_codes = true
 show_error_context = true
 show_traceback = true
 allow_redefinition = true
 check_untyped_defs = true
-disallow_incomplete_defs = false
-disallow_untyped_defs = false
+disallow_incomplete_defs = true
+disallow_untyped_defs = true
 ignore_missing_imports = true
 no_implicit_optional = true
 strict_equality = true
@ -119,7 +122,7 @@ ignore-words = "docs/source/spelling_wordlist.txt"
 target-version = "py37"
 line-length = 100
 show-source = true
-src = ["nvitop"]
+src = ["nvitop", "nvitop-exporter/nvitop_exporter"]
 select = [
    "E", "W",  # pycodestyle
    "F",       # pyflakes