From 4e814c52a666a716821d85ac5acfb1f3490e5650 Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Wed, 6 May 2026 17:26:32 +0800 Subject: [PATCH] feat(exporter): support TLS and mutual TLS for the metrics endpoint (#213) --- CHANGELOG.md | 3 +- nvitop-exporter/README.md | 58 +++++++++++++++++ nvitop-exporter/nvitop_exporter/cli.py | 89 ++++++++++++++++++++++++-- nvitop-exporter/pyproject.toml | 2 +- nvitop-exporter/requirements.txt | 2 +- 5 files changed, 147 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a29218..76ea513 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,10 +15,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `nvidia-ml-py` 13.595.45 to support list. - Add support for open kernel-module driver packages (e.g., `nvidia-driver-595-open`) in `install-nvidia-driver.sh` with new `--proprietary` and `--open` flags by [@XuehaiPan](https://github.com/XuehaiPan). +- Add TLS and mutual TLS (mTLS) support for `nvitop-exporter` via new `--certfile`, `--keyfile`, `--client-cafile`, `--client-capath`, and `--client-auth-required` CLI flags by [@XuehaiPan](https://github.com/XuehaiPan) in [#213](https://github.com/XuehaiPan/nvitop/pull/213). Issued by [@StefanSander3](https://github.com/StefanSander3) in [#131](https://github.com/XuehaiPan/nvitop/issues/131). ### Changed -- +- Bump minimum `prometheus-client` version to `0.19.0` for `nvitop-exporter` (required for TLS support) by [@XuehaiPan](https://github.com/XuehaiPan) in [#213](https://github.com/XuehaiPan/nvitop/pull/213). ### Fixed diff --git a/nvitop-exporter/README.md b/nvitop-exporter/README.md index a4a3762..9d36bc8 100644 --- a/nvitop-exporter/README.md +++ b/nvitop-exporter/README.md @@ -23,6 +23,64 @@ scrape_configs: - targets: ['localhost:5050'] ``` +## TLS / mTLS + +The exporter can serve metrics over HTTPS, optionally requiring client certificate authentication (mTLS). 
TLS support is provided by `prometheus_client` (>= 0.19.0) and configured entirely through CLI flags — no config file is involved. + +### Plain HTTPS + +Provide a server certificate and private key: + +```bash +nvitop-exporter --bind-address 0.0.0.0 --port 5050 \ + --certfile /path/to/server.crt \ + --keyfile /path/to/server.key +``` + +The metrics endpoint is then served at [`https://localhost:5050/metrics`](https://localhost:5050/metrics). Update the Prometheus scrape config to use the `https` scheme, and point it at the CA that signed your server certificate: + +```yaml +scrape_configs: + - job_name: 'nvitop-exporter' + scheme: https + static_configs: + - targets: ['localhost:5050'] + tls_config: + ca_file: /path/to/server-ca.crt +``` + +### Mutual TLS (mTLS) + +To require scrapers to present a valid client certificate, pass a CA bundle (`--client-cafile`) or CA directory (`--client-capath`) **and** `--client-auth-required`: + +```bash +nvitop-exporter --bind-address 0.0.0.0 --port 5050 \ + --certfile /path/to/server.crt \ + --keyfile /path/to/server.key \ + --client-cafile /path/to/clients-ca.crt \ + --client-auth-required +``` + +`--client-cafile` / `--client-capath` and `--client-auth-required` must be specified together. Passing a CA without `--client-auth-required` is rejected by the CLI to avoid the silent "trust but don't verify" configuration that the underlying `prometheus_client` API would otherwise allow. + +Configure Prometheus to present its client certificate when scraping: + +```yaml +scrape_configs: + - job_name: 'nvitop-exporter' + scheme: https + static_configs: + - targets: ['localhost:5050'] + tls_config: + ca_file: /path/to/server-ca.crt + cert_file: /path/to/prometheus-client.crt + key_file: /path/to/prometheus-client.key +``` + +### Authentication beyond mTLS + +The exporter does not implement HTTP basic auth, OAuth, or IP allowlisting. 
Following the standard Prometheus exporter pattern, run the exporter behind a reverse proxy (`NGINX`, `Traefik`, `Caddy`, ...) if any of those are required. + ## Grafana Dashboard A Grafana dashboard is provided to visualize the metrics collected by the exporter. diff --git a/nvitop-exporter/nvitop_exporter/cli.py b/nvitop-exporter/nvitop_exporter/cli.py index 1806b45..0535cde 100644 --- a/nvitop-exporter/nvitop_exporter/cli.py +++ b/nvitop-exporter/nvitop_exporter/cli.py @@ -19,6 +19,7 @@ from __future__ import annotations import argparse +import os import sys from typing import TextIO @@ -118,6 +119,58 @@ def parse_arguments() -> argparse.Namespace: help='Interval between updates in seconds. (default: %(default)s)', ) + tls_group = parser.add_argument_group('TLS / mTLS options') + tls_group.add_argument( + '--certfile', + dest='certfile', + type=str, + default=None, + metavar='PATH', + help=( + 'Path to the TLS certificate file (PEM).\n' + 'Enables HTTPS when set together with `--keyfile`.' + ), + ) + tls_group.add_argument( + '--keyfile', + dest='keyfile', + type=str, + default=None, + metavar='PATH', + help='Path to the TLS private key file (PEM).\nRequired if `--certfile` is set.', + ) + tls_group.add_argument( + '--client-cafile', + dest='client_cafile', + type=str, + default=None, + metavar='PATH', + help=( + 'Path to a PEM bundle of trusted client CA certificates for mutual TLS.\n' + 'Requires `--client-auth-required` to actually verify client certificates.' + ), + ) + tls_group.add_argument( + '--client-capath', + dest='client_capath', + type=str, + default=None, + metavar='PATH', + help=( + 'Path to a directory of trusted client CA certificates for mutual TLS.\n' + 'Requires `--client-auth-required` to actually verify client certificates.' 
+ ), + ) + tls_group.add_argument( + '--client-auth-required', + dest='client_auth_required', + action='store_true', + help=( + 'Require clients to present a valid certificate (mutual TLS).\n' + 'Requires `--client-cafile` or `--client-capath`.' + ), + ) + args = parser.parse_args() if args.interval < 0.25: parser.error( @@ -125,12 +178,32 @@ def parse_arguments() -> argparse.Namespace: f'Expected 1/4 or higher.', ) + if (args.certfile is None) != (args.keyfile is None): + parser.error('`--certfile` and `--keyfile` must be specified together.') + if args.certfile is not None and not os.path.isfile(args.certfile): + parser.error(f'`--certfile` not found: {args.certfile}') + if args.keyfile is not None and not os.path.isfile(args.keyfile): + parser.error(f'`--keyfile` not found: {args.keyfile}') + if args.client_cafile is not None and not os.path.isfile(args.client_cafile): + parser.error(f'`--client-cafile` not found: {args.client_cafile}') + if args.client_capath is not None and not os.path.isdir(args.client_capath): + parser.error(f'`--client-capath` not a directory: {args.client_capath}') + ca_provided = args.client_cafile is not None or args.client_capath is not None + if (ca_provided or args.client_auth_required) and args.certfile is None: + parser.error('Mutual TLS options require `--certfile` and `--keyfile`.') + if ca_provided != args.client_auth_required: + parser.error( + '`--client-cafile` / `--client-capath` and `--client-auth-required` must be ' + 'specified together to enable mutual TLS.', + ) + return args def main() -> int: # pylint: disable=too-many-locals,too-many-statements """Main function for ``nvitop-exporter`` CLI.""" args = parse_arguments() + scheme = 'https' if args.certfile is not None else 'http' try: device_count = Device.count() @@ -181,7 +254,15 @@ def main() -> int: # pylint: disable=too-many-locals,too-many-statements exporter = PrometheusExporter(devices, hostname=args.hostname, interval=args.interval) try: - 
start_wsgi_server(port=args.port, addr=args.bind_address) + start_wsgi_server( + port=args.port, + addr=args.bind_address, + certfile=args.certfile, + keyfile=args.keyfile, + client_cafile=args.client_cafile, + client_capath=args.client_capath, + client_auth_required=args.client_auth_required, + ) except OSError as ex: if 'address already in use' in str(ex).lower(): cprint( @@ -190,7 +271,7 @@ def main() -> int: # pylint: disable=too-many-locals,too-many-statements 'Please specify a different port via `--port <PORT>`.' ).format( colored( - f'http://{args.bind_address}:{args.port}', + f'{scheme}://{args.bind_address}:{args.port}', color='blue', attrs=('bold', 'underline'), ), @@ -204,7 +285,7 @@ def main() -> int: # pylint: disable=too-many-locals,too-many-statements 'Please specify a different address via `--bind-address <ADDRESS>
`.' ).format( colored( - f'http://{args.bind_address}:{args.port}', + f'{scheme}://{args.bind_address}:{args.port}', color='blue', attrs=('bold', 'underline'), ), @@ -219,7 +300,7 @@ def main() -> int: # pylint: disable=too-many-locals,too-many-statements 'INFO: Start the exporter on {} at {}.'.format( colored(args.hostname, color='magenta', attrs=('bold',)), colored( - f'http://{args.bind_address}:{args.port}/metrics', + f'{scheme}://{args.bind_address}:{args.port}/metrics', color='green', attrs=('bold', 'underline'), ), diff --git a/nvitop-exporter/pyproject.toml b/nvitop-exporter/pyproject.toml index 335394f..b8165cb 100644 --- a/nvitop-exporter/pyproject.toml +++ b/nvitop-exporter/pyproject.toml @@ -49,7 +49,7 @@ classifiers = [ dependencies = [ # Sync with nvitop/version.py and requirements.txt "nvitop ~= 1.6.2", - "prometheus-client >= 0.4.0", + "prometheus-client >= 0.19.0", ] dynamic = ["version"] diff --git a/nvitop-exporter/requirements.txt b/nvitop-exporter/requirements.txt index 2b792ce..d04fdde 100644 --- a/nvitop-exporter/requirements.txt +++ b/nvitop-exporter/requirements.txt @@ -1,2 +1,2 @@ nvitop -prometheus-client >= 0.4.0 +prometheus-client >= 0.19.0