mirror of
https://github.com/XuehaiPan/nvitop.git
synced 2026-05-15 06:06:12 -06:00
feat(exporter): support TLS and mutual TLS for the metrics endpoint (#213)
This commit is contained in:
parent
1bed33ed1e
commit
4e814c52a6
5 changed files with 147 additions and 7 deletions
|
|
@ -15,10 +15,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
- Add `nvidia-ml-py` 13.595.45 to support list.
|
- Add `nvidia-ml-py` 13.595.45 to support list.
|
||||||
- Add support for open kernel-module driver packages (e.g., `nvidia-driver-595-open`) in `install-nvidia-driver.sh` with new `--proprietary` and `--open` flags by [@XuehaiPan](https://github.com/XuehaiPan).
|
- Add support for open kernel-module driver packages (e.g., `nvidia-driver-595-open`) in `install-nvidia-driver.sh` with new `--proprietary` and `--open` flags by [@XuehaiPan](https://github.com/XuehaiPan).
|
||||||
|
- Add TLS and mutual TLS (mTLS) support for `nvitop-exporter` via new `--certfile`, `--keyfile`, `--client-cafile`, `--client-capath`, and `--client-auth-required` CLI flags by [@XuehaiPan](https://github.com/XuehaiPan) in [#213](https://github.com/XuehaiPan/nvitop/pull/213). Issued by [@StefanSander3](https://github.com/StefanSander3) in [#131](https://github.com/XuehaiPan/nvitop/issues/131).
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
|
||||||
-
|
- Bump minimum `prometheus-client` version to `0.19.0` for `nvitop-exporter` (required for TLS support) by [@XuehaiPan](https://github.com/XuehaiPan) in [#213](https://github.com/XuehaiPan/nvitop/pull/213).
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,64 @@ scrape_configs:
|
||||||
- targets: ['localhost:5050']
|
- targets: ['localhost:5050']
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## TLS / mTLS
|
||||||
|
|
||||||
|
The exporter can serve metrics over HTTPS, optionally requiring client certificate authentication (mTLS). TLS support is provided by `prometheus_client` (>= 0.19.0) and configured entirely through CLI flags — no config file is involved.
|
||||||
|
|
||||||
|
### Plain HTTPS
|
||||||
|
|
||||||
|
Provide a server certificate and private key:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nvitop-exporter --bind-address 0.0.0.0 --port 5050 \
|
||||||
|
--certfile /path/to/server.crt \
|
||||||
|
--keyfile /path/to/server.key
|
||||||
|
```
|
||||||
|
|
||||||
|
The metrics endpoint is then served at [`https://localhost:5050/metrics`](https://localhost:5050/metrics). Update the Prometheus scrape config to use the `https` scheme, and point it at the CA that signed your server certificate:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: 'nvitop-exporter'
|
||||||
|
scheme: https
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:5050']
|
||||||
|
tls_config:
|
||||||
|
ca_file: /path/to/server-ca.crt
|
||||||
|
```
|
||||||
|
|
||||||
|
### Mutual TLS (mTLS)
|
||||||
|
|
||||||
|
To require scrapers to present a valid client certificate, pass a CA bundle (`--client-cafile`) or CA directory (`--client-capath`) **and** `--client-auth-required`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
nvitop-exporter --bind-address 0.0.0.0 --port 5050 \
|
||||||
|
--certfile /path/to/server.crt \
|
||||||
|
--keyfile /path/to/server.key \
|
||||||
|
--client-cafile /path/to/clients-ca.crt \
|
||||||
|
--client-auth-required
|
||||||
|
```
|
||||||
|
|
||||||
|
`--client-cafile` / `--client-capath` and `--client-auth-required` must be specified together, and both require `--certfile` and `--keyfile`. Passing a CA without `--client-auth-required` is rejected by the CLI to avoid the silent "trust but don't verify" configuration that the underlying `prometheus_client` API would otherwise allow; passing `--client-auth-required` without a CA is rejected as well.
|
||||||
|
|
||||||
|
Configure Prometheus to present its client certificate when scraping:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: 'nvitop-exporter'
|
||||||
|
scheme: https
|
||||||
|
static_configs:
|
||||||
|
- targets: ['localhost:5050']
|
||||||
|
tls_config:
|
||||||
|
ca_file: /path/to/server-ca.crt
|
||||||
|
cert_file: /path/to/prometheus-client.crt
|
||||||
|
key_file: /path/to/prometheus-client.key
|
||||||
|
```
|
||||||
|
|
||||||
|
### Authentication beyond mTLS
|
||||||
|
|
||||||
|
The exporter does not implement HTTP basic auth, OAuth, or IP allowlisting. Following the standard Prometheus exporter pattern, run the exporter behind a reverse proxy (`NGINX`, `Traefik`, `Caddy`, ...) if any of those is required.
|
||||||
|
|
||||||
## Grafana Dashboard
|
## Grafana Dashboard
|
||||||
|
|
||||||
A Grafana dashboard is provided to visualize the metrics collected by the exporter.
|
A Grafana dashboard is provided to visualize the metrics collected by the exporter.
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
from typing import TextIO
|
from typing import TextIO
|
||||||
|
|
||||||
|
|
@ -118,6 +119,58 @@ def parse_arguments() -> argparse.Namespace:
|
||||||
help='Interval between updates in seconds. (default: %(default)s)',
|
help='Interval between updates in seconds. (default: %(default)s)',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
tls_group = parser.add_argument_group('TLS / mTLS options')
|
||||||
|
tls_group.add_argument(
|
||||||
|
'--certfile',
|
||||||
|
dest='certfile',
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
metavar='PATH',
|
||||||
|
help=(
|
||||||
|
'Path to the TLS certificate file (PEM).\n'
|
||||||
|
'Enables HTTPS when set together with `--keyfile`.'
|
||||||
|
),
|
||||||
|
)
|
||||||
|
tls_group.add_argument(
|
||||||
|
'--keyfile',
|
||||||
|
dest='keyfile',
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
metavar='PATH',
|
||||||
|
help='Path to the TLS private key file (PEM).\nRequired if `--certfile` is set.',
|
||||||
|
)
|
||||||
|
tls_group.add_argument(
|
||||||
|
'--client-cafile',
|
||||||
|
dest='client_cafile',
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
metavar='PATH',
|
||||||
|
help=(
|
||||||
|
'Path to a PEM bundle of trusted client CA certificates for mutual TLS.\n'
|
||||||
|
'Requires `--client-auth-required` to actually verify client certificates.'
|
||||||
|
),
|
||||||
|
)
|
||||||
|
tls_group.add_argument(
|
||||||
|
'--client-capath',
|
||||||
|
dest='client_capath',
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
metavar='PATH',
|
||||||
|
help=(
|
||||||
|
'Path to a directory of trusted client CA certificates for mutual TLS.\n'
|
||||||
|
'Requires `--client-auth-required` to actually verify client certificates.'
|
||||||
|
),
|
||||||
|
)
|
||||||
|
tls_group.add_argument(
|
||||||
|
'--client-auth-required',
|
||||||
|
dest='client_auth_required',
|
||||||
|
action='store_true',
|
||||||
|
help=(
|
||||||
|
'Require clients to present a valid certificate (mutual TLS).\n'
|
||||||
|
'Requires `--client-cafile` or `--client-capath`.'
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.interval < 0.25:
|
if args.interval < 0.25:
|
||||||
parser.error(
|
parser.error(
|
||||||
|
|
@ -125,12 +178,32 @@ def parse_arguments() -> argparse.Namespace:
|
||||||
f'Expected 1/4 or higher.',
|
f'Expected 1/4 or higher.',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if (args.certfile is None) != (args.keyfile is None):
|
||||||
|
parser.error('`--certfile` and `--keyfile` must be specified together.')
|
||||||
|
if args.certfile is not None and not os.path.isfile(args.certfile):
|
||||||
|
parser.error(f'`--certfile` not found: {args.certfile}')
|
||||||
|
if args.keyfile is not None and not os.path.isfile(args.keyfile):
|
||||||
|
parser.error(f'`--keyfile` not found: {args.keyfile}')
|
||||||
|
if args.client_cafile is not None and not os.path.isfile(args.client_cafile):
|
||||||
|
parser.error(f'`--client-cafile` not found: {args.client_cafile}')
|
||||||
|
if args.client_capath is not None and not os.path.isdir(args.client_capath):
|
||||||
|
parser.error(f'`--client-capath` not a directory: {args.client_capath}')
|
||||||
|
ca_provided = args.client_cafile is not None or args.client_capath is not None
|
||||||
|
if (ca_provided or args.client_auth_required) and args.certfile is None:
|
||||||
|
parser.error('Mutual TLS options require `--certfile` and `--keyfile`.')
|
||||||
|
if ca_provided != args.client_auth_required:
|
||||||
|
parser.error(
|
||||||
|
'`--client-cafile` / `--client-capath` and `--client-auth-required` must be '
|
||||||
|
'specified together to enable mutual TLS.',
|
||||||
|
)
|
||||||
|
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
|
||||||
def main() -> int: # pylint: disable=too-many-locals,too-many-statements
|
def main() -> int: # pylint: disable=too-many-locals,too-many-statements
|
||||||
"""Main function for ``nvitop-exporter`` CLI."""
|
"""Main function for ``nvitop-exporter`` CLI."""
|
||||||
args = parse_arguments()
|
args = parse_arguments()
|
||||||
|
scheme = 'https' if args.certfile is not None else 'http'
|
||||||
|
|
||||||
try:
|
try:
|
||||||
device_count = Device.count()
|
device_count = Device.count()
|
||||||
|
|
@ -181,7 +254,15 @@ def main() -> int: # pylint: disable=too-many-locals,too-many-statements
|
||||||
exporter = PrometheusExporter(devices, hostname=args.hostname, interval=args.interval)
|
exporter = PrometheusExporter(devices, hostname=args.hostname, interval=args.interval)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
start_wsgi_server(port=args.port, addr=args.bind_address)
|
start_wsgi_server(
|
||||||
|
port=args.port,
|
||||||
|
addr=args.bind_address,
|
||||||
|
certfile=args.certfile,
|
||||||
|
keyfile=args.keyfile,
|
||||||
|
client_cafile=args.client_cafile,
|
||||||
|
client_capath=args.client_capath,
|
||||||
|
client_auth_required=args.client_auth_required,
|
||||||
|
)
|
||||||
except OSError as ex:
|
except OSError as ex:
|
||||||
if 'address already in use' in str(ex).lower():
|
if 'address already in use' in str(ex).lower():
|
||||||
cprint(
|
cprint(
|
||||||
|
|
@ -190,7 +271,7 @@ def main() -> int: # pylint: disable=too-many-locals,too-many-statements
|
||||||
'Please specify a different port via `--port <PORT>`.'
|
'Please specify a different port via `--port <PORT>`.'
|
||||||
).format(
|
).format(
|
||||||
colored(
|
colored(
|
||||||
f'http://{args.bind_address}:{args.port}',
|
f'{scheme}://{args.bind_address}:{args.port}',
|
||||||
color='blue',
|
color='blue',
|
||||||
attrs=('bold', 'underline'),
|
attrs=('bold', 'underline'),
|
||||||
),
|
),
|
||||||
|
|
@ -204,7 +285,7 @@ def main() -> int: # pylint: disable=too-many-locals,too-many-statements
|
||||||
'Please specify a different address via `--bind-address <ADDRESS>`.'
|
'Please specify a different address via `--bind-address <ADDRESS>`.'
|
||||||
).format(
|
).format(
|
||||||
colored(
|
colored(
|
||||||
f'http://{args.bind_address}:{args.port}',
|
f'{scheme}://{args.bind_address}:{args.port}',
|
||||||
color='blue',
|
color='blue',
|
||||||
attrs=('bold', 'underline'),
|
attrs=('bold', 'underline'),
|
||||||
),
|
),
|
||||||
|
|
@ -219,7 +300,7 @@ def main() -> int: # pylint: disable=too-many-locals,too-many-statements
|
||||||
'INFO: Start the exporter on {} at {}.'.format(
|
'INFO: Start the exporter on {} at {}.'.format(
|
||||||
colored(args.hostname, color='magenta', attrs=('bold',)),
|
colored(args.hostname, color='magenta', attrs=('bold',)),
|
||||||
colored(
|
colored(
|
||||||
f'http://{args.bind_address}:{args.port}/metrics',
|
f'{scheme}://{args.bind_address}:{args.port}/metrics',
|
||||||
color='green',
|
color='green',
|
||||||
attrs=('bold', 'underline'),
|
attrs=('bold', 'underline'),
|
||||||
),
|
),
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,7 @@ classifiers = [
|
||||||
dependencies = [
|
dependencies = [
|
||||||
# Sync with nvitop/version.py and requirements.txt
|
# Sync with nvitop/version.py and requirements.txt
|
||||||
"nvitop ~= 1.6.2",
|
"nvitop ~= 1.6.2",
|
||||||
"prometheus-client >= 0.4.0",
|
"prometheus-client >= 0.19.0",
|
||||||
]
|
]
|
||||||
dynamic = ["version"]
|
dynamic = ["version"]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,2 +1,2 @@
|
||||||
nvitop
|
nvitop
|
||||||
prometheus-client >= 0.4.0
|
prometheus-client >= 0.19.0
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue