mirror of
https://github.com/XuehaiPan/nvitop.git
synced 2026-05-21 06:45:24 -06:00
2788 lines
70 KiB
JSON
2788 lines
70 KiB
JSON
{
|
|
"__inputs": [
|
|
{
|
|
"name": "DS_PROMETHEUS",
|
|
"label": "prometheus",
|
|
"description": "",
|
|
"type": "datasource",
|
|
"pluginId": "prometheus",
|
|
"pluginName": "Prometheus"
|
|
}
|
|
],
|
|
"__elements": {},
|
|
"__requires": [
|
|
{
|
|
"type": "panel",
|
|
"id": "gauge",
|
|
"name": "Gauge",
|
|
"version": ""
|
|
},
|
|
{
|
|
"type": "grafana",
|
|
"id": "grafana",
|
|
"name": "Grafana",
|
|
"version": "12.0.2"
|
|
},
|
|
{
|
|
"type": "datasource",
|
|
"id": "prometheus",
|
|
"name": "Prometheus",
|
|
"version": "1.0.0"
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "stat",
|
|
"name": "Stat",
|
|
"version": ""
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "table",
|
|
"name": "Table",
|
|
"version": ""
|
|
},
|
|
{
|
|
"type": "panel",
|
|
"id": "timeseries",
|
|
"name": "Time series",
|
|
"version": ""
|
|
}
|
|
],
|
|
"annotations": {
|
|
"list": [
|
|
{
|
|
"builtIn": 1,
|
|
"datasource": {
|
|
"type": "grafana",
|
|
"uid": "-- Grafana --"
|
|
},
|
|
"enable": true,
|
|
"hide": true,
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
|
"name": "Annotations & Alerts",
|
|
"type": "dashboard"
|
|
}
|
|
]
|
|
},
|
|
"description": "Grafana Dashboard built by `nvitop-exporter`.",
|
|
"editable": false,
|
|
"fiscalYearStartMonth": 0,
|
|
"graphTooltip": 0,
|
|
"id": null,
|
|
"links": [],
|
|
"liveNow": true,
|
|
"panels": [
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"id": 1,
|
|
"panels": [],
|
|
"title": "Overview",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"max": 100,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 90
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 4,
|
|
"x": 0,
|
|
"y": 1
|
|
},
|
|
"id": 2,
|
|
"options": {
|
|
"minVizHeight": 75,
|
|
"minVizWidth": 75,
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"sizing": "auto"
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "host_cpu_percent_Percentage{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} CPU",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "CPU Utilization",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"max": 100,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 90
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 18,
|
|
"w": 10,
|
|
"x": 4,
|
|
"y": 1
|
|
},
|
|
"id": 3,
|
|
"options": {
|
|
"minVizHeight": 75,
|
|
"minVizWidth": 75,
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"sizing": "auto"
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "gpu_utilization_Percentage{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} GPU {{index}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "GPU Utilization",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"max": 100,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 90
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 18,
|
|
"w": 10,
|
|
"x": 14,
|
|
"y": 1
|
|
},
|
|
"id": 4,
|
|
"options": {
|
|
"minVizHeight": 75,
|
|
"minVizWidth": 75,
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"sizing": "auto"
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "gpu_memory_percent_Percentage{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} GPU {{index}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "GPU Memory",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"description": "",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"max": 100,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 90
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 4,
|
|
"x": 0,
|
|
"y": 10
|
|
},
|
|
"id": 5,
|
|
"options": {
|
|
"minVizHeight": 75,
|
|
"minVizWidth": 75,
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"sizing": "auto"
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "host_virtual_memory_percent_Percentage{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} MEM",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Host Virtual Memory",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 19
|
|
},
|
|
"id": 6,
|
|
"panels": [],
|
|
"title": "Process",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 0,
|
|
"displayName": "occupied",
|
|
"min": 0,
|
|
"noValue": "0",
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 80
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 90
|
|
},
|
|
{
|
|
"color": "dark-red",
|
|
"value": 95
|
|
}
|
|
]
|
|
},
|
|
"unit": "none"
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Value #number of gpus"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "displayName",
|
|
"value": "total"
|
|
},
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"fixedColor": "gray",
|
|
"mode": "fixed"
|
|
}
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 5,
|
|
"x": 0,
|
|
"y": 20
|
|
},
|
|
"id": 7,
|
|
"options": {
|
|
"colorMode": "background",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "value_and_name",
|
|
"wideLayout": true
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "count(group by(uuid) (process_gpu_memory_MiB{hostname=~\"$hostname\", username=~\"$username\"}))",
|
|
"format": "table",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "number of occupied gpus",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "count(gpu_memory_total_MiB{hostname=~\"$hostname\"})",
|
|
"format": "table",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "number of gpus",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Number of GPUs",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 0,
|
|
"fieldMinMax": false,
|
|
"min": 0,
|
|
"noValue": "No Running Processes",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 4
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 6
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 8
|
|
}
|
|
]
|
|
},
|
|
"unit": "none"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 19,
|
|
"x": 5,
|
|
"y": 20
|
|
},
|
|
"id": 8,
|
|
"options": {
|
|
"colorMode": "background",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "value_and_name",
|
|
"wideLayout": true
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "count by(hostname, index) (process_gpu_memory_MiB{hostname=~\"$hostname\", username=~\"$username\"})",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"legendFormat": "{{hostname}} GPU {{index}}",
|
|
"range": false,
|
|
"refId": "number of gpu processes",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Number of Running GPU Processes",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 0,
|
|
"min": 0,
|
|
"noValue": "No Running Processes",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 8
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 16
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 32
|
|
}
|
|
]
|
|
},
|
|
"unit": "none"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 5,
|
|
"x": 0,
|
|
"y": 24
|
|
},
|
|
"id": 9,
|
|
"options": {
|
|
"colorMode": "background",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "value_and_name",
|
|
"wideLayout": true
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "count by(hostname) (count by(hostname, pid) (process_info_info{hostname=~\"$hostname\", username=~\"$username\"}))",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "number of gpu processes",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Number of Running GPU Processes",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"custom": {
|
|
"align": "right",
|
|
"cellOptions": {
|
|
"type": "auto",
|
|
"wrapText": false
|
|
},
|
|
"filterable": true,
|
|
"inspect": false,
|
|
"minWidth": 50
|
|
},
|
|
"fieldMinMax": true,
|
|
"min": 0,
|
|
"noValue": "No Running Processes",
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 90
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byRegexp",
|
|
"options": "/^%.*$/"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "custom.cellOptions",
|
|
"value": {
|
|
"mode": "lcd",
|
|
"type": "gauge",
|
|
"valueDisplayMode": "color"
|
|
}
|
|
},
|
|
{
|
|
"id": "max",
|
|
"value": 100
|
|
},
|
|
{
|
|
"id": "unit",
|
|
"value": "percent"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "RSS MEMORY"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "mbytes"
|
|
},
|
|
{
|
|
"id": "custom.cellOptions",
|
|
"value": {
|
|
"mode": "gradient",
|
|
"type": "gauge",
|
|
"valueDisplayMode": "color"
|
|
}
|
|
},
|
|
{
|
|
"id": "custom.minWidth",
|
|
"value": 150
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "GPU MEMORY"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "mbytes"
|
|
},
|
|
{
|
|
"id": "custom.cellOptions",
|
|
"value": {
|
|
"mode": "gradient",
|
|
"type": "gauge",
|
|
"valueDisplayMode": "color"
|
|
}
|
|
},
|
|
{
|
|
"id": "custom.minWidth",
|
|
"value": 150
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "TIME"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "s"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "COMMAND"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "custom.align",
|
|
"value": "left"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 13,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 28
|
|
},
|
|
"id": 10,
|
|
"options": {
|
|
"cellHeight": "sm",
|
|
"footer": {
|
|
"countRows": false,
|
|
"enablePagination": false,
|
|
"fields": [
|
|
"Value #gpu memory (lastNotNull)"
|
|
],
|
|
"reducer": [
|
|
"sum"
|
|
],
|
|
"show": true
|
|
},
|
|
"frameIndex": 0,
|
|
"showHeader": true,
|
|
"sortBy": [
|
|
{
|
|
"desc": true,
|
|
"displayName": "GPU MEMORY"
|
|
}
|
|
]
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "process_cpu_percent_Percentage{hostname=~\"$hostname\", username=~\"$username\"}",
|
|
"format": "table",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"interval": "",
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "cpu percent",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "process_rss_memory_MiB{hostname=~\"$hostname\", username=~\"$username\"}",
|
|
"format": "table",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "rss memory",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "process_gpu_memory_MiB{hostname=~\"$hostname\", username=~\"$username\"}",
|
|
"format": "table",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "gpu memory",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "process_gpu_sm_utilization_Percentage{hostname=~\"$hostname\", username=~\"$username\"}",
|
|
"format": "table",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "gpu sm utilization",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "process_gpu_memory_utilization_Percentage{hostname=~\"$hostname\", username=~\"$username\"}",
|
|
"format": "table",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "gpu memory bandwidth utilization",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "process_gpu_encoder_utilization_Percentage{hostname=~\"$hostname\", username=~\"$username\"}",
|
|
"format": "table",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "gpu encoder utilization",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "process_gpu_decoder_utilization_Percentage{hostname=~\"$hostname\", username=~\"$username\"}",
|
|
"format": "table",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "gpu decoder utilization",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "process_running_time_Second{hostname=~\"$hostname\", username=~\"$username\"}",
|
|
"format": "table",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"interval": "",
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "running time",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "process_info_info{hostname=~\"$hostname\", username=~\"$username\"}",
|
|
"format": "table",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "command",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "host_virtual_memory_total_MiB{hostname=~\"$hostname\"}",
|
|
"format": "table",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "host memory total",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"exemplar": false,
|
|
"expr": "gpu_memory_total_MiB{hostname!~\"$hostname\"}",
|
|
"format": "table",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": true,
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "gpu memory total",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "GPU Processes",
|
|
"transformations": [
|
|
{
|
|
"id": "configFromData",
|
|
"options": {
|
|
"applyTo": {
|
|
"id": "byType",
|
|
"options": "number"
|
|
},
|
|
"configRefId": "host memory total",
|
|
"mappings": [
|
|
{
|
|
"fieldName": "Value #rss memory",
|
|
"handlerKey": "max"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"id": "configFromData",
|
|
"options": {
|
|
"applyTo": {
|
|
"id": "byType",
|
|
"options": "number"
|
|
},
|
|
"configRefId": "gpu memory total",
|
|
"mappings": [
|
|
{
|
|
"fieldName": "Value #gpu memory",
|
|
"handlerKey": "max"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"id": "merge",
|
|
"options": {}
|
|
},
|
|
{
|
|
"id": "groupBy",
|
|
"options": {
|
|
"fields": {
|
|
"Row": {
|
|
"aggregations": [],
|
|
"operation": "aggregate"
|
|
},
|
|
"Value #command": {
|
|
"aggregations": []
|
|
},
|
|
"Value #cpu percent": {
|
|
"aggregations": [
|
|
"lastNotNull"
|
|
],
|
|
"operation": "aggregate"
|
|
},
|
|
"Value #gpu decoder utilization": {
|
|
"aggregations": [
|
|
"lastNotNull"
|
|
],
|
|
"operation": "aggregate"
|
|
},
|
|
"Value #gpu encoder utilization": {
|
|
"aggregations": [
|
|
"lastNotNull"
|
|
],
|
|
"operation": "aggregate"
|
|
},
|
|
"Value #gpu memory": {
|
|
"aggregations": [
|
|
"lastNotNull"
|
|
],
|
|
"operation": "aggregate"
|
|
},
|
|
"Value #gpu memory bandwidth utilization": {
|
|
"aggregations": [
|
|
"lastNotNull"
|
|
],
|
|
"operation": "aggregate"
|
|
},
|
|
"Value #gpu sm utilization": {
|
|
"aggregations": [
|
|
"lastNotNull"
|
|
],
|
|
"operation": "aggregate"
|
|
},
|
|
"Value #rss memory": {
|
|
"aggregations": [
|
|
"lastNotNull"
|
|
],
|
|
"operation": "aggregate"
|
|
},
|
|
"Value #running time": {
|
|
"aggregations": [
|
|
"lastNotNull"
|
|
],
|
|
"operation": "aggregate"
|
|
},
|
|
"command": {
|
|
"aggregations": [
|
|
"lastNotNull"
|
|
],
|
|
"operation": "aggregate"
|
|
},
|
|
"hostname": {
|
|
"aggregations": [],
|
|
"operation": "groupby"
|
|
},
|
|
"index": {
|
|
"aggregations": [],
|
|
"operation": "groupby"
|
|
},
|
|
"pid": {
|
|
"aggregations": [],
|
|
"operation": "groupby"
|
|
},
|
|
"status": {
|
|
"aggregations": [
|
|
"lastNotNull"
|
|
],
|
|
"operation": "aggregate"
|
|
},
|
|
"username": {
|
|
"aggregations": [],
|
|
"operation": "groupby"
|
|
},
|
|
"uuid": {
|
|
"aggregations": [],
|
|
"operation": "groupby"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": "organize",
|
|
"options": {
|
|
"excludeByName": {
|
|
"uuid": true
|
|
},
|
|
"includeByName": {},
|
|
"indexByName": {
|
|
"Value #cpu percent (lastNotNull)": 6,
|
|
"Value #gpu decoder utilization (lastNotNull)": 12,
|
|
"Value #gpu encoder utilization (lastNotNull)": 11,
|
|
"Value #gpu memory (lastNotNull)": 8,
|
|
"Value #gpu memory bandwidth utilization (lastNotNull)": 10,
|
|
"Value #gpu sm utilization (lastNotNull)": 9,
|
|
"Value #rss memory (lastNotNull)": 7,
|
|
"Value #running time (lastNotNull)": 13,
|
|
"command (lastNotNull)": 14,
|
|
"hostname": 0,
|
|
"index": 3,
|
|
"pid": 1,
|
|
"status (lastNotNull)": 5,
|
|
"username": 2,
|
|
"uuid": 4
|
|
},
|
|
"renameByName": {
|
|
"Value #cpu percent (lastNotNull)": "%CPU",
|
|
"Value #gpu decoder utilization (lastNotNull)": "%DEC",
|
|
"Value #gpu encoder utilization (lastNotNull)": "%ENC",
|
|
"Value #gpu memory (lastNotNull)": "GPU MEMORY",
|
|
"Value #gpu memory bandwidth utilization (lastNotNull)": "%GMBW",
|
|
"Value #gpu sm utilization (lastNotNull)": "%SM",
|
|
"Value #rss memory (lastNotNull)": "RSS MEMORY",
|
|
"Value #running time (lastNotNull)": "TIME",
|
|
"command (lastNotNull)": "COMMAND",
|
|
"hostname": "HOSTNAME",
|
|
"index": "DEVICE",
|
|
"pid": "PID",
|
|
"status (lastNotNull)": "STATUS",
|
|
"username": "USERNAME"
|
|
}
|
|
}
|
|
}
|
|
],
|
|
"type": "table"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 41
|
|
},
|
|
"id": 11,
|
|
"panels": [],
|
|
"title": "System",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"axisSoftMax": 10,
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 2,
|
|
"gradientMode": "opacity",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"min": 0,
|
|
"unit": "percent"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 14,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 42
|
|
},
|
|
"id": 12,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"lastNotNull",
|
|
"mean",
|
|
"max"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true,
|
|
"sortBy": "Name",
|
|
"sortDesc": false
|
|
},
|
|
"tooltip": {
|
|
"maxHeight": 600,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "host_cpu_percent_Percentage{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "CPU Utilization",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"axisSoftMax": 10,
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 2,
|
|
"gradientMode": "opacity",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"unit": "percent"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 14,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 42
|
|
},
|
|
"id": 13,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true,
|
|
"sortBy": "Name",
|
|
"sortDesc": false
|
|
},
|
|
"tooltip": {
|
|
"maxHeight": 600,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "host_load_average_1m_Percentage{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} (1m)",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "host_load_average_5m_Percentage{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} (5m)",
|
|
"range": true,
|
|
"refId": "B",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "host_load_average_15m_Percentage{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} (15m)",
|
|
"range": true,
|
|
"refId": "C",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "CPU Load Average",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"axisSoftMax": 1024,
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 2,
|
|
"gradientMode": "opacity",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"unit": "mbytes"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 14,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 56
|
|
},
|
|
"id": 14,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"lastNotNull",
|
|
"mean",
|
|
"max"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true,
|
|
"sortBy": "Name",
|
|
"sortDesc": false
|
|
},
|
|
"tooltip": {
|
|
"maxHeight": 600,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "host_virtual_memory_used_MiB{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Host Virtual Memory",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"axisSoftMax": 1024,
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 2,
|
|
"gradientMode": "opacity",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"min": 0,
|
|
"unit": "mbytes"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 14,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 56
|
|
},
|
|
"id": 15,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"lastNotNull",
|
|
"mean",
|
|
"max"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true,
|
|
"sortBy": "Name",
|
|
"sortDesc": false
|
|
},
|
|
"tooltip": {
|
|
"maxHeight": 600,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "host_swap_memory_used_MiB{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Host Swap Memory",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 70
|
|
},
|
|
"id": 16,
|
|
"panels": [],
|
|
"title": "System I/O",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": true,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"axisSoftMax": 1,
|
|
"axisSoftMin": -1,
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 2,
|
|
"gradientMode": "opacity",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"unit": "MiBs"
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byRegexp",
|
|
"options": "/.* TX$/"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "custom.transform",
|
|
"value": "negative-Y"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 14,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 71
|
|
},
|
|
"id": 17,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"lastNotNull",
|
|
"mean",
|
|
"max"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true,
|
|
"sortBy": "Name",
|
|
"sortDesc": false
|
|
},
|
|
"tooltip": {
|
|
"maxHeight": 600,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "sum by(hostname) (rate(host_net_io_rx_data_MiB{hostname=~\"$hostname\"}[$__rate_interval]))",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} RX",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "sum by(hostname) (rate(host_net_io_tx_data_MiB{hostname=~\"$hostname\"}[$__rate_interval]))",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} TX",
|
|
"range": true,
|
|
"refId": "B",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Host Network I/O",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": true,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"axisSoftMax": 1,
|
|
"axisSoftMin": -1,
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 2,
|
|
"gradientMode": "opacity",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"unit": "MiBs"
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byRegexp",
|
|
"options": "/.* Write$/"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "custom.transform",
|
|
"value": "negative-Y"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 14,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 71
|
|
},
|
|
"id": 18,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"lastNotNull",
|
|
"mean",
|
|
"max"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true,
|
|
"sortBy": "Name",
|
|
"sortDesc": false
|
|
},
|
|
"tooltip": {
|
|
"maxHeight": 600,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "sum by(hostname) (rate(host_disk_io_read_data_MiB{hostname=~\"$hostname\"}[$__rate_interval]))",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} Read",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "sum by(hostname) (rate(host_disk_io_write_data_MiB{hostname=~\"$hostname\"}[$__rate_interval]))",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} Write",
|
|
"range": true,
|
|
"refId": "B",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "Host Disk I/O",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 85
|
|
},
|
|
"id": 19,
|
|
"panels": [],
|
|
"title": "Device",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"max": 100,
|
|
"min": 0,
|
|
"unit": "percent"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 14,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 86
|
|
},
|
|
"id": 20,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"lastNotNull",
|
|
"mean",
|
|
"max"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true,
|
|
"sortBy": "Name",
|
|
"sortDesc": false
|
|
},
|
|
"tooltip": {
|
|
"maxHeight": 600,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "gpu_utilization_Percentage{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} GPU {{index}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "GPU Utilization",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"axisSoftMax": 1024,
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"min": 0,
|
|
"unit": "mbytes"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 14,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 86
|
|
},
|
|
"id": 21,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"lastNotNull",
|
|
"mean",
|
|
"max"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true,
|
|
"sortBy": "Name",
|
|
"sortDesc": false
|
|
},
|
|
"tooltip": {
|
|
"maxHeight": 600,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "gpu_memory_used_MiB{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} GPU {{index}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "GPU Memory",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"axisSoftMax": 1,
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"min": 0,
|
|
"unit": "MiBs"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 14,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 100
|
|
},
|
|
"id": 22,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"lastNotNull",
|
|
"mean",
|
|
"max"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true,
|
|
"sortBy": "Name",
|
|
"sortDesc": false
|
|
},
|
|
"tooltip": {
|
|
"maxHeight": 600,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "gpu_pcie_rx_throughput_MiBps{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} GPU {{index}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "GPU PCIe RX Throughput",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"axisSoftMax": 1,
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"min": 0,
|
|
"unit": "MiBs"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 14,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 100
|
|
},
|
|
"id": 23,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"lastNotNull",
|
|
"mean",
|
|
"max"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true,
|
|
"sortBy": "Name",
|
|
"sortDesc": false
|
|
},
|
|
"tooltip": {
|
|
"maxHeight": 600,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "gpu_pcie_tx_throughput_MiBps{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} GPU {{index}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "GPU PCIe TX Throughput",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"axisSoftMax": 1,
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"min": 0,
|
|
"unit": "MiBs"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 14,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 114
|
|
},
|
|
"id": 24,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"lastNotNull",
|
|
"mean",
|
|
"max"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true,
|
|
"sortBy": "Name",
|
|
"sortDesc": false
|
|
},
|
|
"tooltip": {
|
|
"maxHeight": 600,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "gpu_nvlink_total_rx_throughput_MiBps{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} GPU {{index}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "GPU NVLink RX Throughput",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"axisSoftMax": 1,
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"min": 0,
|
|
"unit": "MiBs"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 14,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 114
|
|
},
|
|
"id": 25,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"lastNotNull",
|
|
"mean",
|
|
"max"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true,
|
|
"sortBy": "Name",
|
|
"sortDesc": false
|
|
},
|
|
"tooltip": {
|
|
"maxHeight": 600,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "gpu_nvlink_total_tx_throughput_MiBps{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} GPU {{index}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "GPU NVLink TX Throughput",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"unit": "watt"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 14,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 128
|
|
},
|
|
"id": 26,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"lastNotNull",
|
|
"mean",
|
|
"max"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true,
|
|
"sortBy": "Name",
|
|
"sortDesc": false
|
|
},
|
|
"tooltip": {
|
|
"maxHeight": 600,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "gpu_power_usage_W{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} GPU {{index}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "GPU Power Usage",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"unit": "celsius"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 14,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 128
|
|
},
|
|
"id": 27,
|
|
"options": {
|
|
"legend": {
|
|
"calcs": [
|
|
"lastNotNull",
|
|
"mean",
|
|
"max"
|
|
],
|
|
"displayMode": "table",
|
|
"placement": "bottom",
|
|
"showLegend": true,
|
|
"sortBy": "Name",
|
|
"sortDesc": false
|
|
},
|
|
"tooltip": {
|
|
"maxHeight": 600,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "${DS_PROMETHEUS}"
|
|
},
|
|
"disableTextWrap": false,
|
|
"editorMode": "builder",
|
|
"expr": "gpu_temperature_C{hostname=~\"$hostname\"}",
|
|
"fullMetaSearch": false,
|
|
"hide": false,
|
|
"includeNullMetadata": true,
|
|
"instant": false,
|
|
"legendFormat": "{{hostname}} GPU {{index}}",
|
|
"range": true,
|
|
"refId": "A",
|
|
"useBackend": false
|
|
}
|
|
],
|
|
"title": "GPU Temperature",
|
|
"type": "timeseries"
|
|
}
|
|
],
|
|
"refresh": "5s",
|
|
"schemaVersion": 41,
|
|
"tags": [
|
|
"nvitop",
|
|
"nvitop-exporter",
|
|
"prometheus",
|
|
"nvidia",
|
|
"gpu",
|
|
"gpu process",
|
|
"gpu monitoring"
|
|
],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"current": {},
|
|
"definition": "label_values(hostname)",
|
|
"description": "",
|
|
"includeAll": true,
|
|
"multi": true,
|
|
"name": "hostname",
|
|
"options": [],
|
|
"query": {
|
|
"qryType": 1,
|
|
"query": "label_values(hostname)"
|
|
},
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"type": "query"
|
|
},
|
|
{
|
|
"current": {},
|
|
"definition": "label_values(username)",
|
|
"description": "",
|
|
"includeAll": true,
|
|
"multi": true,
|
|
"name": "username",
|
|
"options": [],
|
|
"query": {
|
|
"qryType": 1,
|
|
"query": "label_values(username)"
|
|
},
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"type": "query"
|
|
}
|
|
]
|
|
},
|
|
"time": {
|
|
"from": "now-24h",
|
|
"to": "now"
|
|
},
|
|
"timepicker": {},
|
|
"timezone": "browser",
|
|
"title": "nvitop-dashboard",
|
|
"uid": "bdl3vqwxprhtsa",
|
|
"version": 1,
|
|
"weekStart": ""
|
|
}
|