nvitop/nvitop-exporter/grafana/dashboard.json

2788 lines
70 KiB
JSON

{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__elements": {},
"__requires": [
{
"type": "panel",
"id": "gauge",
"name": "Gauge",
"version": ""
},
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "12.0.2"
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "stat",
"name": "Stat",
"version": ""
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Grafana Dashboard built by `nvitop-exporter`.",
"editable": false,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": true,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"panels": [],
"title": "Overview",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"max": 100,
"min": 0,
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 90
}
]
},
"unit": "percent"
}
},
"gridPos": {
"h": 9,
"w": 4,
"x": 0,
"y": 1
},
"id": 2,
"options": {
"minVizHeight": 75,
"minVizWidth": 75,
"orientation": "auto",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"sizing": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "host_cpu_percent_Percentage{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} CPU",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "CPU Utilization",
"type": "gauge"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"max": 100,
"min": 0,
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 90
}
]
},
"unit": "percent"
}
},
"gridPos": {
"h": 18,
"w": 10,
"x": 4,
"y": 1
},
"id": 3,
"options": {
"minVizHeight": 75,
"minVizWidth": 75,
"orientation": "auto",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"sizing": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "gpu_utilization_Percentage{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} GPU {{index}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "GPU Utilization",
"type": "gauge"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"max": 100,
"min": 0,
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 90
}
]
},
"unit": "percent"
}
},
"gridPos": {
"h": 18,
"w": 10,
"x": 14,
"y": 1
},
"id": 4,
"options": {
"minVizHeight": 75,
"minVizWidth": 75,
"orientation": "auto",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"sizing": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "gpu_memory_percent_Percentage{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} GPU {{index}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "GPU Memory",
"type": "gauge"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"max": 100,
"min": 0,
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 90
}
]
},
"unit": "percent"
}
},
"gridPos": {
"h": 9,
"w": 4,
"x": 0,
"y": 10
},
"id": 5,
"options": {
"minVizHeight": 75,
"minVizWidth": 75,
"orientation": "auto",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"sizing": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "host_virtual_memory_percent_Percentage{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} MEM",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Host Virtual Memory",
"type": "gauge"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 19
},
"id": 6,
"panels": [],
"title": "Process",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 0,
"displayName": "occupied",
"min": 0,
"noValue": "0",
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "orange",
"value": 80
},
{
"color": "red",
"value": 90
},
{
"color": "dark-red",
"value": 95
}
]
},
"unit": "none"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Value #number of gpus"
},
"properties": [
{
"id": "displayName",
"value": "total"
},
{
"id": "color",
"value": {
"fixedColor": "gray",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 4,
"w": 5,
"x": 0,
"y": 20
},
"id": 7,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "value_and_name",
"wideLayout": true
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "count(group by(uuid) (process_gpu_memory_MiB{hostname=~\"$hostname\", username=~\"$username\"}))",
"format": "table",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "number of occupied gpus",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "count(gpu_memory_total_MiB{hostname=~\"$hostname\"})",
"format": "table",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "number of gpus",
"useBackend": false
}
],
"title": "Number of GPUs",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 0,
"fieldMinMax": false,
"min": 0,
"noValue": "No Running Processes",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 4
},
{
"color": "orange",
"value": 6
},
{
"color": "red",
"value": 8
}
]
},
"unit": "none"
}
},
"gridPos": {
"h": 8,
"w": 19,
"x": 5,
"y": 20
},
"id": 8,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "value_and_name",
"wideLayout": true
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "count by(hostname, index) (process_gpu_memory_MiB{hostname=~\"$hostname\", username=~\"$username\"})",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "{{hostname}} GPU {{index}}",
"range": false,
"refId": "number of gpu processes",
"useBackend": false
}
],
"title": "Number of Running GPU Processes",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 0,
"min": 0,
"noValue": "No Running Processes",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 8
},
{
"color": "orange",
"value": 16
},
{
"color": "red",
"value": 32
}
]
},
"unit": "none"
}
},
"gridPos": {
"h": 4,
"w": 5,
"x": 0,
"y": 24
},
"id": 9,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "value_and_name",
"wideLayout": true
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "count by(hostname) (count by(hostname, pid) (process_info_info{hostname=~\"$hostname\", username=~\"$username\"}))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "number of gpu processes",
"useBackend": false
}
],
"title": "Number of Running GPU Processes",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"custom": {
"align": "right",
"cellOptions": {
"type": "auto",
"wrapText": false
},
"filterable": true,
"inspect": false,
"minWidth": 50
},
"fieldMinMax": true,
"min": 0,
"noValue": "No Running Processes",
"thresholds": {
"mode": "percentage",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 70
},
{
"color": "red",
"value": 90
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "/^%.*$/"
},
"properties": [
{
"id": "custom.cellOptions",
"value": {
"mode": "lcd",
"type": "gauge",
"valueDisplayMode": "color"
}
},
{
"id": "max",
"value": 100
},
{
"id": "unit",
"value": "percent"
}
]
},
{
"matcher": {
"id": "byName",
"options": "RSS MEMORY"
},
"properties": [
{
"id": "unit",
"value": "mbytes"
},
{
"id": "custom.cellOptions",
"value": {
"mode": "gradient",
"type": "gauge",
"valueDisplayMode": "color"
}
},
{
"id": "custom.minWidth",
"value": 150
}
]
},
{
"matcher": {
"id": "byName",
"options": "GPU MEMORY"
},
"properties": [
{
"id": "unit",
"value": "mbytes"
},
{
"id": "custom.cellOptions",
"value": {
"mode": "gradient",
"type": "gauge",
"valueDisplayMode": "color"
}
},
{
"id": "custom.minWidth",
"value": 150
}
]
},
{
"matcher": {
"id": "byName",
"options": "TIME"
},
"properties": [
{
"id": "unit",
"value": "s"
}
]
},
{
"matcher": {
"id": "byName",
"options": "COMMAND"
},
"properties": [
{
"id": "custom.align",
"value": "left"
}
]
}
]
},
"gridPos": {
"h": 13,
"w": 24,
"x": 0,
"y": 28
},
"id": 10,
"options": {
"cellHeight": "sm",
"footer": {
"countRows": false,
"enablePagination": false,
"fields": [
"Value #gpu memory (lastNotNull)"
],
"reducer": [
"sum"
],
"show": true
},
"frameIndex": 0,
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "GPU MEMORY"
}
]
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "process_cpu_percent_Percentage{hostname=~\"$hostname\", username=~\"$username\"}",
"format": "table",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": true,
"interval": "",
"legendFormat": "__auto",
"range": false,
"refId": "cpu percent",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "process_rss_memory_MiB{hostname=~\"$hostname\", username=~\"$username\"}",
"format": "table",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "rss memory",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "process_gpu_memory_MiB{hostname=~\"$hostname\", username=~\"$username\"}",
"format": "table",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "gpu memory",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "process_gpu_sm_utilization_Percentage{hostname=~\"$hostname\", username=~\"$username\"}",
"format": "table",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "gpu sm utilization",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "process_gpu_memory_utilization_Percentage{hostname=~\"$hostname\", username=~\"$username\"}",
"format": "table",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "gpu memory bandwidth utilization",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "process_gpu_encoder_utilization_Percentage{hostname=~\"$hostname\", username=~\"$username\"}",
"format": "table",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "gpu encoder utilization",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "process_gpu_decoder_utilization_Percentage{hostname=~\"$hostname\", username=~\"$username\"}",
"format": "table",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "gpu decoder utilization",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "process_running_time_Second{hostname=~\"$hostname\", username=~\"$username\"}",
"format": "table",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": true,
"interval": "",
"legendFormat": "__auto",
"range": false,
"refId": "running time",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "process_info_info{hostname=~\"$hostname\", username=~\"$username\"}",
"format": "table",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "command",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "host_virtual_memory_total_MiB{hostname=~\"$hostname\"}",
"format": "table",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "host memory total",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "gpu_memory_total_MiB{hostname!~\"$hostname\"}",
"format": "table",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "gpu memory total",
"useBackend": false
}
],
"title": "GPU Processes",
"transformations": [
{
"id": "configFromData",
"options": {
"applyTo": {
"id": "byType",
"options": "number"
},
"configRefId": "host memory total",
"mappings": [
{
"fieldName": "Value #rss memory",
"handlerKey": "max"
}
]
}
},
{
"id": "configFromData",
"options": {
"applyTo": {
"id": "byType",
"options": "number"
},
"configRefId": "gpu memory total",
"mappings": [
{
"fieldName": "Value #gpu memory",
"handlerKey": "max"
}
]
}
},
{
"id": "merge",
"options": {}
},
{
"id": "groupBy",
"options": {
"fields": {
"Row": {
"aggregations": [],
"operation": "aggregate"
},
"Value #command": {
"aggregations": []
},
"Value #cpu percent": {
"aggregations": [
"lastNotNull"
],
"operation": "aggregate"
},
"Value #gpu decoder utilization": {
"aggregations": [
"lastNotNull"
],
"operation": "aggregate"
},
"Value #gpu encoder utilization": {
"aggregations": [
"lastNotNull"
],
"operation": "aggregate"
},
"Value #gpu memory": {
"aggregations": [
"lastNotNull"
],
"operation": "aggregate"
},
"Value #gpu memory bandwidth utilization": {
"aggregations": [
"lastNotNull"
],
"operation": "aggregate"
},
"Value #gpu sm utilization": {
"aggregations": [
"lastNotNull"
],
"operation": "aggregate"
},
"Value #rss memory": {
"aggregations": [
"lastNotNull"
],
"operation": "aggregate"
},
"Value #running time": {
"aggregations": [
"lastNotNull"
],
"operation": "aggregate"
},
"command": {
"aggregations": [
"lastNotNull"
],
"operation": "aggregate"
},
"hostname": {
"aggregations": [],
"operation": "groupby"
},
"index": {
"aggregations": [],
"operation": "groupby"
},
"pid": {
"aggregations": [],
"operation": "groupby"
},
"status": {
"aggregations": [
"lastNotNull"
],
"operation": "aggregate"
},
"username": {
"aggregations": [],
"operation": "groupby"
},
"uuid": {
"aggregations": [],
"operation": "groupby"
}
}
}
},
{
"id": "organize",
"options": {
"excludeByName": {
"uuid": true
},
"includeByName": {},
"indexByName": {
"Value #cpu percent (lastNotNull)": 6,
"Value #gpu decoder utilization (lastNotNull)": 12,
"Value #gpu encoder utilization (lastNotNull)": 11,
"Value #gpu memory (lastNotNull)": 8,
"Value #gpu memory bandwidth utilization (lastNotNull)": 10,
"Value #gpu sm utilization (lastNotNull)": 9,
"Value #rss memory (lastNotNull)": 7,
"Value #running time (lastNotNull)": 13,
"command (lastNotNull)": 14,
"hostname": 0,
"index": 3,
"pid": 1,
"status (lastNotNull)": 5,
"username": 2,
"uuid": 4
},
"renameByName": {
"Value #cpu percent (lastNotNull)": "%CPU",
"Value #gpu decoder utilization (lastNotNull)": "%DEC",
"Value #gpu encoder utilization (lastNotNull)": "%ENC",
"Value #gpu memory (lastNotNull)": "GPU MEMORY",
"Value #gpu memory bandwidth utilization (lastNotNull)": "%GMBW",
"Value #gpu sm utilization (lastNotNull)": "%SM",
"Value #rss memory (lastNotNull)": "RSS MEMORY",
"Value #running time (lastNotNull)": "TIME",
"command (lastNotNull)": "COMMAND",
"hostname": "HOSTNAME",
"index": "DEVICE",
"pid": "PID",
"status (lastNotNull)": "STATUS",
"username": "USERNAME"
}
}
}
],
"type": "table"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 41
},
"id": 11,
"panels": [],
"title": "System",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMax": 10,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 2,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"min": 0,
"unit": "percent"
}
},
"gridPos": {
"h": 14,
"w": 12,
"x": 0,
"y": 42
},
"id": 12,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": false
},
"tooltip": {
"maxHeight": 600,
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "host_cpu_percent_Percentage{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "CPU Utilization",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMax": 10,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 2,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"unit": "percent"
}
},
"gridPos": {
"h": 14,
"w": 12,
"x": 12,
"y": 42
},
"id": 13,
"options": {
"legend": {
"calcs": [
"lastNotNull"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": false
},
"tooltip": {
"maxHeight": 600,
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "host_load_average_1m_Percentage{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} (1m)",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "host_load_average_5m_Percentage{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} (5m)",
"range": true,
"refId": "B",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "host_load_average_15m_Percentage{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} (15m)",
"range": true,
"refId": "C",
"useBackend": false
}
],
"title": "CPU Load Average",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMax": 1024,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 2,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"unit": "mbytes"
}
},
"gridPos": {
"h": 14,
"w": 12,
"x": 0,
"y": 56
},
"id": 14,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": false
},
"tooltip": {
"maxHeight": 600,
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "host_virtual_memory_used_MiB{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Host Virtual Memory",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMax": 1024,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 2,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"min": 0,
"unit": "mbytes"
}
},
"gridPos": {
"h": 14,
"w": 12,
"x": 12,
"y": 56
},
"id": 15,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": false
},
"tooltip": {
"maxHeight": 600,
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "host_swap_memory_used_MiB{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Host Swap Memory",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 70
},
"id": 16,
"panels": [],
"title": "System I/O",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": true,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMax": 1,
"axisSoftMin": -1,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 2,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"unit": "MiBs"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "/.* TX$/"
},
"properties": [
{
"id": "custom.transform",
"value": "negative-Y"
}
]
}
]
},
"gridPos": {
"h": 14,
"w": 12,
"x": 0,
"y": 71
},
"id": 17,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": false
},
"tooltip": {
"maxHeight": 600,
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "sum by(hostname) (rate(host_net_io_rx_data_MiB{hostname=~\"$hostname\"}[$__rate_interval]))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} RX",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "sum by(hostname) (rate(host_net_io_tx_data_MiB{hostname=~\"$hostname\"}[$__rate_interval]))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} TX",
"range": true,
"refId": "B",
"useBackend": false
}
],
"title": "Host Network I/O",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": true,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMax": 1,
"axisSoftMin": -1,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 2,
"gradientMode": "opacity",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"unit": "MiBs"
},
"overrides": [
{
"matcher": {
"id": "byRegexp",
"options": "/.* Write$/"
},
"properties": [
{
"id": "custom.transform",
"value": "negative-Y"
}
]
}
]
},
"gridPos": {
"h": 14,
"w": 12,
"x": 12,
"y": 71
},
"id": 18,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": false
},
"tooltip": {
"maxHeight": 600,
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "sum by(hostname) (rate(host_disk_io_read_data_MiB{hostname=~\"$hostname\"}[$__rate_interval]))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} Read",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "sum by(hostname) (rate(host_disk_io_write_data_MiB{hostname=~\"$hostname\"}[$__rate_interval]))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} Write",
"range": true,
"refId": "B",
"useBackend": false
}
],
"title": "Host Disk I/O",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 85
},
"id": 19,
"panels": [],
"title": "Device",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"max": 100,
"min": 0,
"unit": "percent"
}
},
"gridPos": {
"h": 14,
"w": 12,
"x": 0,
"y": 86
},
"id": 20,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": false
},
"tooltip": {
"maxHeight": 600,
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "gpu_utilization_Percentage{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} GPU {{index}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "GPU Utilization",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMax": 1024,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"min": 0,
"unit": "mbytes"
}
},
"gridPos": {
"h": 14,
"w": 12,
"x": 12,
"y": 86
},
"id": 21,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": false
},
"tooltip": {
"maxHeight": 600,
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "gpu_memory_used_MiB{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} GPU {{index}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "GPU Memory",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMax": 1,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"min": 0,
"unit": "MiBs"
}
},
"gridPos": {
"h": 14,
"w": 12,
"x": 0,
"y": 100
},
"id": 22,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": false
},
"tooltip": {
"maxHeight": 600,
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "gpu_pcie_rx_throughput_MiBps{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} GPU {{index}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "GPU PCIe RX Throughput",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMax": 1,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"min": 0,
"unit": "MiBs"
}
},
"gridPos": {
"h": 14,
"w": 12,
"x": 12,
"y": 100
},
"id": 23,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": false
},
"tooltip": {
"maxHeight": 600,
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "gpu_pcie_tx_throughput_MiBps{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} GPU {{index}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "GPU PCIe TX Throughput",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMax": 1,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"min": 0,
"unit": "MiBs"
}
},
"gridPos": {
"h": 14,
"w": 12,
"x": 0,
"y": 114
},
"id": 24,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": false
},
"tooltip": {
"maxHeight": 600,
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "gpu_nvlink_total_rx_throughput_MiBps{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} GPU {{index}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "GPU NVLink RX Throughput",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMax": 1,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"min": 0,
"unit": "MiBs"
}
},
"gridPos": {
"h": 14,
"w": 12,
"x": 12,
"y": 114
},
"id": 25,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": false
},
"tooltip": {
"maxHeight": 600,
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "gpu_nvlink_total_tx_throughput_MiBps{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} GPU {{index}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "GPU NVLink TX Throughput",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"unit": "watt"
}
},
"gridPos": {
"h": 14,
"w": 12,
"x": 0,
"y": 128
},
"id": 26,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": false
},
"tooltip": {
"maxHeight": 600,
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "gpu_power_usage_W{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} GPU {{index}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "GPU Power Usage",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"unit": "celsius"
}
},
"gridPos": {
"h": 14,
"w": 12,
"x": 12,
"y": 128
},
"id": 27,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"mean",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Name",
"sortDesc": false
},
"tooltip": {
"maxHeight": 600,
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "gpu_temperature_C{hostname=~\"$hostname\"}",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{{hostname}} GPU {{index}}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "GPU Temperature",
"type": "timeseries"
}
],
"refresh": "5s",
"schemaVersion": 41,
"tags": [
"nvitop",
"nvitop-exporter",
"prometheus",
"nvidia",
"gpu",
"gpu process",
"gpu monitoring"
],
"templating": {
"list": [
{
"current": {},
"definition": "label_values(hostname)",
"description": "",
"includeAll": true,
"multi": true,
"name": "hostname",
"options": [],
"query": {
"qryType": 1,
"query": "label_values(hostname)"
},
"refresh": 1,
"regex": "",
"type": "query"
},
{
"current": {},
"definition": "label_values(username)",
"description": "",
"includeAll": true,
"multi": true,
"name": "username",
"options": [],
"query": {
"qryType": 1,
"query": "label_values(username)"
},
"refresh": 1,
"regex": "",
"type": "query"
}
]
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "nvitop-dashboard",
"uid": "bdl3vqwxprhtsa",
"version": 1,
"weekStart": ""
}