Skip to content

Commit

Permalink
Fix dashboard issues (#284)
Browse files Browse the repository at this point in the history
  • Loading branch information
vishiy authored Sep 22, 2022
1 parent 421faa4 commit 2fb80f1
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 39 deletions.
2 changes: 1 addition & 1 deletion mixins/kubernetes/dashboards/resources/workload.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,6 @@ local template = grafana.template;
g.stack +
{ yaxes: g.yaxes('pps') },
)
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [clusterTemplate, namespaceTemplate, workloadTemplate, workloadTypeTemplate] }, refresh: $._config.grafanaK8s.refresh },
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [clusterTemplate, namespaceTemplate, workloadTypeTemplate, workloadTemplate] }, refresh: $._config.grafanaK8s.refresh },
},
}
90 changes: 52 additions & 38 deletions mixins/node/lib/prom-mixin.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ local graphPanel = grafana.graphPanel;
local grafana70 = import 'github.com/grafana/grafonnet-lib/grafonnet-7.0/grafana.libsonnet';
local gaugePanel = grafana70.panel.gauge;
local table = grafana70.panel.table;
local c = import '../config.libsonnet';

{

Expand All @@ -27,6 +28,18 @@ local table = grafana70.panel.table;
type: 'datasource',
},

// Dashboard template variable named 'cluster'. Its values are the values of
// the config.clusterLabel label on the node_time_seconds metric, queried
// through the '$datasource' variable.
local clusterTemplate =
template.new(
name='cluster',
datasource='$datasource',
query='label_values(node_time_seconds, %s)' % config.clusterLabel,
current='',
// hide is '' when config.showMultiCluster is true and '2' otherwise;
// presumably '2' hides the variable in the dashboard UI -- TODO confirm
// against the grafonnet/Grafana template documentation.
hide=if config.showMultiCluster then '' else '2',
// NOTE(review): refresh=2 and sort=1 are numeric option codes whose meaning
// is not visible in this file -- confirm against the Grafana variable docs.
refresh=2,
includeAll=false,
sort=1
),

local instanceTemplatePrototype =
template.new(
'instance',
Expand All @@ -38,10 +51,10 @@ local table = grafana70.panel.table;
local instanceTemplate =
if platform == 'Darwin' then
instanceTemplatePrototype
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}, instance)' % config }
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname="Darwin", %(clusterLabel)s="$cluster"}, instance)' % config }
else
instanceTemplatePrototype
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}, instance)' % config },
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin", %(clusterLabel)s="$cluster"}, instance)' % config },


local idleCPU =
Expand All @@ -57,9 +70,9 @@ local table = grafana70.panel.table;
.addTarget(prometheus.target(
|||
(
(1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance"}[$__rate_interval])))
(1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance", %(clusterLabel)s="$cluster"}[$__rate_interval])))
/ ignoring(cpu) group_left
count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"})
count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance", %(clusterLabel)s="$cluster"})
)
||| % config,
legendFormat='{{cpu}}',
Expand All @@ -75,10 +88,10 @@ local table = grafana70.panel.table;
min=0,
fill=0,
)
.addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='1m load average'))
.addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='5m load average'))
.addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='15m load average'))
.addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", mode="idle"})' % config, legendFormat='logical cores')),
.addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='1m load average'))
.addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='5m load average'))
.addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='15m load average'))
.addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", mode="idle", %(clusterLabel)s="$cluster"})' % config, legendFormat='logical cores')),

local memoryGraphPanelPrototype =
graphPanel.new(
Expand All @@ -94,44 +107,44 @@ local table = grafana70.panel.table;
.addTarget(prometheus.target(
|||
(
node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"}
node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
-
node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}
node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
-
node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}
node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
-
node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}
node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
)
||| % config,
legendFormat='memory used'
))
.addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory buffers'))
.addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory cached'))
.addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='memory free'))
.addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory buffers'))
.addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory cached'))
.addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory free'))
else if platform == 'Darwin' then
// not useful to stack
memoryGraphPanelPrototype { stack: false }
.addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Physical Memory'))
.addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Physical Memory'))
.addTarget(prometheus.target(
|||
(
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"} -
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"} +
node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"} +
node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"}
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} -
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} +
node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} +
node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
)
||| % config, legendFormat='Memory Used'
))
.addTarget(prometheus.target(
|||
(
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"} -
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"}
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} -
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
)
||| % config, legendFormat='App Memory'
))
.addTarget(prometheus.target('node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Wired Memory'))
.addTarget(prometheus.target('node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"}' % config, legendFormat='Compressed')),
.addTarget(prometheus.target('node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Wired Memory'))
.addTarget(prometheus.target('node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Compressed')),

// NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout.
local memoryGaugePanelPrototype =
Expand All @@ -155,8 +168,8 @@ local table = grafana70.panel.table;
|||
100 -
(
avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance"}) /
avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance"})
avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) /
avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"})
* 100
)
||| % config,
Expand All @@ -168,12 +181,12 @@ local table = grafana70.panel.table;
|||
(
(
avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance"}) -
avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance"}) +
avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance"}) +
avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance"})
avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) -
avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) +
avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) +
avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"})
) /
avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance"})
avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"})
)
*
100
Expand All @@ -190,17 +203,17 @@ local table = grafana70.panel.table;
)
// TODO: Does it make sense to have those three in the same panel?
.addTarget(prometheus.target(
'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config,
'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s, %(clusterLabel)s="$cluster"}[$__rate_interval])' % config,
legendFormat='{{device}} read',
intervalFactor=1,
))
.addTarget(prometheus.target(
'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config,
'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s, %(clusterLabel)s="$cluster"}[$__rate_interval])' % config,
legendFormat='{{device}} written',
intervalFactor=1,
))
.addTarget(prometheus.target(
'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__rate_interval])' % config,
'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s, %(clusterLabel)s="$cluster"}[$__rate_interval])' % config,
legendFormat='{{device}} io time',
intervalFactor=1,
)) +
Expand Down Expand Up @@ -232,15 +245,15 @@ local table = grafana70.panel.table;
.addThresholdStep(color='red', value=0.9)
.addTarget(prometheus.target(
|||
max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s})
max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s, %(clusterLabel)s="$cluster"})
||| % config,
legendFormat='',
instant=true,
format='table'
))
.addTarget(prometheus.target(
|||
max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s})
max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(fsSelector)s, %(clusterLabel)s="$cluster"})
||| % config,
legendFormat='',
instant=true,
Expand Down Expand Up @@ -421,7 +434,7 @@ local table = grafana70.panel.table;
fill=0,
)
.addTarget(prometheus.target(
'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval]) * 8' % config,
'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo", %(clusterLabel)s="$cluster"}[$__rate_interval]) * 8' % config,
legendFormat='{{device}}',
intervalFactor=1,
)),
Expand All @@ -437,7 +450,7 @@ local table = grafana70.panel.table;
fill=0,
)
.addTarget(prometheus.target(
'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__rate_interval]) * 8' % config,
'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo", %(clusterLabel)s="$cluster"}[$__rate_interval]) * 8' % config,
legendFormat='{{device}}',
intervalFactor=1,
)),
Expand Down Expand Up @@ -473,6 +486,7 @@ local table = grafana70.panel.table;
// Template variables collected for the dashboard.
// clusterTemplate is listed before instanceTemplate, whose query filters on
// "$cluster"; presumably the order matters for dependent variables -- TODO
// confirm against the Grafana variable documentation.
local templates =
[
prometheusDatasourceTemplate,
clusterTemplate,
instanceTemplate,
],

Expand Down

0 comments on commit 2fb80f1

Please sign in to comment.