Skip to content

Commit

Permalink
fix and update node mixin
Browse files Browse the repository at this point in the history
  • Loading branch information
vishiy committed Jul 29, 2022
1 parent 826a296 commit 0fd726f
Show file tree
Hide file tree
Showing 7 changed files with 539 additions and 303 deletions.
21 changes: 12 additions & 9 deletions mixins/node/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Node Mixin

Last synced on 11/9/2021 to commit - https://github.com/prometheus/node_exporter/commit/3e6f4ce627e588e9972e624f1f744c716e11b199
Last synced on 07/28/2022 to commit - https://github.com/prometheus/node_exporter/commit/7519830a8a9fb7e2321655031692526498f7cb9a

_This is a work in progress. We aim for it to become a good role model for alerts
and dashboards eventually, but it is not quite there yet._
Expand All @@ -12,33 +12,36 @@ for Grafana.

To use them, you need to have `jsonnet` (v0.16+) and `jb` installed. If you
have a working Go development environment, it's easiest to run the following:

```bash
$ go install github.com/google/go-jsonnet/cmd/jsonnet@latest
$ go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
$ go install github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@latest
go install github.com/google/go-jsonnet/cmd/jsonnet@latest
go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
go install github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@latest
```

Next, install the dependencies by running the following command in this
directory:

```bash
$ jb install
jb install
```

You can then build the Prometheus rules files `node_alerts.yaml` and
`node_rules.yaml`:

```bash
$ make node_alerts.yaml node_rules.yaml
make node_alerts.yaml node_rules.yaml
```

You can also build a directory `dashboards_out` with the JSON dashboard files
for Grafana:

```bash
$ make dashboards_out
make dashboards_out
```

Note that some of the generated dashboards require recording rules specified in
the previously generated `node_rules.yaml`.

For more advanced uses of mixins, see
https://github.com/monitoring-mixins/docs.

<https://github.com/monitoring-mixins/docs>.
28 changes: 14 additions & 14 deletions mixins/node/alerts/alerts.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
alert: 'NodeFilesystemAlmostOutOfSpace',
expr: |||
(
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceAvailableCriticalThreshold)d
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceAvailableWarningThreshold)d
and
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
)
Expand All @@ -58,15 +58,15 @@
severity: 'warning',
},
annotations: {
summary: 'Filesystem has less than %(fsSpaceAvailableCriticalThreshold)d%% space left.' % $._config,
summary: 'Filesystem has less than %(fsSpaceAvailableWarningThreshold)d%% space left.' % $._config,
description: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.',
},
},
{
alert: 'NodeFilesystemAlmostOutOfSpace',
expr: |||
(
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceAvailableWarningThreshold)d
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceAvailableCriticalThreshold)d
and
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s} == 0
)
Expand All @@ -76,7 +76,7 @@
severity: '%(nodeCriticalSeverity)s' % $._config,
},
annotations: {
summary: 'Filesystem has less than %(fsSpaceAvailableWarningThreshold)d%% space left.' % $._config,
summary: 'Filesystem has less than %(fsSpaceAvailableCriticalThreshold)d%% space left.' % $._config,
description: 'Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.',
},
},
Expand Down Expand Up @@ -214,15 +214,15 @@
alert: 'NodeClockSkewDetected',
expr: |||
(
node_timex_offset_seconds > 0.05
node_timex_offset_seconds{%(nodeExporterSelector)s} > 0.05
and
deriv(node_timex_offset_seconds[5m]) >= 0
deriv(node_timex_offset_seconds{%(nodeExporterSelector)s}[5m]) >= 0
)
or
(
node_timex_offset_seconds < -0.05
node_timex_offset_seconds{%(nodeExporterSelector)s} < -0.05
and
deriv(node_timex_offset_seconds[5m]) <= 0
deriv(node_timex_offset_seconds{%(nodeExporterSelector)s}[5m]) <= 0
)
||| % $._config,
'for': '10m',
Expand All @@ -237,9 +237,9 @@
{
alert: 'NodeClockNotSynchronising',
expr: |||
min_over_time(node_timex_sync_status[5m]) == 0
min_over_time(node_timex_sync_status{%(nodeExporterSelector)s}[5m]) == 0
and
node_timex_maxerror_seconds >= 16
node_timex_maxerror_seconds{%(nodeExporterSelector)s} >= 16
||| % $._config,
'for': '10m',
labels: {
Expand All @@ -253,7 +253,7 @@
{
alert: 'NodeRAIDDegraded',
expr: |||
node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0
node_md_disks_required{%(nodeExporterSelector)s,%(diskDeviceSelector)s} - ignoring (state) (node_md_disks{state="active",%(nodeExporterSelector)s,%(diskDeviceSelector)s}) > 0
||| % $._config,
'for': '15m',
labels: {
Expand All @@ -267,7 +267,7 @@
{
alert: 'NodeRAIDDiskFailure',
expr: |||
node_md_disks{state="failed"} > 0
node_md_disks{state="failed",%(nodeExporterSelector)s,%(diskDeviceSelector)s} > 0
||| % $._config,
labels: {
severity: 'warning',
Expand All @@ -281,7 +281,7 @@
alert: 'NodeFileDescriptorLimit',
expr: |||
(
node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70
node_filefd_allocated{%(nodeExporterSelector)s} * 100 / node_filefd_maximum{%(nodeExporterSelector)s} > 70
)
||| % $._config,
'for': '15m',
Expand All @@ -297,7 +297,7 @@
alert: 'NodeFileDescriptorLimit',
expr: |||
(
node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90
node_filefd_allocated{%(nodeExporterSelector)s} * 100 / node_filefd_maximum{%(nodeExporterSelector)s} > 90
)
||| % $._config,
'for': '15m',
Expand Down
4 changes: 2 additions & 2 deletions mixins/node/config.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@

// Available disk space (%) thresholds on which to trigger the
// 'NodeFilesystemAlmostOutOfSpace' alerts.
fsSpaceAvailableCriticalThreshold: 5,
fsSpaceAvailableWarningThreshold: 3,
fsSpaceAvailableWarningThreshold: 5,
fsSpaceAvailableCriticalThreshold: 3,

rateInterval: '5m',
// Opt-in for multi-cluster support.
Expand Down
Loading

0 comments on commit 0fd726f

Please sign in to comment.