Skip to content

Commit

Permalink
ping: Check plugin too fast states "error" (fix #691)
Browse files Browse the repository at this point in the history
  • Loading branch information
markuslf committed Nov 22, 2023
1 parent 7eedcbe commit 86af98e
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 69 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ Monitoring Plugins:
* mysql-logfile: State only UNKNOWN if the log is empty and wasn't set deliberately ([PR #716](https://github.com/Linuxfabrik/monitoring-plugins/issues/716), thanks to [Eric Esser](https://github.com/dorkmaneuver))
* openstack-nova-list: Make more robust in case of OpenStack errors
* php-version: Check multiple installed PHP versions (fix [#694](https://github.com/Linuxfabrik/monitoring-plugins/issues/694))
* ping: Check plugin too fast states "error" (fix [#691](https://github.com/Linuxfabrik/monitoring-plugins/issues/691))
* rocketchat-stats: There are new values available (fix [#151](https://github.com/Linuxfabrik/monitoring-plugins/issues/151))
* systemd-unit: Encode unit-name to text before running systemd command
* uptime: Additionally report last reboot time (fix [#190](https://github.com/Linuxfabrik/monitoring-plugins/issues/190))
Expand Down
12 changes: 7 additions & 5 deletions check-plugins/ping/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,22 @@ Help

.. code-block:: text
usage: ping [-h] [-V] [--count COUNT] [-H HOSTNAME] [--interval INTERVAL]
[-t DEADLINE]
usage: ping [-h] [-V] [--always-ok] [--count COUNT] [-H HOSTNAME]
[--interval INTERVAL] [-t DEADLINE]
Sends ICMP ECHO_REQUEST to network hosts using the built-in `ping` command.
optional arguments:
options:
-h, --help show this help message and exit
-V, --version show program's version number and exit
--always-ok Always returns OK.
--count COUNT Stop after sending count ECHO_REQUEST packets.
Default: 5
-H HOSTNAME, --hostname HOSTNAME
The ping destination. Default: 127.0.0.1
--interval INTERVAL Wait interval seconds between sending each packet.
Default: 0.2
Real number allowed with dot as a decimal separator
(regardless locale setup). Default: 0.2
-t DEADLINE, --timeout DEADLINE
Specify a timeout, in seconds, before ping exits
regardless of how many packets have been sent or
Expand All @@ -62,7 +64,7 @@ Output:

.. code-block:: text
PING localhost: 5 packets transmitted, 5 received, 0% packet loss, time 829ms. rtt min/avg/max/mdev = 0.036/0.082/0.103/0.023 ms
PING 192.0.2.10: 10 packets transmitted, 5 received, 50% packet loss, time 187ms. rtt min/avg/max/mdev = 105.659/105.990/106.333/0.225 ms, pipe 6
States
Expand Down
22 changes: 11 additions & 11 deletions check-plugins/ping/icingaweb2-module-director/ping.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,27 @@
"disabled": false,
"fields": [
{
"datafield_id": 1914,
"datafield_id": 1,
"is_required": "n",
"var_filter": null
},
{
"datafield_id": 1915,
"datafield_id": 2,
"is_required": "n",
"var_filter": null
},
{
"datafield_id": 1916,
"datafield_id": 3,
"is_required": "n",
"var_filter": null
},
{
"datafield_id": 1917,
"datafield_id": 4,
"is_required": "n",
"var_filter": null
},
{
"datafield_id": 1918,
"datafield_id": 5,
"is_required": "n",
"var_filter": null
}
Expand Down Expand Up @@ -111,7 +111,7 @@
}
},
"Datafield": {
"1914": {
"1": {
"varname": "ping_always_ok",
"caption": "Ping: Always OK?",
"description": "Always returns OK.",
Expand All @@ -120,7 +120,7 @@
"settings": {},
"uuid": "8b73ad73-f1f2-4ee6-b075-48134f0e63f0"
},
"1915": {
"2": {
"varname": "ping_count",
"caption": "Ping: Count",
"description": "Stop after sending count ECHO_REQUEST packets.",
Expand All @@ -131,7 +131,7 @@
},
"uuid": "d1be5af4-f89a-4dcf-80e2-ac1508fd4ad8"
},
"1916": {
"3": {
"varname": "ping_hostname",
"caption": "Ping: Hostname",
"description": "The ping destination.",
Expand All @@ -142,18 +142,18 @@
},
"uuid": "fcd9fdac-8264-403b-b4a1-699dcdd70e61"
},
"1917": {
"4": {
"varname": "ping_interval",
"caption": "Ping: Interval",
"description": "Wait interval seconds between sending each packet.",
"description": "Wait interval seconds between sending each packet. Real number allowed with dot as a decimal separator (regardless locale setup).",
"datatype": "Icinga\\Module\\Director\\DataType\\DataTypeString",
"format": null,
"settings": {
"visibility": "visible"
},
"uuid": "c8d8ef18-d76f-4037-acd9-6ce28368c01c"
},
"1918": {
"5": {
"varname": "ping_timeout",
"caption": "Ping: Timeout",
"description": "Specify a timeout, in seconds, before ping exits regardless of how many packets have been sent or received.",
Expand Down
191 changes: 138 additions & 53 deletions check-plugins/ping/ping
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@ import sys # pylint: disable=C0413

import lib.base # pylint: disable=C0413
import lib.shell # pylint: disable=C0413
from lib.globals import (STATE_CRIT, STATE_OK, # pylint: disable=C0413
STATE_UNKNOWN, STATE_WARN)
from lib.globals import (STATE_CRIT, STATE_OK, STATE_UNKNOWN)

__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2023071203'
__version__ = '2023112201'

DESCRIPTION = 'Sends ICMP ECHO_REQUEST to network hosts using the built-in `ping` command.'

Expand Down Expand Up @@ -52,30 +51,36 @@ def parse_args():

parser.add_argument(
'--count',
help='Stop after sending count ECHO_REQUEST packets. Default: %(default)s',
help='Stop after sending count ECHO_REQUEST packets. '
'Default: %(default)s',
default=DEFAULT_COUNT,
dest='COUNT',
type=int,
)

parser.add_argument(
'-H', '--hostname',
help='The ping destination. Default: %(default)s',
help='The ping destination. '
'Default: %(default)s',
dest='HOSTNAME',
default=DEFAULT_HOSTNAME,
)

parser.add_argument(
'--interval',
help='Wait interval seconds between sending each packet. Default: %(default)s',
help='Wait interval seconds between sending each packet. '
'Real number allowed with dot as a decimal separator (regardless locale setup). '
'Default: %(default)s',
default=DEFAULT_INTERVAL,
dest='INTERVAL',
type=float,
)

parser.add_argument(
'-t', '--timeout',
help='Specify a timeout, in seconds, before ping exits regardless of how many packets have been sent or received. Default: %(default)s',
help='Specify a timeout, in seconds, before ping exits regardless of how many packets '
'have been sent or received. '
'Default: %(default)s',
default=DEFAULT_DEADLINE,
dest='DEADLINE',
type=int,
Expand All @@ -94,76 +99,156 @@ def main():
except SystemExit:
sys.exit(STATE_UNKNOWN)

cmd = 'ping -c {count} -i {interval} -w {deadline} -q {hostname}'.format(count=args.COUNT,
# ping -c 5 -i 0.2 -w 5 -q 192.0.2.10
cmd = 'ping -c {count} -i {interval} -w {deadline} -q {hostname}'.format(
count=args.COUNT,
interval=args.INTERVAL,
deadline=args.DEADLINE,
hostname=args.HOSTNAME
)

# execute the shell command and return its result and exit code
# fetch data
stdout, stderr, retc = lib.base.coe(lib.shell.shell_exec(cmd))
if stderr or retc == 2:
lib.base.cu(stderr)

result = stdout.splitlines()
if not result[0] or not result[3]:
lib.base.cu('Unexpected output from ping.')
# If ping does not receive any reply packets at all it will exit with code 1.
# If a packet count and deadline are both specified, and fewer than count packets are received
# by the time the deadline has arrived, it will also exit with code 1.
# On other error it exits with code 2. Otherwise it exits with code 0.

# throwing CRIT if ping exits with 1 (CRIT instead of WARN because of the
# fact that this check will mainly be used for checking host-liveliness [UP/DOWN]).
# `ping` returns 1 when: !nreceived || (deadline && nreceived < npackets));
state = STATE_CRIT if retc else STATE_OK
# Since we want to be as tolerant as possible, if we send burst pings and at least
# one packet makes its way back, we assume the host is alive. So we don't rely on the
# return code of `ping` (any longer).
# See https://github.com/Linuxfabrik/monitoring-plugins/issues/691 for details.

# init some vars
# Throwing CRIT instead of WARN because of the fact that this check will mainly be used
# for checking host-liveliness [OK=UP, CRIT=DOWN].
state = STATE_CRIT if '0 received' in stdout else STATE_OK
if state == STATE_OK:
msg = ''
else:
msg = 'Destination Host Unreachable. '

# we have the state, lets build the message and create some perfdata;
msg = 'Destination host unreachable. '
perfdata = ''

#['PING www.linuxfabrik.ch (185.231.52.10) 56(84) bytes of data.'
# ''
# '--- www.linuxfabrik.ch ping statistics ---'
# '5 packets transmitted, 5 received, 0% packet loss, time 803ms'
# 'rtt min/avg/max/mdev = 8.926/11.367/17.350/3.184 ms']
# analyze data
result = stdout.splitlines()
if not result[0] or not result[3]:
lib.base.cu('Unexpected output from ping.')

# line 0:
# "PING %hostname (%ip) [from %sinip %device: ]%datalen(%datalen+28)
ping = re.search(r'G (.*?)\(', result[0]) # regex: 45 steps
# 'PING www.linuxfabrik.ch (192.0.2.10) 56(84) bytes of data.'
ping = re.search(r'G (.*?)\(', result[0]) # regex: 45 steps
msg += 'PING {}'.format(ping.group(1).strip()) + ': '

# "%ntransmitted packets transmitted, %nreceived received[, +%nrepeats duplicates][, +%nchecksum corrupted][, +%nerrors errors][, %packetloss% packet loss, time %timems]"
matches = re.search(r'(\d+) packets transmitted, (\d+) received(, \+?(\d+) duplicates)?(, \+?(\d+) checksum corrupted)?(, \+?(\d+) errors)?, (\d+(?:\.\d+)?)% packet loss, time (\d+)', result[3]) # regex: 92 steps
transmitted = matches.group(1)
received = matches.group(2)
duplicates = matches.group(4).replace('+', '') if matches.group(4) else 0
checksum_corrupted = matches.group(6).replace('+', '') if matches.group(6) else 0
errors = matches.group(8).replace('+', '') if matches.group(8) else 0
packet_loss = matches.group(9)
time = matches.group(10)
# line 3:
# "%ntransmitted packets transmitted, %nreceived received[, +%nrepeats duplicates][, +%nchecksum corrupted][, +%nerrors errors][, %packetloss% packet loss, time %timems]" # pylint: disable=C0301
# '5 packets transmitted, 5 received, 0% packet loss, time 803ms'
matches = re.search(
r'(\d+) packets transmitted, (\d+) received(, \+?(\d+) duplicates)?(, \+?(\d+) checksum corrupted)?(, \+?(\d+) errors)?, (\d+(?:\.\d+)?)% packet loss, time (\d+)', # regex: 92 steps # pylint: disable=C0301
result[3],
)
msg += result[3] + '. '
perfdata += lib.base.get_perfdata(
'transmitted',
matches.group(1),
uom=None,
warn=None,
crit=None,
_min=0,
_max=None,
)
perfdata += lib.base.get_perfdata(
'received', matches.group(2),
uom=None,
warn=None,
crit=None,
_min=0,
_max=None,
)
perfdata += lib.base.get_perfdata(
'duplicates', matches.group(4).replace('+', '') if matches.group(4) else 0,
uom=None,
warn=None,
crit=None,
_min=0,
_max=None,
)
perfdata += lib.base.get_perfdata(
'checksum_corrupted', matches.group(6).replace('+', '') if matches.group(6) else 0,
uom=None,
warn=None,
crit=None,
_min=0,
_max=None,
)
perfdata += lib.base.get_perfdata(
'errors', matches.group(8).replace('+', '') if matches.group(8) else 0,
uom=None,
warn=None,
crit=None,
_min=0,
_max=None,
)
perfdata += lib.base.get_perfdata(
'packet_loss',
matches.group(9),
uom='%',
warn=None,
crit=None,
_min=0,
_max=100,
)
perfdata += lib.base.get_perfdata(
'time', matches.group(10),
uom='ms',
warn=None,
crit=None,
_min=0,
_max=None,
)

# line 4:
# 'rtt min/avg/max/mdev = 8.926/11.367/17.350/3.184 ms'
rtt_min, rtt_avg, rtt_max, rtt_mdev = 0, 0, 0, 0
if result[4] and not result[4].startswith('pipe '):
# host is reachable
matches = re.search(r'= (\d+\.\d+)/(\d+\.\d+)/(\d+\.\d+)/(\d+\.\d+)', result[4]) # regex: 26 steps
rtt_min = matches.group(1)
rtt_avg = matches.group(2)
rtt_max = matches.group(3)
rtt_mdev = matches.group(4)
matches = re.search(r'= (\d+\.\d+)/(\d+\.\d+)/(\d+\.\d+)/(\d+\.\d+)', result[4]) # regex: 26 steps # pylint: disable=C0301
msg += result[4]

perfdata = ''
perfdata += lib.base.get_perfdata('transmitted', transmitted, None, None, None, 0, None)
perfdata += lib.base.get_perfdata('received', received, None, None, None, 0, None)
perfdata += lib.base.get_perfdata('duplicates', duplicates, None, None, None, 0, None)
perfdata += lib.base.get_perfdata('checksum_corrupted', checksum_corrupted, None, None, None, 0, None)
perfdata += lib.base.get_perfdata('errors', errors, None, None, None, 0, None)
perfdata += lib.base.get_perfdata('packet_loss', packet_loss, '%', None, None, 0, 100)
perfdata += lib.base.get_perfdata('time', time, 'ms', None, None, 0, None)

perfdata += lib.base.get_perfdata('rtt_min', rtt_min, 'ms', None, None, 0, None)
perfdata += lib.base.get_perfdata('rtt_avg', rtt_avg, 'ms', None, None, 0, None)
perfdata += lib.base.get_perfdata('rtt_max', rtt_max, 'ms', None, None, 0, None)
perfdata += lib.base.get_perfdata('rtt_mdev', rtt_mdev, 'ms', None, None, 0, None)
perfdata += lib.base.get_perfdata(
'rtt_min', matches.group(1),
uom='ms',
warn=None,
crit=None,
_min=0,
_max=None,
)
perfdata += lib.base.get_perfdata(
'rtt_avg', matches.group(2),
uom='ms',
warn=None,
crit=None,
_min=0,
_max=None,
)
perfdata += lib.base.get_perfdata(
'rtt_max', matches.group(3),
uom='ms',
warn=None,
crit=None,
_min=0,
_max=None,
)
perfdata += lib.base.get_perfdata(
'rtt_mdev', matches.group(4),
uom='ms',
warn=None,
crit=None,
_min=0,
_max=None,
)

# over and out
lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)
Expand Down

0 comments on commit 86af98e

Please sign in to comment.