diff --git a/CHANGELOG.md b/CHANGELOG.md index 587d67f0..5d8eb091 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -90,6 +90,7 @@ Monitoring Plugins: * sap-open-concur: Add new parameter `--insecure` * statuspal: Add new parameters `--insecure` `--no-proxy` `--timeout` * swap-usage: Report the top 3 processes causing the usage (Linux only) +* uptime: Use the plugin to warn about recent reboots ([#722](https://github.com/Linuxfabrik/monitoring-plugins/issues/722)) * veeam-status: Add new parameters `--insecure` `--no-proxy` * wildfly-\*: Add new parameters `--insecure` `--no-proxy` * xml: Add new parameter `--insecure` diff --git a/check-plugins/uptime/README.rst b/check-plugins/uptime/README.rst index 05a50620..75fc4144 100644 --- a/check-plugins/uptime/README.rst +++ b/check-plugins/uptime/README.rst @@ -27,32 +27,68 @@ Help usage: uptime [-h] [-V] [--always-ok] [-c CRIT] [-w WARN] - Tell how long the system has been running. + Check how long the system has been running. options: -h, --help show this help message and exit -V, --version show program's version number and exit --always-ok Always returns OK. -c CRIT, --critical CRIT - Set the critical threshold for uptime in days. - Default: 366 + Threshold for the uptime in a human readable format + (10m = 10 minutes; s = seconds, m = minutes, h = + hours, D = days, W = weeks, M = months, Y = years). + Supports Nagios ranges. Example: `:1Y` alerts if + uptime is greater than 1 year. Default: :1Y -w WARN, --warning WARN - Set the warning threshold for uptime in days. Default: - 180 + Threshold for the uptime in a human readable format + (10m = 10 minutes; s = seconds, m = minutes, h = + hours, D = days, W = weeks, M = months, Y = years). + Supports Nagios ranges. Example: `5m:180D` warns if + uptime is not between 5 minutes and 180 days. Default: + 3m:180D Usage Examples -------------- +Warn if more than 180 days, crit if more than 365 days up: + .. 
code-block:: bash - ./uptime --warning 180 --critical 366 - + ./uptime --warning 180D --critical 1Y + +Output: + +.. code-block:: text + + Up 2W 6h since 2024-03-30 08:08:01 (thresholds 180D/1Y) + +Warn if less than 5 minutes up: + +.. code-block:: bash + + ./uptime --warning 5m: + Output: .. code-block:: text - Up 1W 5D since 2023-05-22 10:36:26 + Up 4m since 2024-03-30 08:08:01 (thresholds 5m:/:1Y) [WARNING] + +Warn if the uptime is not between 5 minutes and 6 months 5 days. Return crit if up for more than 2 years: + +.. code-block:: bash + + ./uptime --warning 5m:6M5D --critical 2Y + # alternatively: ./uptime --warning '5m:6M 5D' --critical 2Y + +Output over time: + +.. code-block:: text + + Up 1m 39s since 2024-03-30 08:08:01 (thresholds 5m:6M5D/2Y) [WARNING] + Up 6M since 2024-03-30 08:08:01 (thresholds 5m:6M5D/2Y) + Up 6M 6D since 2024-03-30 08:08:01 (thresholds 5m:6M5D/2Y) [WARNING] States diff --git a/check-plugins/uptime/icingaweb2-module-director/uptime.json b/check-plugins/uptime/icingaweb2-module-director/uptime.json index 60a70779..f4101d4d 100644 --- a/check-plugins/uptime/icingaweb2-module-director/uptime.json +++ b/check-plugins/uptime/icingaweb2-module-director/uptime.json @@ -130,7 +130,7 @@ "tpl-service-generic" ], "max_check_attempts": 5, - "notes": "Tell how long the system has been running.", + "notes": "Check how long the system has been running.", "notes_url": "https://github.com/Linuxfabrik/monitoring-plugins/tree/main/check-plugins/uptime", "object_name": "tpl-service-uptime", "object_type": "template", @@ -142,8 +142,8 @@ "vars": { "criticality": "C", "uptime_always_ok": false, - "uptime_critical": 366, - "uptime_warning": 180 + "uptime_critical": ":1Y", + "uptime_warning": "3m:180D" }, "volatile": null, "zone": null, @@ -178,7 +178,7 @@ "tpl-service-generic" ], "max_check_attempts": 5, - "notes": "Tell how long the system has been running.", + "notes": "Check how long the system has been running.", "notes_url": 
"https://github.com/Linuxfabrik/monitoring-plugins/tree/main/check-plugins/uptime", "object_name": "tpl-service-uptime-windows", "object_type": "template", @@ -190,8 +190,8 @@ "vars": { "criticality": "C", "uptime_windows_always_ok": false, - "uptime_windows_critical": 366, - "uptime_windows_warning": 180 + "uptime_windows_critical": ":1Y", + "uptime_windows_warning": "3m:180D" }, "volatile": null, "zone": null, @@ -211,7 +211,7 @@ "2": { "varname": "uptime_critical", "caption": "Uptime: Critical", - "description": "Set the critical threshold for uptime in days.", + "description": "Threshold for the uptime in a human readable format (10m = 10 minutes; s = seconds, m = minutes, h = hours, D = days, W = weeks, M = months, Y = years). Supports Nagios ranges. Example: `:1Y` alerts if uptime is greater than 1 year.", "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeString", "format": null, "settings": { @@ -222,7 +222,7 @@ "3": { "varname": "uptime_warning", "caption": "Uptime: Warning", - "description": "Set the warning threshold for uptime in days.", + "description": "Threshold for the uptime in a human readable format (10m = 10 minutes; s = seconds, m = minutes, h = hours, D = days, W = weeks, M = months, Y = years). Supports Nagios ranges. Example: `5m:180D` warns if uptime is not between 5 minutes and 180 days.", "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeString", "format": null, "settings": { @@ -242,7 +242,7 @@ "5": { "varname": "uptime_windows_critical", "caption": "Uptime: Critical", - "description": "Set the critical threshold for uptime in days.", + "description": "Threshold for the uptime in a human readable format (10m = 10 minutes; s = seconds, m = minutes, h = hours, D = days, W = weeks, M = months, Y = years). Supports Nagios ranges. 
Example: `:1Y` alerts if uptime is greater than 1 year.", "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeString", "format": null, "settings": { @@ -253,7 +253,7 @@ "6": { "varname": "uptime_windows_warning", "caption": "Uptime: Warning", - "description": "Set the warning threshold for uptime in days.", + "description": "Threshold for the uptime in a human readable format (10m = 10 minutes; s = seconds, m = minutes, h = hours, D = days, W = weeks, M = months, Y = years). Supports Nagios ranges. Example: `5m:180D` warns if uptime is not between 5 minutes and 180 days.", "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeString", "format": null, "settings": { diff --git a/check-plugins/uptime/uptime b/check-plugins/uptime/uptime index 3c31bb5e..458799de 100755 --- a/check-plugins/uptime/uptime +++ b/check-plugins/uptime/uptime @@ -28,12 +28,12 @@ except ImportError: __author__ = 'Linuxfabrik GmbH, Zurich/Switzerland' -__version__ = '2023112901' +__version__ = '2024033001' -DESCRIPTION = 'Tell how long the system has been running.' +DESCRIPTION = 'Check how long the system has been running.' -DEFAULT_WARN = 180 # days -DEFAULT_CRIT = 366 # days +DEFAULT_WARN = '3m:180D' # warn if uptime is not in 3 minutes to 180 days +DEFAULT_CRIT = ':1Y' # crit if uptime is greater than 365 days def parse_args(): @@ -57,17 +57,25 @@ def parse_args(): parser.add_argument( '-c', '--critical', - help='Set the critical threshold for uptime in days. Default: %(default)s', + help='Threshold for the uptime in a human readable format ' + '(10m = 10 minutes; s = seconds, m = minutes, h = hours, ' + 'D = days, W = weeks, M = months, Y = years). ' + 'Supports Nagios ranges. ' + 'Example: `:1Y` alerts if uptime is greater than 1 year. ' + 'Default: %(default)s', dest='CRIT', - type=lib.args.int_or_none, default=DEFAULT_CRIT, ) parser.add_argument( '-w', '--warning', - help='Set the warning threshold for uptime in days. 
Default: %(default)s', + help='Threshold for the uptime in a human readable format ' + '(10m = 10 minutes; s = seconds, m = minutes, h = hours, ' + 'D = days, W = weeks, M = months, Y = years). ' + 'Supports Nagios ranges. ' + 'Example: `5m:180D` warns if uptime is not between 5 minutes and 180 days. ' + 'Default: %(default)s', dest='WARN', - type=lib.args.int_or_none, default=DEFAULT_WARN, ) @@ -84,24 +92,24 @@ def main(): except SystemExit: sys.exit(STATE_UNKNOWN) - if args.CRIT is None: - CRIT = None - else: - CRIT = args.CRIT*24*60*60 - - if args.WARN is None: - WARN = None - else: - WARN = args.WARN*24*60*60 + # convert human readable nagios ranges to something that the Linuxfabrik libraries + # can understand + CRIT = lib.human.humanrange2seconds(args.CRIT) + WARN = lib.human.humanrange2seconds(args.WARN) + # fetch data delta = lib.time.now() - psutil.boot_time() - msg = 'Up {} since {}'.format( + # build the message + state = lib.base.get_state(delta, WARN, CRIT, _operator='range') + msg = 'Up {} since {} (thresholds {}/{}){}'.format( lib.human.seconds2human(delta), lib.time.epoch2iso(psutil.boot_time()), + args.WARN, + args.CRIT, + lib.base.state2str(state, prefix=' '), ) - perfdata = lib.base.get_perfdata('uptime', delta, 's', WARN, CRIT, 0, None) - state = lib.base.get_state(delta, WARN, CRIT) + perfdata = lib.base.get_perfdata('uptime', delta, 's', None, None, 0, None) # over and out lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)