diff --git a/CHANGELOG.md b/CHANGELOG.md index 088d3a45..8de86358 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Icinga Director: Monitoring Plugins: +* gitlab-health (fix #670) * gitlab-version * openstack-nova-list * postgresql-version diff --git a/assets/icingaweb2-module-director/all-the-rest.json b/assets/icingaweb2-module-director/all-the-rest.json index 48afb4ad..dcb7e61c 100644 --- a/assets/icingaweb2-module-director/all-the-rest.json +++ b/assets/icingaweb2-module-director/all-the-rest.json @@ -9086,6 +9086,49 @@ "object_name": "GitLab Service Set", "object_type": "template", "services": { + "GitLab Health": { + "action_url": null, + "apply_for": null, + "assign_filter": null, + "check_command": null, + "check_interval": null, + "check_period": null, + "check_timeout": null, + "command_endpoint": null, + "disabled": false, + "display_name": null, + "enable_active_checks": null, + "enable_event_handler": null, + "enable_flapping": null, + "enable_notifications": null, + "enable_passive_checks": null, + "enable_perfdata": null, + "event_command": null, + "fields": [], + "flapping_threshold_high": null, + "flapping_threshold_low": null, + "groups": [], + "host": null, + "icon_image": null, + "icon_image_alt": null, + "imports": [ + "tpl-service-gitlab-health" + ], + "max_check_attempts": null, + "notes": null, + "notes_url": null, + "object_name": "GitLab Health", + "object_type": "object", + "retry_interval": null, + "service_set": null, + "template_choice": null, + "use_agent": null, + "use_var_overrides": null, + "uuid": "161fb5d8-24ed-4bdf-b990-1818e9ab57c2", + "vars": {}, + "volatile": null, + "zone": null + }, "GitLab Version": { "action_url": null, "apply_for": null, diff --git a/check-plugins/gitlab-health/README.rst b/check-plugins/gitlab-health/README.rst new file mode 100644 index 00000000..24070ded --- /dev/null +++ b/check-plugins/gitlab-health/README.rst @@ -0,0 +1,86 @@ +Check gitlab-health +=================== + +Overview +-------- + 
+Checks whether the GitLab application server is running. It does not hit the database or verify that other services are running. Its purpose is to report that the application server is handling requests, but a STATE_OK response does not signify that the database or other services are ready. + +Hints: + +* To access monitoring resources, the requesting client IP needs to be included in the allowlist. For details, see `how to add IPs to the allowlist for the monitoring endpoints <https://docs.gitlab.com/ee/administration/monitoring/ip_allowlist.html>`_. +* GitLab Health Checks: https://docs.gitlab.com/ee/administration/monitoring/health_check.html + + +Fact Sheet +---------- + +.. csv-table:: + :widths: 30, 70 + + "Check Plugin Download", "https://github.com/Linuxfabrik/monitoring-plugins/tree/main/check-plugins/gitlab-health" + "Check Interval Recommendation", "Once a minute" + "Can be called without parameters", "Yes" + "Compiled for", "Linux" + + +Help +---- + +.. code-block:: text + + usage: gitlab-health [-h] [-V] [--always-ok] [--severity {warn,crit}] + [--test TEST] [--timeout TIMEOUT] [--url URL] + + Checks whether the GitLab application server is running. It does not hit the + database or verifies other services are running. + + options: + -h, --help show this help message and exit + -V, --version show program's version number and exit + --always-ok Always returns OK. + --severity {warn,crit} + Severity for alerting. Default: warn + --test TEST For unit tests. Needs "path-to-stdout-file,path-to- + stderr-file,expected-retc". + --timeout TIMEOUT Network timeout in seconds. Default: 3 (seconds) + --url URL GitLab health URL endpoint. Default: + http://localhost/-/health + + +Usage Examples +-------------- + +.. code-block:: bash + + ./gitlab-health --severity warn --timeout 3 --url http://localhost/-/health + +Output: + +.. code-block:: text + + The GitLab application server is processing requests, but this does not mean that the database or other services are ready. 
+ + +States +------ + +* Depending on the given ``--severity``, returns WARN (default) or CRIT if the health endpoint does not answer with ``GitLab OK``, i.e. the application server is not handling requests properly. + + +Perfdata / Metrics +------------------ + +.. csv-table:: + :widths: 25, 15, 60 + :header-rows: 1 + + Name, Type, Description + gitlab-health, Number, "The current state (0 = OK, 1 = WARN, 2 = CRIT, 3 = UNKNOWN)." + + +Credits, License +---------------- + +* Authors: `Linuxfabrik GmbH, Zurich <https://www.linuxfabrik.ch>`_ +* License: The Unlicense, see `LICENSE file <https://unlicense.org/>`_. diff --git a/check-plugins/gitlab-health/gitlab-health b/check-plugins/gitlab-health/gitlab-health new file mode 100755 index 00000000..ad6bdb81 --- /dev/null +++ b/check-plugins/gitlab-health/gitlab-health @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8; py-indent-offset: 4 -*- +# +# Author: Linuxfabrik GmbH, Zurich, Switzerland +# Contact: info (at) linuxfabrik (dot) ch +# https://www.linuxfabrik.ch/ +# License: The Unlicense, see LICENSE file. + +# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.rst + +"""See the check's README for more details. +""" + +import argparse # pylint: disable=C0413 +import sys # pylint: disable=C0413 + +import lib.args # pylint: disable=C0413 +import lib.base # pylint: disable=C0413 +import lib.test # pylint: disable=C0413 +import lib.url # pylint: disable=C0413 +from lib.globals import (STATE_CRIT, STATE_OK, # pylint: disable=C0413 + STATE_UNKNOWN, STATE_WARN) + + +__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland' +__version__ = '2023082401' + +DESCRIPTION = """Checks whether the GitLab application server is running. It does not hit + the database or verifies other services are running.""" + +DEFAULT_SEVERITY = 'warn' +DEFAULT_TIMEOUT = 3 +DEFAULT_URL = 'http://localhost/-/health' + + +def parse_args(): + """Parse command line arguments using argparse. 
+ """ + parser = argparse.ArgumentParser(description=DESCRIPTION) + + parser.add_argument( + '-V', '--version', + action='version', + version='%(prog)s: v{} by {}'.format(__version__, __author__) + ) + + parser.add_argument( + '--always-ok', + help='Always returns OK.', + dest='ALWAYS_OK', + action='store_true', + default=False, + ) + + parser.add_argument( + '--severity', + help='Severity for alerting. Default: %(default)s', + dest='SEVERITY', + default=DEFAULT_SEVERITY, + choices=['warn', 'crit'], + ) + + parser.add_argument( + '--test', + help='For unit tests. Needs "path-to-stdout-file,path-to-stderr-file,expected-retc".', + dest='TEST', + type=lib.args.csv, + ) + + parser.add_argument( + '--timeout', + help='Network timeout in seconds. Default: %(default)s (seconds)', + dest='TIMEOUT', + type=int, + default=DEFAULT_TIMEOUT, + ) + + parser.add_argument( + '--url', + help='GitLab health URL endpoint. Default: %(default)s', + dest='URL', + default=DEFAULT_URL, + ) + + return parser.parse_args() + + +def main(): + """The main function. Hier spielt die Musik. + """ + + # parse the command line, exit with UNKNOWN if it fails + try: + args = parse_args() + except SystemExit: + sys.exit(STATE_UNKNOWN) + + # init some vars + state = STATE_OK + + # fetch and analyze data + if args.TEST is None: + result = lib.base.coe(lib.url.fetch(args.URL, timeout=args.TIMEOUT)) + else: + # do not call the command, put in test data + result, stderr, retc = lib.test.test(args.TEST) + + if result == 'GitLab OK': + msg = 'The GitLab application server is processing requests, but this does not mean ' \ + 'that the database or other services are ready.' + else: + msg = 'The GitLab application server seems to have a problem.' 
+ state = lib.base.str2state(args.SEVERITY) + perfdata = lib.base.get_perfdata('gitlab-health', state, None, None, None, 0, STATE_UNKNOWN) + + # over and out + lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK) + + +if __name__ == '__main__': + try: + main() + except Exception: # pylint: disable=W0703 + lib.base.cu() diff --git a/check-plugins/gitlab-health/grafana/gitlab-health.yml b/check-plugins/gitlab-health/grafana/gitlab-health.yml new file mode 100644 index 00000000..a67d93ab --- /dev/null +++ b/check-plugins/gitlab-health/grafana/gitlab-health.yml @@ -0,0 +1,115 @@ +apiVersion: grizzly.grafana.com/v1alpha1 +kind: Dashboard +metadata: + folder: linuxfabrik-monitoring-plugins + name: gitlab-health +spec: + schemaVersion: 2023041201 + tags: + - Linuxfabrik + - Grizzly + - static + time: + from: now-90d + to: now + timepicker: + hidden: false + refresh_intervals: + - 1m + timezone: browser + title: GitLab Health + uid: linuxfabrik-monitoring-plugins-gitlab-health + editable: true + liveNow: true + refresh: 1m + templating: + list: + - hide: 2 + label: Command + name: command + query: cmd-check-gitlab-health + type: constant + - label: Hostname + name: hostname + query: SHOW TAG VALUES FROM "cmd-check-gitlab-health" WITH KEY = "hostname" + refresh: 2 + sort: 1 + type: query + + panels: + + - title: GitLab Health + type: timeseries + gridPos: + h: 8 + w: 12 + x: 12 + y: 8 + fieldConfig: + defaults: + color: + mode: palette-classic + custom: + lineInterpolation: smooth + spanNulls: true + decimals: 0 + max: 3 + min: 0 + unit: short + overrides: + - matcher: + id: byName + options: gitlab-health + properties: + - id: mappings + value: + - options: + '0': + text: OK + '1': + text: WARN + '2': + text: CRIT + '3': + text: UNKN + type: value + options: + legend: + calcs: + - min + - max + displayMode: table + placement: bottom + showLegend: true + tooltip: + mode: multi + sort: none + + targets: + + - alias: gitlab-health + refId: gitlab-health + groupBy: + - 
params: + - $interval + type: time + measurement: /^$command$/ + resultFormat: time_series + select: + - - params: + - value + type: field + - params: [] + type: mean + tags: + - key: hostname + operator: '=~' + value: /^$hostname$/ + - condition: AND + key: service + operator: '=' + value: GitLab Health + - condition: AND + key: metric + operator: '=' + value: gitlab-health diff --git a/check-plugins/gitlab-health/icingaweb2-module-director/gitlab-health.json b/check-plugins/gitlab-health/icingaweb2-module-director/gitlab-health.json new file mode 100644 index 00000000..ec06ba40 --- /dev/null +++ b/check-plugins/gitlab-health/icingaweb2-module-director/gitlab-health.json @@ -0,0 +1,171 @@ +{ + "Command": { + "cmd-check-gitlab-health": { + "arguments": { + "--always-ok": { + "set_if": "$gitlab_health_always_ok$" + }, + "--severity": { + "value": "$gitlab_health_severity$" + }, + "--timeout": { + "value": "$gitlab_health_timeout$" + }, + "--url": { + "value": "$gitlab_health_url$" + } + }, + "command": "/usr/lib64/nagios/plugins/gitlab-health", + "disabled": false, + "fields": [ + { + "datafield_id": 1, + "is_required": "n", + "var_filter": null + }, + { + "datafield_id": 2, + "is_required": "n", + "var_filter": null + }, + { + "datafield_id": 3, + "is_required": "n", + "var_filter": null + }, + { + "datafield_id": 4, + "is_required": "n", + "var_filter": null + } + ], + "imports": [], + "is_string": null, + "methods_execute": "PluginCheck", + "object_name": "cmd-check-gitlab-health", + "object_type": "object", + "timeout": "10", + "vars": {}, + "zone": null, + "uuid": "dfea07b5-9c0b-4946-a4bf-bef39ecbc712" + } + }, + "ServiceTemplate": { + "tpl-service-gitlab-health": { + "action_url": null, + "apply_for": null, + "assign_filter": null, + "check_command": "cmd-check-gitlab-health", + "check_interval": 60, + "check_period": null, + "check_timeout": null, + "command_endpoint": null, + "disabled": false, + "display_name": null, + "enable_active_checks": null, + 
"enable_event_handler": null, + "enable_flapping": null, + "enable_notifications": true, + "enable_passive_checks": null, + "enable_perfdata": true, + "event_command": null, + "fields": [], + "flapping_threshold_high": null, + "flapping_threshold_low": null, + "groups": [], + "host": null, + "icon_image": "gitlab-health.png", + "icon_image_alt": null, + "imports": [ + "tpl-service-generic" + ], + "max_check_attempts": 5, + "notes": "Checks whether the GitLab application server is running. It does not hit the database or verifies other services are running.", + "notes_url": "https://github.com/Linuxfabrik/monitoring-plugins/tree/main/check-plugins/gitlab-health", + "object_name": "tpl-service-gitlab-health", + "object_type": "template", + "retry_interval": 15, + "service_set": null, + "template_choice": null, + "use_agent": null, + "use_var_overrides": null, + "vars": { + "criticality": "C", + "gitlab_health_always_ok": false, + "gitlab_health_severity": "warn", + "gitlab_health_timeout": 3, + "gitlab_health_url": "http://localhost/-/health" + }, + "volatile": null, + "zone": null, + "uuid": "5659e80e-7ef7-431e-bfda-906a3665e19c" + } + }, + "DataList": { + "gitlab_health_severity_list": { + "list_name": "gitlab_health_severity_list", + "owner": "icinga-admin", + "entries": [ + { + "entry_name": "warn", + "entry_value": "Warn", + "format": "string", + "allowed_roles": null + }, + { + "entry_name": "crit", + "entry_value": "Crit", + "format": "string", + "allowed_roles": null + } + ], + "uuid": "499b2477-e625-4ebd-82eb-a066bbfb0fa8" + } + }, + "Datafield": { + "1": { + "varname": "gitlab_health_always_ok", + "caption": "Gitlab Health: Always OK?", + "description": "Always returns OK.", + "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeBoolean", + "format": null, + "settings": {}, + "uuid": "ec81f86f-3219-4b91-ab7d-50528f033df1" + }, + "2": { + "varname": "gitlab_health_severity", + "caption": "Gitlab Health: Severity", + "description": "Severity for 
alerting.", + "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeDatalist", + "format": null, + "settings": { + "behavior": "strict", + "data_type": "string", + "datalist": "gitlab_health_severity_list" + }, + "uuid": "7ade705a-13a3-497e-9e6d-2453585349ee" + }, + "3": { + "varname": "gitlab_health_timeout", + "caption": "Gitlab Health: Timeout", + "description": "Network timeout in seconds.", + "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeString", + "format": null, + "settings": { + "visibility": "visible" + }, + "uuid": "f8292335-7d2a-4b53-858e-523e3786e474" + }, + "4": { + "varname": "gitlab_health_url", + "caption": "Gitlab Health: URL", + "description": "GitLab health URL endpoint.", + "datatype": "Icinga\\Module\\Director\\DataType\\DataTypeString", + "format": null, + "settings": { + "visibility": "visible" + }, + "uuid": "aa8db664-9a90-4cec-b251-8fee00b261c8" + } + } +} diff --git a/check-plugins/gitlab-health/icingaweb2-module-director/gitlab-health.yml b/check-plugins/gitlab-health/icingaweb2-module-director/gitlab-health.yml new file mode 100644 index 00000000..d34d532b --- /dev/null +++ b/check-plugins/gitlab-health/icingaweb2-module-director/gitlab-health.yml @@ -0,0 +1,6 @@ +--- +variants: + - linux + +overwrites: + '["ServiceTemplate"]["tpl-service-gitlab-health"]["enable_perfdata"]': true diff --git a/check-plugins/gitlab-health/icingaweb2-module-grafana/gitlab-health.ini b/check-plugins/gitlab-health/icingaweb2-module-grafana/gitlab-health.ini new file mode 100644 index 00000000..20ce6388 --- /dev/null +++ b/check-plugins/gitlab-health/icingaweb2-module-grafana/gitlab-health.ini @@ -0,0 +1,6 @@ +[cmd-check-gitlab-health] +dashboard = "GitLab Health" +panelId = "1" +orgId = "" +repeatable = "no" +dashboarduid = "gitlab-health" diff --git a/check-plugins/gitlab-health/lib b/check-plugins/gitlab-health/lib new file mode 120000 index 00000000..58677ddb --- /dev/null +++ b/check-plugins/gitlab-health/lib @@ -0,0 +1 @@ +../../lib \ No 
newline at end of file diff --git a/check-plugins/gitlab-health/unit-test/run b/check-plugins/gitlab-health/unit-test/run new file mode 100755 index 00000000..2625b8a2 --- /dev/null +++ b/check-plugins/gitlab-health/unit-test/run @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8; py-indent-offset: 4 -*- +# +# Author: Linuxfabrik GmbH, Zurich, Switzerland +# Contact: info (at) linuxfabrik (dot) ch +# https://www.linuxfabrik.ch/ +# License: The Unlicense, see LICENSE file. + +# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.rst + +import sys +sys.path.append("..") # Adds higher directory to python modules path. + + + +import unittest + +from lib.globals import STATE_OK, STATE_UNKNOWN, STATE_WARN, STATE_CRIT +import lib.base +import lib.shell + + +class TestCheck(unittest.TestCase): + + check = '../gitlab-health' + + #self.assertEqual(retc, STATE_CRIT) + #self.assertIn('Waiting for more data (1).', stdout) + #self.assertRegex(stdout, r'1 error in Kernel Ring Buffer.') + + def test_if_check_runs_EXAMPLE01(self): + stdout, stderr, retc = lib.base.coe(lib.shell.shell_exec(self.check + ' --test=stdout/EXAMPLE01,,0')) + self.assertIn('The GitLab application server is processing requests, but this does not mean that the database or other services are ready.', stdout) + self.assertEqual(stderr, '') + self.assertEqual(retc, STATE_OK) + + def test_if_check_runs_EXAMPLE02(self): + stdout, stderr, retc = lib.base.coe(lib.shell.shell_exec(self.check + ' --test=stdout/EXAMPLE02,,0')) + self.assertIn('The GitLab application server seems to have a problem', stdout) + self.assertEqual(stderr, '') + self.assertEqual(retc, STATE_WARN) + + +if __name__ == '__main__': + unittest.main() diff --git a/check-plugins/gitlab-health/unit-test/stdout/EXAMPLE01 b/check-plugins/gitlab-health/unit-test/stdout/EXAMPLE01 new file mode 100644 index 00000000..d6ce93a9 --- /dev/null +++ b/check-plugins/gitlab-health/unit-test/stdout/EXAMPLE01 @@ -0,0 +1 @@ +GitLab 
OK \ No newline at end of file diff --git a/check-plugins/gitlab-health/unit-test/stdout/EXAMPLE02 b/check-plugins/gitlab-health/unit-test/stdout/EXAMPLE02 new file mode 100644 index 00000000..e69de29b