Skip to content

Commit

Permalink
Add gitlab-readiness (fix #670)
Browse files Browse the repository at this point in the history
  • Loading branch information
markuslf committed Aug 25, 2023
1 parent e32cb77 commit d87027d
Show file tree
Hide file tree
Showing 18 changed files with 1,094 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ Monitoring Plugins:

* gitlab-health (fix #670)
* gitlab-liveness (fix #670)
* gitlab-readiness (fix #670)
* gitlab-version
* openstack-nova-list
* postgresql-version
Expand Down
43 changes: 43 additions & 0 deletions assets/icingaweb2-module-director/all-the-rest.json
Original file line number Diff line number Diff line change
Expand Up @@ -9172,6 +9172,49 @@
"volatile": null,
"zone": null
},
"GitLab Readiness": {
"action_url": null,
"apply_for": null,
"assign_filter": null,
"check_command": null,
"check_interval": null,
"check_period": null,
"check_timeout": null,
"command_endpoint": null,
"disabled": false,
"display_name": null,
"enable_active_checks": null,
"enable_event_handler": null,
"enable_flapping": null,
"enable_notifications": null,
"enable_passive_checks": null,
"enable_perfdata": null,
"event_command": null,
"fields": [],
"flapping_threshold_high": null,
"flapping_threshold_low": null,
"groups": [],
"host": null,
"icon_image": null,
"icon_image_alt": null,
"imports": [
"tpl-service-gitlab-readiness"
],
"max_check_attempts": null,
"notes": null,
"notes_url": null,
"object_name": "GitLab Readiness",
"object_type": "object",
"retry_interval": null,
"service_set": null,
"template_choice": null,
"use_agent": null,
"use_var_overrides": null,
"uuid": "0e062d6a-e33e-49d2-9709-59450bfce9ac",
"vars": {},
"volatile": null,
"zone": null
},
"GitLab Version": {
"action_url": null,
"apply_for": null,
Expand Down
106 changes: 106 additions & 0 deletions check-plugins/gitlab-readiness/README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
Check gitlab-readiness
======================

Overview
--------

The readiness probe checks whether the GitLab instance is ready to accept traffic via Rails Controllers. The check also validates the dependent services (Database, Redis, Gitaly etc.) and gives a status for each.

Hints:

* Requires GitLab 9.1.0+
* To access monitoring resources, the requesting client IP needs to be included in the allowlist. For details, see `how to add IPs to the allowlist for the monitoring endpoints <https://docs.gitlab.com/ee/administration/monitoring/ip_allowlist.html>`_.
* This check is exempt from Rack Attack.
* GitLab Health Checks: https://docs.gitlab.com/ee/administration/monitoring/health_check.html


Fact Sheet
----------

.. csv-table::
:widths: 30, 70

"Check Plugin Download", "https://github.com/Linuxfabrik/monitoring-plugins/tree/main/check-plugins/gitlab-readiness"
"Check Interval Recommendation", "Once a minute"
"Can be called without parameters", "Yes"
"Compiled for", "Linux"


Help
----

.. code-block:: text
usage: gitlab-readiness [-h] [-V] [--always-ok] [--severity {warn,crit}]
[--test TEST] [--timeout TIMEOUT] [--url URL]
The readiness probe checks whether the GitLab instance is ready to accept
traffic via Rails Controllers. The check also validates the dependent services (Database,
Redis, Gitaly etc.) and gives a status for each.
options:
-h, --help show this help message and exit
-V, --version show program's version number and exit
--always-ok Always returns OK.
--severity {warn,crit}
Severity for alerting. Default: warn
--test TEST For unit tests. Needs "path-to-stdout-file,path-to-
stderr-file,expected-retc".
--timeout TIMEOUT Network timeout in seconds. Default: 3 (seconds)
--url URL GitLab readiness URL endpoint. Default:
http://localhost/-/readiness?all=1
Usage Examples
--------------

.. code-block:: bash
./gitlab-readiness --severity warn --timeout 3 --url http://localhost/-/readiness
Output:

.. code-block:: text
There are issues with gitaly_check. Run `curl http://localhost/-/readiness?all=1` for full results.
Service ! Message
------------------+-------------------------------------------------------------
cache ! Running
chat ! Running
cluster_cache ! Running
db ! Running
db_load_balancing ! Running
feature_flag ! Running
gitaly ! [WARNING] 14:connections to all backends failing; last e...
master ! Running
queues ! Running
rate_limiting ! Running
repository_cache ! Running
sessions ! Running
shared_state ! Running
trace_chunks ! Running
States
------

* Depending on the given ``--severity``, returns WARN (default) or CRIT if the readiness probe reports an error or if at least one of the dependent service checks does not return the status ``ok``.


Perfdata / Metrics
------------------

.. csv-table::
:widths: 25, 15, 60
:header-rows: 1

Name, Type, Description
gitlab-readiness, Number, "The current state (0 = OK, 1 = WARN, 2 = CRIT, 3 = UNKNOWN)."


Credits, License
----------------

* Authors: `Linuxfabrik GmbH, Zurich <https://www.linuxfabrik.ch>`_
* License: The Unlicense, see `LICENSE file <https://unlicense.org/>`_.
181 changes: 181 additions & 0 deletions check-plugins/gitlab-readiness/gitlab-readiness
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#!/usr/bin/env python3
# -*- coding: utf-8; py-indent-offset: 4 -*-
#
# Author: Linuxfabrik GmbH, Zurich, Switzerland
# Contact: info (at) linuxfabrik (dot) ch
# https://www.linuxfabrik.ch/
# License: The Unlicense, see LICENSE file.

# https://github.com/Linuxfabrik/monitoring-plugins/blob/main/CONTRIBUTING.rst

"""See the check's README for more details.
"""

import argparse # pylint: disable=C0413
import json # pylint: disable=C0413
import sys # pylint: disable=C0413

import lib.args # pylint: disable=C0413
import lib.base # pylint: disable=C0413
import lib.test # pylint: disable=C0413
import lib.url # pylint: disable=C0413
from lib.globals import (STATE_CRIT, STATE_OK, # pylint: disable=C0413
STATE_UNKNOWN, STATE_WARN)


# author and version, combined into the string printed by `-V` / `--version`
__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
__version__ = '2023082501'

# passed to argparse as the program description
DESCRIPTION = """The readiness probe checks whether the GitLab instance is ready to accept
traffic via Rails Controllers.
The check also validates the dependent services (Database, Redis, Gitaly etc.)
and gives a status for each."""

# defaults for the command line parameters `--severity`, `--timeout` and `--url`
DEFAULT_SEVERITY = 'warn'
DEFAULT_TIMEOUT = 3
DEFAULT_URL = 'http://localhost/-/readiness?all=1'


def parse_args():
    """Build the argparse parser for this check and return the parsed command line.
    """
    version_string = '%(prog)s: v' + __version__ + ' by ' + __author__

    # one entry per option: (option strings, keyword arguments for add_argument()),
    # listed in the order in which they are registered with the parser
    option_table = (
        (('-V', '--version'), {
            'action': 'version',
            'version': version_string,
        }),
        (('--always-ok',), {
            'action': 'store_true',
            'default': False,
            'dest': 'ALWAYS_OK',
            'help': 'Always returns OK.',
        }),
        (('--severity',), {
            'choices': ['warn', 'crit'],
            'default': DEFAULT_SEVERITY,
            'dest': 'SEVERITY',
            'help': 'Severity for alerting. Default: %(default)s',
        }),
        (('--test',), {
            'dest': 'TEST',
            'help': 'For unit tests. Needs "path-to-stdout-file,path-to-stderr-file,expected-retc".',
            'type': lib.args.csv,
        }),
        (('--timeout',), {
            'default': DEFAULT_TIMEOUT,
            'dest': 'TIMEOUT',
            'help': 'Network timeout in seconds. Default: %(default)s (seconds)',
            'type': int,
        }),
        (('--url',), {
            'default': DEFAULT_URL,
            'dest': 'URL',
            'help': 'GitLab readiness URL endpoint. Default: %(default)s',
        }),
    )

    cli = argparse.ArgumentParser(description=DESCRIPTION)
    for option_strings, settings in option_table:
        cli.add_argument(*option_strings, **settings)

    return cli.parse_args()


def _ensure_all_param(url):
    """Return `url` with the query parameter `all=1` appended if it is not already present.
    """
    _, has_query, query = url.partition('?')
    if 'all=1' in query.split('&'):
        return url
    if not has_query:
        return url + '?all=1'
    # the URL already carries a query string: extend it with `&` instead of adding a second `?`
    separator = '&' if query and not query.endswith('&') else ''
    return url + separator + 'all=1'


def _shorten(text, width=46):
    """Cut `text` to `width` characters; append `...` only if something was actually cut off.
    """
    if len(text) <= width:
        return text
    return text[:width] + '...'


def main():
    """The main function. This is where the action is.

    Fetches the JSON document from the readiness endpoint (or loads it from the file given by
    `--test`), derives the check state from it and hands message, state and perfdata over to
    `lib.base.oao()`.
    """

    # parse the command line, exit with UNKNOWN if it fails
    try:
        args = parse_args()
    except SystemExit:
        sys.exit(STATE_UNKNOWN)

    # the evaluation below relies on the per-service `*_check` keys, so always request them
    args.URL = _ensure_all_param(args.URL)

    # init some vars
    state = STATE_OK
    table_data = []
    issues = []

    # fetch and analyze data
    if args.TEST is None:
        result = lib.base.coe(lib.url.fetch_json(args.URL, timeout=args.TIMEOUT))
    else:
        # do not call the command, put in test data
        stdout, _stderr, _retc = lib.test.test(args.TEST)
        result = json.loads(stdout)

    if 'status' in result and 'cache_check' in result and 'error' not in result:
        for check in (
            'cache_check',
            'chat_check',
            'cluster_cache_check',
            'db_check',
            'db_load_balancing_check',
            'feature_flag_check',
            'gitaly_check',
            'master_check',
            'queues_check',
            'rate_limiting_check',
            'repository_cache_check',
            'sessions_check',
            'shared_state_check',
            'trace_chunks_check',
        ):
            entries = result.get(check)
            if not entries:
                # the response is not guaranteed to contain every check listed above;
                # a missing or empty one is skipped instead of crashing the plugin
                continue

            # a check is a list of results - NOTE(review): presumably one per backend/shard,
            # confirm - so look at all of them, not only the first
            failed = [entry for entry in entries if entry.get('status', '') != 'ok']
            if failed:
                check_state = lib.base.str2state(args.SEVERITY)
                state = lib.base.get_worst(state, check_state)
                # `message` may be absent or null, so fall back to an empty string
                check_msg = lib.base.state2str(check_state, suffix=' ') + \
                    _shorten(str(failed[0].get('message') or ''))
                issues.append(check)
            else:
                check_msg = 'Running'
            table_data.append({
                'check': check.replace('_check', ''),
                'msg': check_msg,
            })
        if state == STATE_OK:
            msg = 'Everything is ok.'
        else:
            msg = 'There are issues with {}. Run `curl {}` for full results.'.format(
                ', '.join(issues),
                args.URL,
            )
    elif 'status' in result and 'message' in result and 'error' not in result:
        # no per-service details available, just an overall status and a message
        msg = result['message']
        state = lib.base.str2state(args.SEVERITY)
    elif 'error' in result:
        msg = result['error']
        state = lib.base.str2state(args.SEVERITY)
    else:
        msg = 'Unknown error fetching URL {}'.format(args.URL)
        state = STATE_UNKNOWN
    perfdata = lib.base.get_perfdata('gitlab-readiness', state, None, None, None, 0, STATE_UNKNOWN)

    # build the message
    if table_data:
        msg += '\n\n' + lib.base.get_table(
            table_data,
            ['check', 'msg'],
            header=['Service', 'Message'],
        )

    # over and out
    lib.base.oao(msg, state, perfdata, always_ok=args.ALWAYS_OK)


# run the check only when executed as a script, not when imported
if __name__ == '__main__':
    try:
        main()
    except Exception:   # pylint: disable=W0703
        # last-resort handler for any exception main() did not handle itself
        # NOTE(review): lib.base.cu() is defined outside this file - presumably it reports the
        # error and exits; confirm against lib/base.py
        lib.base.cu()
Loading

0 comments on commit d87027d

Please sign in to comment.