Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Node metrics #166

Merged
merged 20 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ansible/deploy-clickhouse.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
- name: Deploy oonidata clickhouse hosts
hosts:
- notebook.ooni.org
- notebook1.htz-fsn.prod.ooni.nu
- data1.htz-fsn.prod.ooni.nu
# - data2.htz-fsn.prod.ooni.nu
- data3.htz-fsn.prod.ooni.nu
Expand Down
5 changes: 4 additions & 1 deletion ansible/deploy-notebook.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
---
- name: Deploy notebook host
hosts: notebook.ooni.org
hosts: notebook1.htz-fsn.prod.ooni.nu
become: true
tags:
- notebook
vars:
enable_oonipipeline_worker: false
ssl_domains:
- "{{ inventory_hostname }}"
- "notebook.ooni.org"
roles:
- oonidata
10 changes: 5 additions & 5 deletions ansible/group_vars/clickhouse/vars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ nftables_clickhouse_allow:
ip: 88.198.54.12
- fqdn: data3.htz-fsn.prod.ooni.nu
ip: 168.119.7.188
- fqdn: notebook.ooni.org
- fqdn: notebook1.htz-fsn.prod.ooni.nu
ip: 138.201.19.39
- fqdn: backend-hel.ooni.org
ip: 65.108.192.151
Expand All @@ -19,7 +19,7 @@ nftables_zookeeper_allow:
ip: 88.198.54.12
- fqdn: data3.htz-fsn.prod.ooni.nu
ip: 168.119.7.188
- fqdn: notebook.ooni.org
- fqdn: notebook1.htz-fsn.prod.ooni.nu
ip: 138.201.19.39

clickhouse_version: 24.8.6.70
Expand Down Expand Up @@ -94,9 +94,9 @@ clickhouse_keeper:
port: 9234

- keeper_server:
server: notebook.ooni.org
server: notebook1.htz-fsn.prod.ooni.nu
id: 4
hostname: notebook.ooni.org
hostname: notebook1.htz-fsn.prod.ooni.nu
port: 9234

clickhouse_zookeeper:
Expand All @@ -107,7 +107,7 @@ clickhouse_zookeeper:
host: clickhouse3.prod.ooni.io
port: 9181
- node:
host: notebook.ooni.org
host: notebook1.htz-fsn.prod.ooni.nu
port: 9181

clickhouse_remote_servers:
Expand Down
2 changes: 0 additions & 2 deletions ansible/host_vars/oonidata.ooni.org

This file was deleted.

4 changes: 2 additions & 2 deletions ansible/inventory
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ ghs_ams
## Role tags

[clickhouse]
notebook.ooni.org
notebook1.htz-fsn.prod.ooni.nu
data1.htz-fsn.prod.ooni.nu
data3.htz-fsn.prod.ooni.nu

Expand All @@ -16,7 +16,7 @@ data1.htz-fsn.prod.ooni.nu

[htz_fsn]
monitoring.ooni.org
notebook.ooni.org
notebook1.htz-fsn.prod.ooni.nu
data1.htz-fsn.prod.ooni.nu
data3.htz-fsn.prod.ooni.nu
#backend-fsn.ooni.org
Expand Down
36 changes: 18 additions & 18 deletions ansible/roles/monitoring/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
---
# # monitoring host # #

# Install the Grafana repository signing key. The Grafana apt source entry
# points at this path through signed-by=/etc/apt/grafana.asc.
- name: Create Grafana repo GPG pubkey
  tags: apt
  template:
    src: templates/grafana.asc
    dest: /etc/apt/grafana.asc
    # Quoted so the mode reaches Ansible as a string, not a YAML octal integer.
    mode: "0644"
    owner: root

- name: Set grafana apt repo
tags: monitoring, grafana
template:
Expand All @@ -9,6 +17,14 @@
mode: 0644
owner: root

# Render the Grafana apt source definition into /etc/apt/sources.list.d.
- name: Create Grafana sources list
  tags: apt
  template:
    src: templates/grafana.sources
    dest: /etc/apt/sources.list.d/grafana.sources
    # Quoted so the mode reaches Ansible as a string, not a YAML octal integer.
    mode: "0644"
    owner: root

- name: Installs packages
tags: monitoring, prometheus
apt:
Expand Down Expand Up @@ -37,22 +53,6 @@
mode: 0644
owner: root

- name: Create Grafana repo GPG pubkey
tags: apt
template:
src: templates/grafana.gpg
dest: /etc/apt/grafana.asc
mode: 0644
owner: root

- name: Create Grafana sources list
tags: apt
template:
src: templates/grafana.sources
dest: /etc/apt/sources.list.d/grafana.sources
mode: 0644
owner: root

- name: Installs grafana
tags: monitoring, grafana
apt:
Expand All @@ -65,7 +65,7 @@
tags: monitoring, grafana
lineinfile:
path: /etc/grafana/grafana.ini
regexp: '^;?domain = '
regexp: "^;?domain = "
line: domain = grafana.ooni.org

- name: Autoremove
Expand Down Expand Up @@ -188,7 +188,7 @@
tags: fail2ban
lineinfile:
path: /etc/fail2ban/jail.conf
regexp: '^backend '
regexp: "^backend "
line: backend = systemd

- name: Configure fail2ban
Expand Down
2 changes: 1 addition & 1 deletion ansible/roles/monitoring/templates/grafana.list
Original file line number Diff line number Diff line change
@@ -1 +1 @@
deb https://packages.grafana.com/oss/deb stable main
deb [signed-by=/etc/apt/grafana.asc] https://apt.grafana.com stable main
4 changes: 2 additions & 2 deletions ansible/roles/oonidata/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ miniconda_install_dir: /opt/miniconda
jupyterhub_config_dir: /etc/jupyterhub
jupyterhub_runtime_dir: /srv/jupyterhub
oonipipeline_runtime_dir: /srv/oonipipeline
tls_cert_dir: /etc/letsencrypt/live
tls_cert_dir: /var/lib/dehydrated/certs
admin_group_name: admin
enable_oonipipeline_worker: true
enable_jupyterhub: true
clickhouse_url: "clickhouse://localhost"
certbot_domains:
ssl_domains:
- "{{ inventory_hostname }}"
conda_forge_packages:
- seaborn
Expand Down
3 changes: 0 additions & 3 deletions ansible/roles/oonidata/meta/requirements.yml

This file was deleted.

12 changes: 2 additions & 10 deletions ansible/roles/oonidata/tasks/jupyterhub.yml
Original file line number Diff line number Diff line change
Expand Up @@ -113,15 +113,7 @@
- nginx

- ansible.builtin.include_role:
name: geerlingguy.certbot
name: dehydrated
tags:
- oonidata
- certbot
vars:
certbot_admin_email: [email protected]
certbot_create_extra_args: ""
certbot_create_if_missing: true
certbot_create_standalone_stop_services:
- nginx
certbot_certs:
- domains: "{{ certbot_domains }}"
- dehydrated
2 changes: 0 additions & 2 deletions ansible/roles/oonidata/templates/nginx-jupyterhub.j2
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ server {
ssl_trusted_certificate {{ tls_cert_dir }}/{{ inventory_hostname }}/chain.pem;

server_name _;
access_log /var/log/nginx/{{ inventory_hostname }}.access.log;
error_log /var/log/nginx/{{ inventory_hostname }}.log warn;

add_header Access-Control-Allow-Origin *;

Expand Down
1 change: 0 additions & 1 deletion ansible/roles/oonidata_airflow/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
tls_cert_dir: /var/lib/dehydrated/certs
certbot_domains_extra: []
131 changes: 81 additions & 50 deletions ansible/roles/prometheus/templates/prometheus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ alerting:
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.

# Node exporter metrics
{% for bbjob in blackbox_jobs %}
- job_name: "{{ bbjob.name }}"
metrics_path: /probe
Expand Down Expand Up @@ -60,6 +61,51 @@ scrape_configs:
- {{ host }}:9100
{% endfor %}

# Targets of this job are written as full URLs. The relabel rules below split
# each URL into scheme, metrics path, instance name and the final host[:port]
# address. Rules run in order, so their sequence must not be changed.
- job_name: 'node_exporter'
  scrape_interval: 5s
  basic_auth:
    username: 'prom'
    password: '{{ prometheus_metrics_password_prod }}'
  static_configs:
    - targets:
        - https://data1.htz-fsn.prod.ooni.nu/metrics/node_exporter
        - https://data3.htz-fsn.prod.ooni.nu/metrics/node_exporter
        - https://notebook1.htz-fsn.prod.ooni.nu/metrics/node_exporter
        - http://0.do.th.prod.ooni.io:9001/metrics
        - http://1.do.th.prod.ooni.io:9001/metrics
        - http://2.do.th.prod.ooni.io:9001/metrics
  relabel_configs:
    # URL prefix (http or https) becomes the scrape scheme
    - source_labels:
        - __address__
      regex: '(https|http)://([^/^:]+)(:\d+)?(/.*)'
      target_label: __scheme__
      replacement: '$1'
    # everything from the first "/" after the host becomes the metrics path
    - source_labels:
        - __address__
      regex: '(https|http)://([^/^:]+)(:\d+)?(/.*)'
      target_label: __metrics_path__
      replacement: '$4'
    # the bare hostname (port dropped) becomes the instance label
    - source_labels:
        - __address__
      regex: '(https|http)://([^/^:]+)(:\d+)?(/.*)'
      target_label: instance
      replacement: '$2'
    # a ".prod." or ".dev." component in the hostname selects the environment
    - source_labels:
        - instance
      regex: '.*\.(prod|dev)\..*'
      target_label: environment
      replacement: '$1'
    # hosts with no such component leave the label empty; default those to prod
    - source_labels:
        - environment
      regex: '^$'
      target_label: environment
      replacement: 'prod'
    # finally reduce the address itself to fqdn[:port]
    - source_labels:
        - __address__
      regex: '(https|http)://([^/]+)(:\d+)?(/.*)'
      target_label: __address__
      replacement: '$2$3'

# TODO: should this be re-enabled?
# - job_name: 'netdata'
# scrape_interval: 5s
# scheme: https
Expand All @@ -73,6 +119,29 @@ scrape_configs:
# static_configs:
# - targets:

# Application level metrics

# Unauthenticated plain-HTTP scrape of /metrics on backend-fsn, port 9363.
- job_name: 'clickhouse'
  scheme: http
  metrics_path: "/metrics"
  scrape_interval: 5s
  static_configs:
    - targets:
        - backend-fsn.ooni.org:9363

# HTTPS scrape of /metrics/clickhouse on the data and notebook hosts,
# authenticated with the prod metrics basic-auth credentials.
- job_name: 'clickhouse-cluster'
  scheme: https
  metrics_path: "/metrics/clickhouse"
  scrape_interval: 5s
  basic_auth:
    username: 'prom'
    password: '{{ prometheus_metrics_password_prod }}'
  static_configs:
    - targets:
        - data1.htz-fsn.prod.ooni.nu
        - data3.htz-fsn.prod.ooni.nu
        - notebook1.htz-fsn.prod.ooni.nu

- job_name: 'raw-netdata'
scrape_interval: 5s
scheme: http
Expand All @@ -90,20 +159,7 @@ scrape_configs:
static_configs:
- targets: [ 'api.ooni.io:443' ]

- job_name: 'ooniapi-services-dev'
scrape_interval: 5s
scheme: https
metrics_path: "/metrics"
basic_auth:
username: 'prom'
password: '{{ prometheus_metrics_password_dev }}'
static_configs:
- targets:
- ooniauth.dev.ooni.io
- oonirun.dev.ooni.io
- ooniprobe.dev.ooni.io

- job_name: 'ooniapi-services-prod'
- job_name: 'ooniapi-services'
scrape_interval: 5s
scheme: https
metrics_path: "/metrics"
Expand All @@ -115,8 +171,18 @@ scrape_configs:
- ooniauth.prod.ooni.io
- oonirun.prod.ooni.io
- ooniprobe.prod.ooni.io
# These dev targets require a different password. We should probably update
# them to accept the same one so that monitoring can use a single credential.
# - ooniauth.dev.ooni.io
# - oonirun.dev.ooni.io
# - ooniprobe.dev.ooni.io
relabel_configs:
  # Derive the `environment` label from the target hostname. Prometheus
  # anchors relabel regexes at both ends, so the pattern must match the whole
  # address; a bare "(prod|dev)" never matches and the label is never set.
  # Single quotes keep the backslashes literal.
  - source_labels: [__address__]
    regex: '.*\.(prod|dev)\..*'
    target_label: environment
    replacement: "$1"

- job_name: 'oonith-prod'
- job_name: 'oonith'
scrape_interval: 5s
scheme: http
metrics_path: "/metrics"
Expand All @@ -125,13 +191,9 @@ scrape_configs:
password: '{{ prometheus_metrics_password_prod }}'
static_configs:
- targets:
- 0.do.th.prod.ooni.io:9001
- 0.do.th.prod.ooni.io
- 1.do.th.prod.ooni.io:9001
- 1.do.th.prod.ooni.io
- 2.do.th.prod.ooni.io
- 2.do.th.prod.ooni.io:9001

- job_name: 'ooni-web'
scrape_interval: 5m
scheme: https
Expand All @@ -143,37 +205,6 @@ scrape_configs:
- openobservatory.github.io:443
- ooni.netlify.app:443

- job_name: 'clickhouse'
scrape_interval: 5s
scheme: http
metrics_path: "/metrics"
static_configs:
- targets:
- backend-fsn.ooni.org:9363

- job_name: 'clickhouse cluster'
scrape_interval: 5s
scheme: http
metrics_path: "/metrics/clickhouse"
basic_auth:
username: 'prom'
password: '{{ prometheus_metrics_password_prod }}'
static_configs:
- targets:
- data1.htz-fsn.prod.ooni.nu:9100
- data3.htz-fsn.prod.ooni.nu:9100

- job_name: 'node new'
scrape_interval: 5s
scheme: http
metrics_path: "/metrics/node_exporter"
basic_auth:
username: 'prom'
password: '{{ prometheus_metrics_password_prod }}'
static_configs:
- targets:
- data1.htz-fsn.prod.ooni.nu:9100
- data3.htz-fsn.prod.ooni.nu:9100

# See ansible/roles/ooni-backend/tasks/main.yml for the scraping targets
- job_name: 'haproxy'
Expand Down
Loading
Loading