From 23db78eb1b1b821c40a10cb651d37c6769e92fe9 Mon Sep 17 00:00:00 2001
From: Doug Szumski
Date: Wed, 9 May 2018 17:13:17 +0100
Subject: [PATCH 1/3] Add support for deploying Fluentd on Swarm cluster

---
 ansible/deploy_swarm_monitoring.yml           | 10 +++++
 ansible/group_vars/all/all                    | 10 ++++-
 .../roles/monasca_fluentd/handlers/main.yml   |  6 +++
 ansible/roles/monasca_fluentd/tasks/main.yml  | 39 ++++++++++++++++
 .../monasca_fluentd/templates/fluentd.conf.j2 | 45 +++++++++++++++++++
 5 files changed, 109 insertions(+), 1 deletion(-)
 create mode 100644 ansible/deploy_swarm_monitoring.yml
 create mode 100644 ansible/roles/monasca_fluentd/handlers/main.yml
 create mode 100644 ansible/roles/monasca_fluentd/tasks/main.yml
 create mode 100644 ansible/roles/monasca_fluentd/templates/fluentd.conf.j2

diff --git a/ansible/deploy_swarm_monitoring.yml b/ansible/deploy_swarm_monitoring.yml
new file mode 100644
index 0000000..b0790e8
--- /dev/null
+++ b/ansible/deploy_swarm_monitoring.yml
@@ -0,0 +1,10 @@
+#
+# Copyright StackHPC, 2018
+#
+---
+- name: Deploy Swarm cluster monitoring
+  hosts: cluster
+  become: yes
+  roles:
+    - role: monasca_fluentd
+
diff --git a/ansible/group_vars/all/all b/ansible/group_vars/all/all
index 078c77c..65803e4 100644
--- a/ansible/group_vars/all/all
+++ b/ansible/group_vars/all/all
@@ -15,7 +15,7 @@ alaska_cloud: alaska
 alaska_homedir: /alaska
 alaska_softiron: 10.4.99.101
 
-# OpenStack fully qualified project name
+# OpenStack fully qualified project name (used for Grafana with domain support)
 project_name: p3@default
 
 # Virtual IP address of the controller node
@@ -28,6 +28,14 @@ alaska_monitoring_server: 10.60.253.3
 monasca_agent_p3_username: p3-monasca-agent
 monasca_agent_p3_password: "{{ vault_monasca_agent_password }}"
 
+# Monasca Fluentd config
+monasca_fluentd_log_api_uri: http://{{ controller_vip }}:5607
+monasca_fluentd_keystone_uri: http://{{ controller_vip }}:5000
+monasca_fluentd_username: "{{ monasca_agent_p3_username }}"
+monasca_fluentd_password: "{{ monasca_agent_p3_password }}"
+monasca_fluentd_project_domain_id: default
+monasca_fluentd_project_name: p3
+
 # Local Grafana admin account for configuring Grafana
 grafana_admin_username: grafana-admin
 grafana_admin_password: "{{ vault_grafana_admin_password }}"
diff --git a/ansible/roles/monasca_fluentd/handlers/main.yml b/ansible/roles/monasca_fluentd/handlers/main.yml
new file mode 100644
index 0000000..e2f583c
--- /dev/null
+++ b/ansible/roles/monasca_fluentd/handlers/main.yml
@@ -0,0 +1,6 @@
+---
+- name: Restart Fluentd
+  docker_container:
+    name: fluentd
+    restart: True
+  when: fluentd_config.changed
diff --git a/ansible/roles/monasca_fluentd/tasks/main.yml b/ansible/roles/monasca_fluentd/tasks/main.yml
new file mode 100644
index 0000000..803f653
--- /dev/null
+++ b/ansible/roles/monasca_fluentd/tasks/main.yml
@@ -0,0 +1,39 @@
+#
+# Copyright StackHPC, 2018
+#
+---
+- name: Create Fluentd user
+  user:
+    name: fluent
+    state: present
+
+- name: Create Fluentd config directory
+  file:
+    path: /etc/fluentd
+    state: directory
+    owner: fluent
+    group: fluent
+    mode: 0755
+
+- name: Generate Fluentd config
+  template:
+    src: fluentd.conf.j2
+    dest: /etc/fluentd/fluentd.conf
+    owner: fluent
+    group: fluent
+    mode: 0644
+  register: fluentd_config
+  notify: Restart Fluentd
+
+- name: Start Fluentd container
+  docker_container:
+    name: fluentd
+    image: stackhpc/monasca-fluentd:latest
+    state: started
+    ports:
+      - "24224:24224"
+      - "24224:24224/udp"
+    volumes:
+      - /etc/fluentd/:/fluentd/etc:ro
+    env:
+      FLUENTD_CONF: "fluentd.conf"
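The Fluentd container above listens on the standard forward port (24224), which is what the Docker fluentd log driver ships to. As a minimal sketch only (not part of the patch; the container name, image and command are hypothetical), a workload container on the same node could route its stdout/stderr into this pipeline like so:

- name: Start an example container that logs via the local Fluentd instance
  docker_container:
    name: example-app                      # hypothetical workload container
    image: alpine:latest
    command: sh -c 'while true; do echo hello from swarm; sleep 5; done'
    log_driver: fluentd                    # Docker fluentd log driver
    log_options:
      fluentd-address: 127.0.0.1:24224     # the forward source defined in fluentd.conf.j2
      tag: example-app

The same log_driver and log_options settings apply to services created directly on the Swarm cluster rather than through Ansible.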
diff --git a/ansible/roles/monasca_fluentd/templates/fluentd.conf.j2 b/ansible/roles/monasca_fluentd/templates/fluentd.conf.j2
new file mode 100644
index 0000000..609a003
--- /dev/null
+++ b/ansible/roles/monasca_fluentd/templates/fluentd.conf.j2
@@ -0,0 +1,45 @@
+# Accept logs from Docker Fluentd log driver
+<source>
+  @type forward
+  port 24224
+  bind 0.0.0.0
+</source>
+
+# Add a timestamp dimension to all logs to record the event time. The
+# event time is the time extracted from the log message in all cases
+# where the time_key is set, and the time the record entered fluentd
+# if no time_key is set.
+<filter **>
+  @type record_transformer
+  <record>
+    timestamp ${time}
+  </record>
+</filter>
+
+# Docker saves all logs under the 'log' field. The fluentd-monasca
+# plugin assumes that they are saved under the 'message' field. Here
+# we map the 'log' field to the 'message' field for all logs.
+<filter **>
+  @type record_transformer
+  enable_ruby true
+  <record>
+    message ${record["log"]}
+  </record>
+  remove_keys log
+</filter>
+
+<match **>
+  type copy
+  <store>
+    @type monasca
+    keystone_url {{ monasca_fluentd_keystone_uri }}
+    monasca_log_api {{ monasca_fluentd_log_api_uri }}
+    monasca_log_api_version v3.0
+    username {{ monasca_fluentd_username }}
+    password {{ monasca_fluentd_password }}
+    domain_id {{ monasca_fluentd_project_domain_id }}
+    project_name {{ monasca_fluentd_project_name }}
+  </store>
+</match>

From 0041a1dda7fc9367924378cad1d060b2c708748a Mon Sep 17 00:00:00 2001
From: Doug Szumski
Date: Thu, 10 May 2018 15:09:59 +0100
Subject: [PATCH 2/3] Add John G's Prometheus roles

---
 ansible/deploy_swarm_monitoring.yml           |   7 +
 .../prometheus-docker-node/tasks/main.yml     |  23 +
 .../roles/prometheus-server/defaults/main.yml |   3 +
 .../files/grafana_dashboard.json              | 406 ++++++++++++++++++
 .../roles/prometheus-server/tasks/main.yml    | 101 +++++
 .../prometheus-server/templates/alerts.rules  |  20 +
 .../templates/prometheus.yml                  |  51 +++
 7 files changed, 611 insertions(+)
 create mode 100644 ansible/roles/prometheus-docker-node/tasks/main.yml
 create mode 100644 ansible/roles/prometheus-server/defaults/main.yml
 create mode 100644 ansible/roles/prometheus-server/files/grafana_dashboard.json
 create mode 100644 ansible/roles/prometheus-server/tasks/main.yml
 create mode 100644 ansible/roles/prometheus-server/templates/alerts.rules
 create mode 100644 ansible/roles/prometheus-server/templates/prometheus.yml

diff --git a/ansible/deploy_swarm_monitoring.yml b/ansible/deploy_swarm_monitoring.yml
index b0790e8..c5bece7 100644
--- a/ansible/deploy_swarm_monitoring.yml
+++ b/ansible/deploy_swarm_monitoring.yml
@@ -7,4 +7,11 @@
   become: yes
   roles:
     - role: monasca_fluentd
+    - role: prometheus-docker-node
+
+- name: Deploy Prometheus server
+  hosts: master
+  become: yes
+  roles:
+    - role: prometheus-server
 
diff --git a/ansible/roles/prometheus-docker-node/tasks/main.yml b/ansible/roles/prometheus-docker-node/tasks/main.yml
new file mode 100644
index 0000000..ff6d255
--- /dev/null
+++ b/ansible/roles/prometheus-docker-node/tasks/main.yml
@@ -0,0 +1,23 @@
+---
+- name: Add Prometheus node-exporter
+  docker_container:
+    name: prom-node-exporter
+    image: prom/node-exporter
+    network_mode: host  # TODO should access host fs too
+    ports:
+      - 9100:9100
+
+- name: Add cAdvisor
+  docker_container:
+    name: cAdvisor
+    image: google/cadvisor:latest
+    privileged: yes
+    detach: yes
+    ports:
+      - 8080:8080
+    volumes:
+      - /:/rootfs:ro
+      - /var/run:/var/run:rw
+      - /sys:/sys:ro
+      - /var/lib/docker/:/var/lib/docker:ro
+      - /dev/disk/:/dev/disk:ro
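The node-exporter task above carries a TODO about exposing the host filesystem. One possible way to address it, shown only as a sketch (it assumes a node-exporter release that understands the --path.procfs and --path.sysfs flags; not part of the patch):

- name: Add Prometheus node-exporter (with host /proc and /sys mounted)
  docker_container:
    name: prom-node-exporter
    image: prom/node-exporter
    network_mode: host
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
    command: >
      --path.procfs=/host/proc
      --path.sysfs=/host/sys

With network_mode: host the explicit ports mapping in the original task is ignored by Docker, so it could also be dropped.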
diff --git a/ansible/roles/prometheus-server/defaults/main.yml b/ansible/roles/prometheus-server/defaults/main.yml
new file mode 100644
index 0000000..72c8632
--- /dev/null
+++ b/ansible/roles/prometheus-server/defaults/main.yml
@@ -0,0 +1,3 @@
+---
+
+prometheus_home: /home/centos/prometheus  # TODO - better default?
diff --git a/ansible/roles/prometheus-server/files/grafana_dashboard.json b/ansible/roles/prometheus-server/files/grafana_dashboard.json
new file mode 100644
index 0000000..5d16730
--- /dev/null
+++ b/ansible/roles/prometheus-server/files/grafana_dashboard.json
@@ -0,0 +1,406 @@
+{
+  "annotations": {
+    "list": []
+  },
+  "editable": true,
+  "gnetId": null,
+  "graphTooltip": 1,
+  "hideControls": false,
+  "id": 1,
+  "links": [],
+  "refresh": false,
+  "rows": [
+    {
+      "collapse": false,
+      "height": "250px",
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": null,
+          "fill": 1,
+          "id": 1,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 1,
+          "links": [],
+          "nullPointMode": "null",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 6,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "irate(node_network_receive_bytes{job='node',device='eno1'}[5m])",
+              "format": "time_series",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{instance}} - {{device}}",
+              "refId": "A",
+              "step": 2
+            },
+            {
+              "expr": "irate(node_infiniband_port_data_received_bytes{job='node',device=\"mlx5_0\"}[5m])",
+              "format": "time_series",
+              "hide": true,
+              "intervalFactor": 2,
+              "legendFormat": "{{instance}} - {{device}}",
+              "refId": "C",
+              "step": 2
+            },
+            {
+              "expr": "irate(node_infiniband_port_data_transmitted_bytes{job='node',device=\"mlx5_0\"}[5m])",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "{{instance}} - {{device}}",
+              "refId": "D",
+              "step": 2
+            },
+            {
+              "expr": "irate(node_network_transmit_bytes{job='node',device='eno1'}[5m])",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "{{instance}} - {{device}}",
+              "refId": "E",
+              "step": 2
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Network",
+          "tooltip": {
+            "shared": true,
+            "sort": 0,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "Bps",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        },
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": null,
+          "fill": 1,
+          "id": 4,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 1,
+          "links": [],
+          "nullPointMode": "null",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 6,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "irate(node_disk_io_time_ms{job='node',device!~'^(md\\\\d+$|dm-)'}[5m]) / 1000 * 100",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "{{instance}} - {{device}}",
+              "refId": "A"
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Disk bandwidth",
+          "tooltip": {
+            "shared": true,
+            "sort": 0,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "percent",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Dashboard Row",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": 250,
+      "panels": [
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": null,
+          "fill": 1,
+          "id": 2,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 1,
+          "links": [],
+          "nullPointMode": "null",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 6,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "100 - (avg by (instance) (irate(node_cpu{job=\"node\",mode=\"idle\"}[5m])) * 100)",
+              "format": "time_series",
+              "hide": false,
+              "intervalFactor": 2,
+              "legendFormat": "{{ instance }}",
+              "refId": "A",
+              "step": 10
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "CPU",
+          "tooltip": {
+            "shared": true,
+            "sort": 0,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        },
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": null,
+          "fill": 1,
+          "id": 3,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 1,
+          "links": [],
+          "nullPointMode": "null",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 6,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "node_memory_Active{job=\"node\"}",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "{{instance}}",
+              "refId": "A",
+              "step": 2
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Memory Usage",
+          "tooltip": {
+            "shared": true,
+            "sort": 0,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": false,
+      "title": "Dashboard Row",
+      "titleSize": "h6"
+    }
+  ],
+  "schemaVersion": 14,
+  "style": "dark",
+  "tags": [],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-30m",
+    "to": "now"
+  },
+  "timepicker": {
+    "refresh_intervals": [
+      "5s",
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h",
+      "2h",
+      "1d"
+    ],
+    "time_options": [
+      "5m",
+      "15m",
+      "1h",
+      "6h",
+      "12h",
+      "24h",
+      "2d",
+      "7d",
+      "30d"
+    ]
+  },
+  "timezone": "",
+  "title": "Spark",
+  "version": 11
+}
diff --git a/ansible/roles/prometheus-server/tasks/main.yml b/ansible/roles/prometheus-server/tasks/main.yml
new file mode 100644
index 0000000..02db899
--- /dev/null
+++ b/ansible/roles/prometheus-server/tasks/main.yml
@@ -0,0 +1,101 @@
+---
+- name: Create prometheus data dir
+  file:
+    path: "{{ prometheus_home }}/data"
+    state: directory
+    mode: 0755
+
+- name: Configure prometheus targets
+  template:
+    src: prometheus.yml  # TODO - add ip addresses properly
+    dest: "{{ prometheus_home }}/prometheus.yml"
+
+- name: Configure prometheus alerts
+  template:
+    src: alerts.rules
+    dest: "{{ prometheus_home }}/alerts.rules"
+
+- name: Add Prometheus server
+  docker_container:
+    name: prometheus
+    image: prom/prometheus
+    ports:
+      - 9090:9090
+    volumes:
+      - "{{ prometheus_home }}/prometheus.yml:/etc/prometheus/prometheus.yml"
+      - "{{ prometheus_home }}/alerts.rules:/etc/prometheus/alerts.rules"
+      - "{{ prometheus_home }}/data:/prometheus"
+
+- name: Create Grafana dir
+  file:
+    path: "/var/lib/grafana"
+    state: directory
+    mode: 0755
+
+- name: Add Grafana
+  docker_container:
+    name: grafana
+    image: grafana/grafana
+    ports:
+      - 3000:3000  # TODO - add more config, github OAuth?
+    volumes:
+      - /var/lib/grafana
+
+- name: Check datasource
+  uri:
+    url: "http://{{ ansible_host }}:3000/api/datasources"
+    method: GET
+    user: admin
+    password: admin
+    force_basic_auth: yes
+    return_content: yes
+  register: datasources
+
+- name: Add datasource
+  uri:
+    url: "http://{{ ansible_host }}:3000/api/datasources"
+    method: POST
+    user: admin
+    password: admin
+    force_basic_auth: yes
+    status_code: 200
+    body_format: json
+    body: '{"name": "Prometheus",
+           "type": "prometheus",
+           "access": "proxy",
+           "url": "http://{{ ansible_host }}:9090",
+           "password": "",
+           "user": "",
+           "basicAuth": false,
+           "basicAuthUser": "",
+           "basicAuthPassword": "",
+           "isDefault": true,
+           "jsonData": null }'
+  when: datasources.json == []
+
+- name: Check dashboard
+  uri:
+    url: "http://{{ ansible_host }}:3000/api/search?query=Spark"
+    method: GET
+    user: admin
+    password: admin
+    force_basic_auth: yes
+    return_content: yes
+  register: dashboards
+
+- debug:
+    msg: "Return value: {{ dashboards.json }}"
+
+- name: Add dashboard
+  uri:
+    url: "http://{{ ansible_host }}:3000/api/dashboards/db"
+    method: POST
+    user: admin
+    password: admin
+    force_basic_auth: yes
+    status_code: 200
+    body_format: json
+    body:
+      dashboard: '{{ lookup("file","grafana_dashboard.json") }}'
+      overwrite: true
+  when: dashboards.json == []
summary = "Instance {{ $labels.instance }} high traffic on {{ $labels.device}}" + } +{% endraw %} diff --git a/ansible/roles/prometheus-server/templates/prometheus.yml b/ansible/roles/prometheus-server/templates/prometheus.yml new file mode 100644 index 0000000..75538b4 --- /dev/null +++ b/ansible/roles/prometheus-server/templates/prometheus.yml @@ -0,0 +1,51 @@ +# {{ ansible_managed }} + +global: + scrape_interval: 15s + evaluation_interval: 15s + + external_labels: + monitor: 'sahara-test' + +rule_files: + # TODO - "alerts.rules" + +scrape_configs: + - job_name: 'prometheus' + + static_configs: + - targets: +{% for node in groups['master'] %} + - '{{ node }}:9090' +{% endfor %} + + - job_name: 'node' + scrape_interval: 15s + static_configs: + - targets: +{% for node in groups['master'] %} + - '{{ node }}:9100' +{% endfor %} +{% for node in groups['slave'] %} + - '{{ node }}:9100' +{% endfor %} + + params: + collect[]: + - cpu + - meminfo + - diskstats + - netdev + - netstat + - infiniband + + - job_name: 'cAdvisor' + scrape_interval: 15s + static_configs: + - targets: +{% for node in groups['master'] %} + - '{{ node }}:8080' +{% endfor %} +{% for node in groups['slave'] %} + - '{{ node }}:8080' +{% endfor %} From a17fe52f97e2e59b41dafdeac2151ae22b148667 Mon Sep 17 00:00:00 2001 From: Doug Szumski Date: Thu, 10 May 2018 15:44:48 +0100 Subject: [PATCH 3/3] Support deploying Monasca Agent via Docker --- ansible/deploy_swarm_monitoring.yml | 1 + ansible/group_vars/all/all | 10 ++ .../roles/monasca_agent_docker/tasks/main.yml | 116 ++++++++++++++++++ .../templates/cpu.yaml.j2 | 4 + .../templates/disk.yaml.j2 | 6 + .../templates/docker.yaml.j2 | 5 + .../templates/ib_network.yaml.j2 | 4 + .../templates/load.yaml.j2 | 4 + .../templates/memory.yaml.j2 | 4 + .../templates/network.yaml.j2 | 5 + 10 files changed, 159 insertions(+) create mode 100644 ansible/roles/monasca_agent_docker/tasks/main.yml create mode 100644 ansible/roles/monasca_agent_docker/templates/cpu.yaml.j2 create mode 100644 ansible/roles/monasca_agent_docker/templates/disk.yaml.j2 create mode 100644 ansible/roles/monasca_agent_docker/templates/docker.yaml.j2 create mode 100644 ansible/roles/monasca_agent_docker/templates/ib_network.yaml.j2 create mode 100644 ansible/roles/monasca_agent_docker/templates/load.yaml.j2 create mode 100644 ansible/roles/monasca_agent_docker/templates/memory.yaml.j2 create mode 100644 ansible/roles/monasca_agent_docker/templates/network.yaml.j2 diff --git a/ansible/deploy_swarm_monitoring.yml b/ansible/deploy_swarm_monitoring.yml index c5bece7..d5e11f1 100644 --- a/ansible/deploy_swarm_monitoring.yml +++ b/ansible/deploy_swarm_monitoring.yml @@ -6,6 +6,7 @@ hosts: cluster become: yes roles: + - role: monasca_agent_docker - role: monasca_fluentd - role: prometheus-docker-node diff --git a/ansible/group_vars/all/all b/ansible/group_vars/all/all index 65803e4..a6964d5 100644 --- a/ansible/group_vars/all/all +++ b/ansible/group_vars/all/all @@ -36,6 +36,16 @@ monasca_fluentd_password: "{{ monasca_agent_p3_password }}" monasca_fluentd_project_domain_id: default monasca_fluentd_project_name: p3 +# Monasca Agent (Docker) config +monasca_agent_docker_forwarder_port: 17120 +monasca_agent_docker_log_level: INFO +monasca_agent_docker_api_uri: http://{{ controller_vip }}:8082/v2.0 +monasca_agent_docker_keystone_uri: http://{{ controller_vip }}:5000/v3 +monasca_agent_docker_username: "{{ monasca_agent_p3_username }}" +monasca_agent_docker_password: "{{ monasca_agent_p3_password }}" +monasca_agent_docker_project_name: 
diff --git a/ansible/roles/monasca_agent_docker/tasks/main.yml b/ansible/roles/monasca_agent_docker/tasks/main.yml
new file mode 100644
index 0000000..46ec759
--- /dev/null
+++ b/ansible/roles/monasca_agent_docker/tasks/main.yml
@@ -0,0 +1,116 @@
+#
+# Copyright StackHPC, 2018
+#
+---
+- name: Start monasca-agent-forwarder container
+  docker_container:
+    name: monasca-agent-forwarder
+    pull: true
+    image: stackhpc/agent-forwarder:latest
+    state: started
+    volumes:
+      - plugins:/etc/monasca/agent/conf.d:ro
+    ports:
+      - "{{ monasca_agent_docker_forwarder_port }}:{{ monasca_agent_docker_forwarder_port }}"
+    env:
+      LOG_LEVEL: "{{ monasca_agent_docker_log_level }}"
+      OS_AUTH_URL: "{{ monasca_agent_docker_keystone_uri }}"
+      OS_USERNAME: "{{ monasca_agent_docker_username }}"
+      OS_PASSWORD: "{{ monasca_agent_docker_password }}"
+      OS_USER_DOMAIN_NAME: Default
+      OS_PROJECT_NAME: "{{ monasca_agent_docker_project_name }}"
+      OS_PROJECT_DOMAIN_NAME: Default
+      MONASCA_URL: "{{ monasca_agent_docker_api_uri }}"
+      SERVICE_TYPE: monitoring
+      ENDPOINT_TYPE: public
+      REGION_NAME: RegionOne
+      AGENT_HOSTNAME: "{{ ansible_hostname }}"
+      FORWARDER_URL: "http://monasca-agent-forwarder:{{ monasca_agent_docker_forwarder_port }}"
+      FORWARDER_PORT: "{{ monasca_agent_docker_forwarder_port }}"
+
+- name: Create Monasca collector plugin directory
+  file:
+    path: /etc/monasca/agent/conf.d/
+    state: directory
+    owner: root
+    group: root
+    mode: 0755
+
+- name: Template Monasca collector plugins
+  template:
+    src: "{{ item }}.j2"
+    dest: "/etc/monasca/agent/conf.d/{{ item }}"
+    owner: root
+    mode: 0644
+  with_items:
+    - cpu.yaml
+    - docker.yaml
+    - disk.yaml
+    - ib_network.yaml
+    - load.yaml
+    - memory.yaml
+    - network.yaml
+
+- name: Wait for forwarder service
+  wait_for:
+    port: "{{ monasca_agent_docker_forwarder_port }}"
+    delay: 1
+
+- name: Start monasca-agent-collector container
+  docker_container:
+    name: monasca-agent-collector
+    pull: true
+    links:
+      - monasca-agent-forwarder
+    image: stackhpc/agent-collector:latest
+    state: started
+    env:
+      DOCKER: True
+      LOG_LEVEL: "{{ monasca_agent_docker_log_level }}"
+      OS_AUTH_URL: "{{ monasca_agent_docker_keystone_uri }}"
+      OS_USERNAME: "{{ monasca_agent_docker_username }}"
+      OS_PASSWORD: "{{ monasca_agent_docker_password }}"
+      OS_USER_DOMAIN_NAME: Default
+      OS_PROJECT_NAME: "{{ monasca_agent_docker_project_name }}"
+      OS_PROJECT_DOMAIN_NAME: Default
+      MONASCA_URL: "{{ monasca_agent_docker_api_uri }}"
+      SERVICE_TYPE: monitoring
+      ENDPOINT_TYPE: public
+      REGION_NAME: RegionOne
+      AGENT_HOSTNAME: "{{ ansible_hostname }}"
+      FORWARDER_URL: "http://monasca-agent-forwarder:{{ monasca_agent_docker_forwarder_port }}"
+      FORWARDER_PORT: "{{ monasca_agent_docker_forwarder_port }}"
+    volumes:
+      - "/:/rootfs"
+      - "/var/run:/var/run:rw"
+      - "/sys:/sys:ro"
+      - "/var/lib/docker/:/var/lib/docker:ro"
+      - "/dev/disk/:/dev/disk:ro"
+      - "/etc/monasca/agent/conf.d:/etc/monasca/agent/conf.d:ro"
+
+- name: Start monasca-agent-statsd container
+  docker_container:
+    name: monasca-agent-statsd
+    pull: true
+    links:
+      - monasca-agent-forwarder
+    image: stackhpc/agent-statsd:latest
+    state: started
+    ports:
+      - "8125:8125/udp"
+    env:
+      DOCKER: True
+      LOG_LEVEL: "{{ monasca_agent_docker_log_level }}"
+      OS_AUTH_URL: "{{ monasca_agent_docker_keystone_uri }}"
+      OS_USERNAME: "{{ monasca_agent_docker_username }}"
+      OS_PASSWORD: "{{ monasca_agent_docker_password }}"
+      OS_USER_DOMAIN_NAME: Default
+      OS_PROJECT_NAME: "{{ monasca_agent_docker_project_name }}"
+      OS_PROJECT_DOMAIN_NAME: Default
+      MONASCA_URL: "{{ monasca_agent_docker_api_uri }}"
+      SERVICE_TYPE: monitoring
+      ENDPOINT_TYPE: public
+      REGION_NAME: RegionOne
+      AGENT_HOSTNAME: "{{ ansible_hostname }}"
+      FORWARDER_URL: "http://monasca-agent-forwarder:{{ monasca_agent_docker_forwarder_port }}"
+      FORWARDER_PORT: "{{ monasca_agent_docker_forwarder_port }}"
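The 'Template Monasca collector plugins' task above regenerates the plugin configuration, but nothing restarts the collector container when a plugin file changes. A minimal sketch of one way to wire that up, mirroring the monasca_fluentd role's handler (the handler file below is hypothetical and not part of this patch):

# ansible/roles/monasca_agent_docker/handlers/main.yml (hypothetical)
---
- name: Restart monasca-agent-collector
  docker_container:
    name: monasca-agent-collector
    restart: true

The templating task would then add notify: Restart monasca-agent-collector so that changed plugin files take effect without re-creating the container.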
diff --git a/ansible/roles/monasca_agent_docker/templates/cpu.yaml.j2 b/ansible/roles/monasca_agent_docker/templates/cpu.yaml.j2
new file mode 100644
index 0000000..2aa6a36
--- /dev/null
+++ b/ansible/roles/monasca_agent_docker/templates/cpu.yaml.j2
@@ -0,0 +1,4 @@
+init_config: null
+instances:
+- built_by: System
+  name: cpu_stats
diff --git a/ansible/roles/monasca_agent_docker/templates/disk.yaml.j2 b/ansible/roles/monasca_agent_docker/templates/disk.yaml.j2
new file mode 100644
index 0000000..a4bb1dd
--- /dev/null
+++ b/ansible/roles/monasca_agent_docker/templates/disk.yaml.j2
@@ -0,0 +1,6 @@
+init_config: null
+instances:
+- built_by: System
+  device_blacklist_re: .*freezer_backup_snap.*
+  ignore_filesystem_types: iso9660,tmpfs
+  name: disk_stats
diff --git a/ansible/roles/monasca_agent_docker/templates/docker.yaml.j2 b/ansible/roles/monasca_agent_docker/templates/docker.yaml.j2
new file mode 100644
index 0000000..2f3f3eb
--- /dev/null
+++ b/ansible/roles/monasca_agent_docker/templates/docker.yaml.j2
@@ -0,0 +1,5 @@
+init_config:
+  docker_root: '/'
+
+instances:
+  - url: 'unix://var/run/docker.sock'
diff --git a/ansible/roles/monasca_agent_docker/templates/ib_network.yaml.j2 b/ansible/roles/monasca_agent_docker/templates/ib_network.yaml.j2
new file mode 100644
index 0000000..7ffd569
--- /dev/null
+++ b/ansible/roles/monasca_agent_docker/templates/ib_network.yaml.j2
@@ -0,0 +1,4 @@
+init_config: null
+instances:
+- built_by: IBNetworkDetect
+  name: ib_network_stats
diff --git a/ansible/roles/monasca_agent_docker/templates/load.yaml.j2 b/ansible/roles/monasca_agent_docker/templates/load.yaml.j2
new file mode 100644
index 0000000..c5ce0ca
--- /dev/null
+++ b/ansible/roles/monasca_agent_docker/templates/load.yaml.j2
@@ -0,0 +1,4 @@
+init_config: null
+instances:
+- built_by: System
+  name: load_stats
diff --git a/ansible/roles/monasca_agent_docker/templates/memory.yaml.j2 b/ansible/roles/monasca_agent_docker/templates/memory.yaml.j2
new file mode 100644
index 0000000..c9854a3
--- /dev/null
+++ b/ansible/roles/monasca_agent_docker/templates/memory.yaml.j2
@@ -0,0 +1,4 @@
+init_config: null
+instances:
+- built_by: System
+  name: memory_stats
diff --git a/ansible/roles/monasca_agent_docker/templates/network.yaml.j2 b/ansible/roles/monasca_agent_docker/templates/network.yaml.j2
new file mode 100644
index 0000000..5188355
--- /dev/null
+++ b/ansible/roles/monasca_agent_docker/templates/network.yaml.j2
@@ -0,0 +1,5 @@
+init_config: null
+instances:
+- built_by: System
+  excluded_interface_re: lo.*|vnet.*|tun.*|ovs.*|br.*|tap.*|qbr.*|qvb.*|qvo.*
+  name: network_stats
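The templates above cover the standard system checks. The collector reads any YAML placed in conf.d, so further checks can be added in the same way; as an illustration only (the option names for the http_check plugin should be verified against the monasca-agent version in use), an endpoint check might look like:

# ansible/roles/monasca_agent_docker/templates/http_check.yaml.j2 (hypothetical)
init_config: null
instances:
- name: prometheus
  url: http://localhost:9090
  timeout: 5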