Commit f06b1b65 authored by Tobias Winchen's avatar Tobias Winchen
Browse files

Merge branch 'MCSensors' into 'master'

Improvements on monitoring

See merge request !42
parents 76061c9c 3bba5264
......@@ -5,7 +5,10 @@ container_name: "{{ default_container_name }}"
edd_container: "{{ {} }}"
container_env: "{{ {} }}"
bind_port: "Not defined: Debug mode?"
redis_sidecar: False
launch_redis_sidecar: true
launch_influx_sidecar: true
install_dashboard: true # If false, no dashboard will be installed
dashboard_template: "common_dashboard.json"
dashboard_panels: ""
measurement: "{{ image_name }}"
......
---
- set_fact:
async_list: []
tags:
- always
- file:
path: "{{ data_dir }}"
state: directory
......@@ -40,3 +45,127 @@
async: 300
poll: 0
register: async_results_tmp
- name: "{{ image_name }}:: Stop EDD docker container"
block:
- name: Get docker logs
command: docker logs "{{ container_name }}"
register: dck_lg
ignore_errors: yes
- name: Save to file
copy:
content: "{{ dck_lg.stderr_lines | join('\n') }}"
dest: "{{ data_dir }}/{{ container_name }}_{{ ansible_date_time.iso8601_micro}}.log"
mode: g+rw
owner: "{{ edd_user }}"
group: "{{ edd_group }}"
ignore_errors: yes
- docker_container:
name: "{{ container_name }}"
state: absent
async: 300
poll: 0
register: async_results_tmp
- set_fact:
edd_container: '{{ edd_container | dict2items | rejectattr( "key", "equalto", container_name )|list|items2dict }}'
cacheable: true
tags:
- never
- stop
- set_fact:
async_list: "{{ async_list + [ async_results_tmp.ansible_job_id ] }}"
tags:
- always
- name: Using Dashboard
block:
# Print the dashboard template that is about to be uploaded, to ease debugging.
- name: "{{ image_name }}:: Show dashboard template"
  debug:
    msg: "Dashboard template: {{ dashboard_template }}"
# Render the dashboard template and POST it to the Grafana dashboards API.
- name: "{{ image_name }}:: Setup dashboard"
  uri:
    url: "http://{{ grafana_host }}:{{ grafana_port }}/api/dashboards/db"
    method: POST
    headers:
      Content-Type: "application/json;charset=UTF-8"
      Accept: "application/json"
    # Pass the variable directly to the lookup; Jinja delimiters must not be
    # nested inside an already templated expression.
    body: "{{ lookup('template', dashboard_template) }}"
    body_format: json
  # Note: the result of this async job is not waited for, as a failure
  # reported by a later wait would not be covered by ignore_errors.
  ignore_errors: true
  async: 300
  poll: 0
  register: async_results_tmp
  tags:
    - dashboard
- name: "{{ image_name }}:: Delete dashboard for "
uri:
url: "http://{{ grafana_host }}:{{ grafana_port }}/api/dashboards/uid/{{ container_name }}"
method: DELETE
tags:
- never
- stop
ignore_errors: True # Note: the result of this async job is not waited for, as a failure would otherwise not be covered by ignore_errors
async: 300
poll: 0
- set_fact:
edd_container: '{{ edd_container | dict2items | rejectattr( "key", "equalto", container_name )|list|items2dict }}'
cacheable: true
when: install_dashboard
- name: "{{ image_name }}:: Launch influx sidecar"
block:
- include_role:
name: influx_sidecar
vars:
pipeline_id: "{{ container_name }}"
katcp_host: "{{ inventory_hostname }}"
katcp_port: "{{ bind_port }}"
exclude_sensors: "{% if influx_exclude_sensors is defined and influx_exclude_sensors%} {{ influx_exclude_sensors }} {% endif %}"
tags:
- always
- set_fact:
async_list: "{{ async_list + [ async_results_tmp.ansible_job_id ] }}"
tags:
- always
when: launch_influx_sidecar
- name: "{{ image_name }}:: Launch redis sidecar"
block:
- include_role:
name: redis_sidecar
vars:
pipeline_id: "{{ container_name }}"
katcp_host: "{{ inventory_hostname }}"
katcp_port: "{{ bind_port }}"
tags:
- always
- set_fact:
async_list: "{{ async_list + [ async_results_tmp.ansible_job_id ] }}"
tags:
- always
when: launch_redis_sidecar
- name: "{{ image_name }}:: Wait for async job results"
async_status:
jid: "{{ item }}"
loop: "{{ async_list }}"
register: job_result
until: job_result.finished
retries: 100
delay: 3
tags:
- always
......@@ -17,124 +17,8 @@
- debug:
msg: " {{ container_name }} katcp free port found {{ bind_port }} -- image+version: {{ image_name }}:{{ version_tag }} "
- set_fact:
async_list: []
tags:
- always
- name: "{{ image_name }}:: Launch EDD docker container"
import_tasks: launch_tasks.yml
- set_fact:
async_list: "{{ async_list + [ async_results_tmp.ansible_job_id ] }}"
- name: "{{ image_name }}:: Stop EDD docker container"
block:
- name: Get docker logs
command: docker logs "{{ container_name }}"
register: dck_lg
ignore_errors: yes
- name: Save to file
copy:
content: "{{ dck_lg.stderr_lines | join('\n') }}"
dest: "{{ data_dir }}/{{ container_name }}_{{ ansible_date_time.iso8601_micro}}.log"
mode: g+rw
owner: "{{ edd_user }}"
group: "{{ edd_group }}"
ignore_errors: yes
- docker_container:
name: "{{ container_name }}"
state: absent
async: 300
poll: 0
register: async_results_tmp
- set_fact:
edd_container: '{{ edd_container | dict2items | rejectattr( "key", "equalto", container_name )|list|items2dict }}'
cacheable: true
tags:
- never
- stop
- name: Using Dashboard
block:
- name: "{{ image_name }}:: Setup dashboard"
uri:
url: "http://{{ grafana_host }}:{{ grafana_port }}/api/dashboards/db"
method: POST
headers:
Content-Type: "application/json;charset=UTF-8"
Accept: "application/json"
body: "{{ lookup('template','{{ dashboard_template }}') }}"
body_format: json
ignore_errors: yes
async: 300
poll: 0
register: async_results_tmp
tags:
- dashboard
- name: "{{ image_name }}:: Delete dashboard for "
uri:
url: "http://{{ grafana_host }}:{{ grafana_port }}/api/dashboards/uid/{{ container_name }}"
method: DELETE
tags:
- never
- stop
ignore_errors: yes
async: 300
poll: 0
register: async_results_tmp
- set_fact:
edd_container: '{{ edd_container | dict2items | rejectattr( "key", "equalto", container_name )|list|items2dict }}'
cacheable: true
when: dashboard_panels is defined or dashboard_template is defined
- name: "{{ image_name }}:: Launch influx sidecar"
include_role:
name: influx_sidecar
vars:
pipeline_id: "{{ container_name }}"
katcp_host: "{{ inventory_hostname }}"
katcp_port: "{{ bind_port }}"
exclude_sensors: "{% if influx_exclude_sensors is defined and influx_exclude_sensors%} {{ influx_exclude_sensors }} {% endif %}"
tags:
- always
when: dashboard_panels is defined or dashboard_template is defined or launch_influx_sidecar is defined and launch_influx_sidecar
- set_fact:
async_list: "{{ async_list + [ async_results_tmp.ansible_job_id ] }}"
- name: "{{ image_name }}:: Launch redis sidecar"
include_role:
name: redis_sidecar
vars:
pipeline_id: "{{ container_name }}"
katcp_host: "{{ inventory_hostname }}"
katcp_port: "{{ bind_port }}"
tags:
- always
when: launch_redis_sidecar is defined and launch_redis_sidecar
- set_fact:
async_list: "{{ async_list + [ async_results_tmp.ansible_job_id ] }}"
- name: "{{ image_name }}:: Wait for async job results"
async_status:
jid: "{{ item }}"
loop: "{{ async_list }}"
register: job_result
until: job_result.finished
retries: 100
delay: 3
tags:
- always
......@@ -46,9 +46,11 @@
image_name: edd_master_controller
container_name: MASTER_CONTROLLER
container_cmd: "/usr/bin/python /src/mpikat/mpikat/effelsberg/edd/edd_master_controller.py --host={{ edd_subnet }} --port={{ master_controller_port }} --edd_ansible_inventory={{ edd_inventory_folder }}"
bind_port: "{{ master_controller_port }}"
launch_influx_sidecar: true
launch_redis_sidecar: true
install_dashboard: false
register_as_pipeline: false
- set_fact:
master_controller_ip: "{{ ansible_host }}"
#- set_fact:
#master_controller_port: "{{ bind_port }}"
......@@ -17,6 +17,7 @@ RUN apt-get -y update &&\
python-enum34 \
python-paramiko \
python-kerberos \
python-pygraphviz \
python-networkx &&\
pip install ansible==2.10.7
......
......@@ -7,5 +7,8 @@
container_name: SCPI_INTERFACE
container_cmd: "/usr/bin/python /src/mpikat/mpikat/effelsberg/edd/edd_scpi_interface.py --host={{ edd_subnet }} --port={{ effelsberg_scpi_port }} --master-controller-ip={{ master_controller_ip}} --master-controller-port={{ master_controller_port }} --redis-ip {{ redis_storage }}"
register_as_pipeline: False
launch_redis_sidecar: false
launch_influx_sidecar: false
install_dashboard: false
tags:
- always
......@@ -7,6 +7,7 @@
container_name: EFFELSBERG_STATUS_SERVER
container_cmd: "/usr/bin/python /src/mpikat/mpikat/effelsberg/status_server.py --host={{ edd_subnet }} --port={{ bind_port }} --redis-ip {{ redis_storage }}"
launch_influx_sidecar: True
launch_redis_sidecar: False
register_as_pipeline: False
measurement: telescope_meta
dashboard_template: TelescopeMeta.json
......
......@@ -66,7 +66,7 @@
- name: Launch grafana monitoring
docker_container:
name: "edd_grafana"
image: grafana/grafana:7.3.5
image: grafana/grafana:8.1.2
detach: yes
state: started # ensure that is running
network_mode: bridge
......@@ -82,7 +82,7 @@
volumes:
- edd_grafana_data:/data
env:
GF_INSTALL_PLUGINS: "natel-discrete-panel,grafana-clock-panel,redis-datasource,dalvany-image-panel,jdbranham-diagram-panel,grafana-piechart-panel,marcusolsson-dynamictext-panel,mtanda-histogram-panel,aidanmountford-html-panel,mxswat-separator-panel,marcuscalidus-svg-panel,https://github.com/ae3e/ae3e-plotly-panel/archive/v0.3.2.zip;ae3e-plotly-panel"
GF_INSTALL_PLUGINS: "natel-discrete-panel,grafana-clock-panel,redis-datasource,dalvany-image-panel,jdbranham-diagram-panel,grafana-piechart-panel,marcusolsson-dynamictext-panel,mtanda-histogram-panel,aidanmountford-html-panel,mxswat-separator-panel,marcuscalidus-svg-panel,https://github.com/ae3e/ae3e-plotly-panel/releases/download/v0.5.0/ae3e-plotly-panel-0.5.0.zip;ae3e-plotly-panel"
GF_AUTH_DISABLE_LOGIN_FORM: "true" # Default to admin for testing; think about user management later
GF_AUTH_ANONYMOUS_ENABLED: "true"
GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin"
......
......@@ -8,6 +8,12 @@
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
......@@ -15,123 +21,362 @@
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 9,
"id": 3064,
"links": [],
"panels": [
{
"datasource": null,
"datasource": "Redis_Plots",
"description": "",
"fieldConfig": {
"defaults": {
"custom": {}
"color": {
"mode": "thresholds"
},
"mappings": [
{
"options": {
"match": "empty",
"result": {
"color": "dark-purple",
"index": 0,
"text": "Unprovisioned"
}
},
"type": "special"
}
],
"noValue": "Unprovisioned",
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "dark-blue",
"value": null
}
]
}
},
"overrides": []
},
"folderId": null,
"gridPos": {
"h": 16,
"h": 3,
"w": 12,
"x": 0,
"y": 0
},
"id": 2093487693494,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "/^value$/",
"values": false
},
"textMode": "value"
},
"pluginVersion": "8.1.2",
"targets": [
{
"command": "hget",
"field": "value",
"keyName": "MASTER_CONTROLLER:provision",
"query": "",
"refId": "A",
"type": "command"
}
],
"title": "Current Provision",
"type": "stat"
},
{
"aliasColors": {},
"cacheTimeout": null,
"css_data": ".graph{\ntext-align: center;\n}",
"datasource": "Redis_Plots",
"doInit": {},
"format": "short",
"gridPos": {
"h": 17,
"w": 8,
"x": 4,
"y": 3
},
"handleMetric": {},
"html_data": "<div class=\"graph\" timestamp=0>\nNo graph provided\n</div>",
"id": 2093487693488,
"interval": null,
"js_code": "var svg = ctrl.data[0].rows[0].value;\nvar old_timestamp = ctrl.data[0].rows[0].timestamp;\nvar summary = JSON.parse(ctrl.data[1].rows[0].value);\n\nvar graph = htmlnode.lastChild;\n\ntimestamp = graph.getAttribute(\"timestamp\");\nif (timestamp < old_timestamp)\n{\n graph.setAttribute(\"timestamp\", old_timestamp);\n\n graph.innerHTML = svg;\n \n //var parser = new DOMParser();\n //var doc = parser.parseFromString(svg, \"image/svg+xml\");\n //console.log(doc);\n //graph.appendChild(doc);\n}\n\ngraph.children[0].setAttribute(\"width\", \"100%\");\nvar svg_graph = graph.children[0].children[0];\nfor (var el of svg_graph.children)\n{\n if (el.hasAttribute(\"class\") && el.getAttribute(\"class\") == \"node\" )\n {\n var product_name = el.children[0].innerHTML;\n if (summary[product_name] == \"error\")\n {\n el.children[1].setAttribute(\"fill\", \"#aa0000\");\n el.children[2].setAttribute(\"fill\", \"#ffffff\");\n }\n else if (summary[product_name] == \"idle\")\n {\n el.children[1].setAttribute(\"fill\", \"#6ed0e0\");\n el.children[2].setAttribute(\"fill\", \"#000000\");\n }\n else if (summary[product_name] == \"configured\")\n {\n el.children[1].setAttribute(\"fill\", \"#5794f2\");\n el.children[2].setAttribute(\"fill\", \"#000000\");\n }\n else\n {\n el.children[1].setAttribute(\"fill\", \"#00aa00\");\n el.children[2].setAttribute(\"fill\", \"#000000\");\n\n }\n }\n}\n\n\n",
"js_init_code": "",
"links": [],
"maxDataPoints": 3,
"nullPointMode": "connected",
"pluginVersion": "8.1.2",
"targets": [
{
"command": "hmget",
"field": "value timestamp",
"keyName": "MASTER_CONTROLLER:configuration_graph",
"query": "",
"refId": "A",
"type": "command"
},
{
"command": "hget",
"field": "value",
"hide": false,
"keyName": "MASTER_CONTROLLER:product-status-summary",
"query": "",
"refId": "B",
"type": "command"
}
],
"title": "Current Configuration Graph",
"type": "aidanmountford-html-panel"
},
{
"datasource": null,
"folderId": null,
"gridPos": {
"h": 17,
"w": 12,
"x": 12,
"y": 0
},
"headings": false,
"id": 2,
"limit": 100,
"pluginVersion": "7.2.2",
"options": {
"folderId": null,
"maxItems": 100,
"query": "",
"showHeadings": false,
"showRecentlyViewed": false,
"showSearch": true,
"showStarred": false,
"tags": [
"provisioned"
]
},
"pluginVersion": "8.1.2",
"query": "",
"recent": false,
"search": true,
"starred": false,
"tags": ["provisioned"],
"tags": [
"provisioned"
],
"timeFrom": null,
"timeShift": null,
"title": "Provisioned EDD Components",
"type": "dashlist"
},
{
"type": "dashlist",
"title": "Computing Components",
"gridPos": {
"x": 12,
"y": 0,
"w": 12,
"h": 8
},
"id": 123125,
"targets": [
{
"refId": "A",
"policy": "default",
"resultFormat": "time_series",
"orderByTime": "ASC",
"tags": [],
"groupBy": [
{
"type": "time",
"params": [
"$__interval"
]
{
"datasource": "Redis_Plots",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
{
"type": "fill",
"params": [
"null"
"mappings": [
{
"options": {
"error": {
"color": "dark-red",
"index": 0
},
"panic": {
"color": "semi-dark-red",
"index": 1
}
},
"type": "value"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
],
"select": [
[
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 4,
"x": 0,
"y": 3
},
"id": 2093487693496,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [