Commit d3840083 authored by Tobias Winchen's avatar Tobias Winchen
Browse files

Merge branch 'master' into production

parents 75bc3431 f9167117
......@@ -12,7 +12,7 @@ mpikat_branch: "production"
# Repository and branch of the provisioning system (roles and provision descriptions) used by the master controller
provision_repository: "https://gitlab.mpcdf.mpg.de/mpifr-bdg/edd_provisioning.git"
provision_branch: "TNRT"
provision_branch: "master"
version_tag: "latest"
......@@ -39,7 +39,7 @@ ansible_python_interpreter: auto_legacy_silent
edd_subnet: "0.0.0.0"
# Subnet used for highspeed data connections
high_speed_data_subnet: "10.128.42.0"
high_speed_data_subnet: "192.168.91.0"
high_speed_data_subnetmask: "255.255.255.0"
# Port used for the ssh connections of the master controller to the ansible interface
......@@ -103,16 +103,16 @@ grafana_port: "3000"
#ToDo: Fill in packetizer addresses
#npc_devices:
# s_band_packetizer:
# control_ip: 10.96.64.41
# control_port: 7147
#
# ku_band_packetizer:
# control_ip: 10.96.64.42
# control_port: 7147
npc_devices:
packetizer:
control_ip: 192.168.90.101
control_port: 7147
interfaces:
0:
mac: 12:4d:01:00:34:00
ip: 192.168.91.18
1:
mac: 12:4d:01:00:34:01
ip: 192.168.91.19
---
# Character devices under /dev/infiniband that are listed for networking:
# the shared rdma_cm node plus the issm/ucm/umad/uverbs nodes for device
# indices 0, 1 and 2.
# NOTE(review): presumably these are handed to the containers that need
# high-speed network access - confirm against the role consuming this list.
devices_networking:
- "/dev/infiniband/rdma_cm"
- "/dev/infiniband/issm0"
- "/dev/infiniband/ucm0"
- "/dev/infiniband/umad0"
- "/dev/infiniband/uverbs0"
- "/dev/infiniband/issm1"
- "/dev/infiniband/ucm1"
- "/dev/infiniband/umad1"
- "/dev/infiniband/uverbs1"
- "/dev/infiniband/issm2"
- "/dev/infiniband/ucm2"
- "/dev/infiniband/umad2"
- "/dev/infiniband/uverbs2"
# Name of the docker runtime to use.
# NOTE(review): presumably selects the NVIDIA container runtime for GPU
# access - confirm against the role consuming this variable.
docker_runtime:
nvidia
......@@ -3,34 +3,37 @@ all:
children:
gpu_server:
hosts:
192.168.90.1
192.168.90.2
192.168.90.[1:4]
infrastructure_server: # grafana, influx, etc (without high speed data access)
hosts:
192.168.90.11
dev_server:
hosts:
192.168.90.1
192.168.90.4:
# Host running the interface services
interface:
hosts:
192.168.90.1
192.168.90.11
mastercontroller:
hosts:
192.168.90.1
192.168.90.11
registry:
hosts:
192.168.90.1
192.168.90.11
grafana:
hosts:
192.168.90.1
192.168.90.11
redis:
hosts:
192.168.90.1
192.168.90.11
influx:
hosts:
192.168.90.1
192.168.90.11
......@@ -6,7 +6,7 @@
# provisioning systems. The default "latest" is set in the common role and
# overridden here for the production environment.
version_tag: "210825.0"
version_tag: "210902.0"
# These settings specify the repositories used to build the EDD
# In particular useful to be tweaked in a local custom mysite_dev inventory to develop the provisioning system.
......
......@@ -9,8 +9,10 @@
- role: pulsar_pipeline
container_name: pulsar_timing1
container_env: "EDD_ALLOWED_NUMA_NODES=1"
edd_group: 9700
- role: gated_full_stokes_spectrometer
container_env: "EDD_ALLOWED_NUMA_NODES=0"
edd_group: 9702
- hosts: gpu_server[0]
vars:
......@@ -22,8 +24,10 @@
- role: pulsar_pipeline
container_name: pulsar_baseband1
container_env: "EDD_ALLOWED_NUMA_NODES=0"
edd_group: 9703
- role: pulsar_pipeline
container_name: pulsar_searching1
container_env: "EDD_ALLOWED_NUMA_NODES=1"
edd_group: 9701
- role: fits_interface
container_env: "EDD_ALLOWED_NUMA_NODES=0"
......@@ -8,9 +8,11 @@
roles:
- role: gated_full_stokes_spectrometer
container_env: "EDD_ALLOWED_NUMA_NODES=0"
edd_group: 9702
- role: pulsar_pipeline
container_name: pulsar_timing1
container_env: "EDD_ALLOWED_NUMA_NODES=1"
edd_group: 9700
- hosts: gpu_server[0]
vars:
......@@ -22,5 +24,6 @@
- role: pulsar_pipeline
container_name: pulsar_timing2
container_env: "EDD_ALLOWED_NUMA_NODES=1"
edd_group: 9700
- role: fits_interface
container_env: "EDD_ALLOWED_NUMA_NODES=0"
......@@ -9,8 +9,10 @@
- role: pulsar_pipeline
container_name: pulsar_timing1
container_env: "EDD_ALLOWED_NUMA_NODES=1"
edd_group: 9700
- role: gated_full_stokes_spectrometer
container_env: "EDD_ALLOWED_NUMA_NODES=0"
edd_group: 9702
- hosts: gpu_server[0]
vars:
......@@ -29,9 +31,11 @@
- role: pulsar_pipeline
container_name: pulsar_baseband_leap
container_env: "EDD_ALLOWED_NUMA_NODES=1"
edd_group: 9703
- role: pulsar_pipeline
container_name: pulsar_timing2
container_env: "EDD_ALLOWED_NUMA_NODES=0"
edd_group: 9700
- role: fits_interface
container_env: "EDD_ALLOWED_NUMA_NODES=0"
......
......@@ -3,18 +3,16 @@
# Provision configuration for the run. This defines which edd components are
# set up.
- hosts: gpu_server[1]
vars:
edd_group: 50000
roles:
- role: pulsar_pipeline
container_name: pulsar_timing1
container_env: "EDD_ALLOWED_NUMA_NODES=1"
edd_group: 9700
- role: gated_full_stokes_spectrometer
container_env: "EDD_ALLOWED_NUMA_NODES=0"
edd_group: 9702
- hosts: gpu_server[0]
vars:
edd_group: 50000
roles:
- role: dig_pack_controller
container_name: dig_pack_controller
......@@ -22,8 +20,10 @@
- role: pulsar_pipeline
container_name: pulsar_baseband1
container_env: "EDD_ALLOWED_NUMA_NODES=0"
edd_group: 9703
- role: pulsar_pipeline
container_name: pulsar_searching1
container_env: "EDD_ALLOWED_NUMA_NODES=1"
edd_group: 9701
- role: fits_interface
container_env: "EDD_ALLOWED_NUMA_NODES=0"
......@@ -9,8 +9,10 @@
- role: pulsar_pipeline
container_name: pulsar_timing1
container_env: "EDD_ALLOWED_NUMA_NODES=1"
edd_group: 9700
- role: gated_full_stokes_spectrometer
container_env: "EDD_ALLOWED_NUMA_NODES=0"
edd_group: 9702
- hosts: gpu_server[0]
vars:
......@@ -22,8 +24,10 @@
- role: pulsar_pipeline
container_name: pulsar_baseband1
container_env: "EDD_ALLOWED_NUMA_NODES=0"
edd_group: 9703
- role: pulsar_pipeline
container_name: pulsar_searching1
container_env: "EDD_ALLOWED_NUMA_NODES=1"
edd_group: 9701
- role: fits_interface
container_env: "EDD_ALLOWED_NUMA_NODES=0"
......@@ -26,18 +26,16 @@
{
"id": "gated_spectrometer",
"input_data_streams":
{
"polarization_0":
[
{
"source": "dig_pack_controller:polarization_0",
"format": "MPIFR_EDD_Packetizer:1"
},
"polarization_1":
{
"source": "dig_pack_controller:polarization_1",
"format": "MPIFR_EDD_Packetizer:1"
}
},
],
"output_data_streams":
{
"polarization_0_0":
......
{
"products": [
{
"id": "dig_pack_controller",
"bit_depth": 8,
"sampling_rate": 4000000000.0,
"predecimation_factor": 2,
"flip_spectrum": true,
"output_data_streams": {
"polarization_0": {
"format": "MPIFR_EDD_Packetizer:1",
"ip": "225.0.0.140+3",
"port": "7148"
},
"polarization_1": {
"format": "MPIFR_EDD_Packetizer:1",
"ip": "225.0.0.144+3",
"port": "7148"
}
}
},
{
"id": "gated_stokes_spectrometer",
"input_data_streams": {
"polarization_0": {
"source": "dig_pack_controller:polarization_0",
"format": "MPIFR_EDD_Packetizer:1"
},
"polarization_1": {
"source": "dig_pack_controller:polarization_1",
"format": "MPIFR_EDD_Packetizer:1"
}
},
"output_data_streams": {
"Stokes_I_0": {
"format": "GatedSpectrometer:1",
"ip": "225.0.1.172",
"port": "7152"
},
"Stokes_I_1": {
"format": "GatedSpectrometer:1",
"ip": "225.0.1.173",
"port": "7152"
},
"Stokes_Q_0": {
"format": "GatedSpectrometer:1",
"ip": "225.0.1.174",
"port": "7152"
},
"Stokes_Q_1": {
"format": "GatedSpectrometer:1",
"ip": "225.0.1.175",
"port": "7152"
},
"Stokes_U_0": {
"format": "GatedSpectrometer:1",
"ip": "225.0.1.176",
"port": "7152"
},
"Stokes_U_1": {
"format": "GatedSpectrometer:1",
"ip": "225.0.1.177",
"port": "7152"
},
"Stokes_V_0": {
"format": "GatedSpectrometer:1",
"ip": "225.0.1.178",
"port": "7152"
},
"Stokes_V_1": {
"format": "GatedSpectrometer:1",
"ip": "225.0.1.179",
"port": "7152"
}
},
"naccumulate": 16384,
"fft_length": 262144
},
{
"id": "fits_interface",
"input_data_streams": [
{
"source": "gated_stokes_spectrometer:Stokes_I_0",
"format": "GatedSpectrometer:1"
},
{
"source": "gated_stokes_spectrometer:Stokes_Q_0",
"format": "GatedSpectrometer:1"
},
{
"source": "gated_stokes_spectrometer:Stokes_U_0",
"format": "GatedSpectrometer:1"
},
{
"source": "gated_stokes_spectrometer:Stokes_V_0",
"format": "GatedSpectrometer:1"
},
{
"source": "gated_stokes_spectrometer:Stokes_I_1",
"format": "GatedSpectrometer:1"
},
{
"source": "gated_stokes_spectrometer:Stokes_Q_1",
"format": "GatedSpectrometer:1"
},
{
"source": "gated_stokes_spectrometer:Stokes_U_1",
"format": "GatedSpectrometer:1"
},
{
"source": "gated_stokes_spectrometer:Stokes_V_1",
"format": "GatedSpectrometer:1"
}
]
}
]
}
---
########################################################################
# Provision configuration for the run. This defines which edd components are
# set up.
#
# Two plays follow:
#   1. gpu_server[1] gets the gated_full_stokes_spectrometer role, with
#      container_env set to "EDD_ALLOWED_NUMA_NODES=1".
#   2. gpu_server[0] gets the fits_interface and dig_pack_controller roles.
# NOTE(review): gpu_server[N] is an Ansible pattern subscript selecting a
# single host of the gpu_server inventory group (zero-based) - confirm the
# intended hosts against the inventory in use.
- hosts: gpu_server[1]
roles:
- gated_full_stokes_spectrometer
vars:
container_env: "EDD_ALLOWED_NUMA_NODES=1"
- hosts: gpu_server[0]
roles:
- role: fits_interface
- role: dig_pack_controller
# NOTE(review): presumably names the entry under npc_devices that this
# controller talks to - confirm against the site's group variables.
device: packetizer
......@@ -55,6 +55,8 @@
force_kill: yes # avoid lag between rm and restart
volumes:
- "/var/run/docker.sock:/var/run/docker.sock"
- "{{ data_base_path}}:{{ data_base_path }}"
register: result
retries: 3 # If container launch fails, mostly due to delay in removal of old container, retry after delay
delay: 15
......
......@@ -22,7 +22,7 @@
- docker_container:
name: "{{ container_name }}"
image: "{{ docker_registry }}:{{ docker_registry_port }}/{{ image_name }}:{{ version_tag }}"
command: "{{ container_cmd }} {% if register_as_pipeline is not defined or register_as_pipeline %} --redis-ip {{ redis_storage }} --redis-port {{ redis_port }} --register-id={{ container_name }} {% endif %} {% if log_level is defined %} --log-level={{ log_level }} {%endif%}"
command: "sh -c \"umask 002; {{ container_cmd }} {% if register_as_pipeline is not defined or register_as_pipeline %} --redis-ip {{ redis_storage }} --redis-port {{ redis_port }} --register-id={{ container_name }} {% endif %} {% if log_level is defined %} --log-level={{ log_level }} {%endif%}\""
state: started # ensure that is running
pull: yes
privileged: yes
......
......@@ -39,6 +39,9 @@
copy:
content: "{{ dck_lg.stderr_lines | join('\n') }}"
dest: "{{ data_dir }}/{{ container_name }}_{{ ansible_date_time.iso8601_micro}}.log"
mode: g+rw
owner: "{{ edd_user }}"
group: "{{ edd_group }}"
ignore_errors: yes
- docker_container:
......
......@@ -81,7 +81,7 @@
"type": "fill"
}
],
"measurement": "GatedSpectrometer",
"measurement": "{{ measurement }}",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT last(\"input_buffer_total_write\") FROM \"{{ measurement }}\" WHERE (\"pipeline_id\" = '{{ container_name }}') AND $timeFilter GROUP BY time($__interval) fill(null)",
......@@ -120,7 +120,7 @@
"type": "fill"
}
],
"measurement": "GatedSpectrometer",
"measurement": "{{ measurement }}",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT last(\"output_buffer_total_read\") FROM \"{{ measurement }}\" WHERE (\"pipeline_id\" = '{{ container_name }}') AND $timeFilter GROUP BY time($__interval) fill(null)",
......@@ -419,7 +419,7 @@
"type": "fill"
}
],
"measurement": "GatedSpectrometer",
"measurement": "{{ measurement }}",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT max(\"input_buffer_fill_level\") FROM \"{{ measurement }}\" WHERE (\"pipeline_id\" = '{{ container_name }}') AND $timeFilter GROUP BY time($__interval) fill(null)\n",
......
......@@ -272,7 +272,7 @@
"measurement": "{{ measurement }}",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT \"pipeline_status\" FROM \"{{ measurement }}\" ",
"query": "SELECT \"pipeline_status\" FROM \"{{ measurement }}\" WHERE (\"pipeline_id\" = '{{ container_name }}') AND $timeFilter",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
......@@ -349,4 +349,86 @@
"writeAllValues": false,
"writeLastValue": true,
"writeMetricNames": false
},
{
"datasource": null,
"description": "",
"fieldConfig": {
"defaults": {
"custom": {
"align": null,
"filterable": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 7,
"x": 14,
"y":105
},
"id": 2093487693492,
"options": {
"showHeader": false
},
"pluginVersion": "7.3.5",
"targets": [
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}
],
"measurement": "{{ measurement }}",
"orderByTime": "ASC",
"policy": "default",
"query": "SELECT \"pipeline_status\" FROM \"{{ measurement }}\" WHERE (\"pipeline_id\" = '{{ container_name }}') AND $timeFilter",
"rawQuery": true,
"refId": "A",
"resultFormat": "time_series",
"select": [
[
{
"params": [
"pipeline_status"
],
"type": "field"
},
{
"params": [],
"type": "mean"
}
]
],
"tags": []
}
],
"timeFrom": null,
"timeShift": null,
"title": "Pipeline Status",
"type": "table"
}
......@@ -99,8 +99,185 @@
"timeShift": null,
"title": "Level",
"type": "aidanmountford-html-panel"
},
{
},
{
"datasource": null,
"fieldConfig": {
"defaults": {
"custom": {
"align": null,
"filterable": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y":300
},
"id": 2093487693496,
"options": {
"showHeader": true
},
"pluginVersion": "7.3.5",
"targets": [
{
"groupBy": [
{
"params": [
"$__interval"
],
"type": "time"
},
{
"params": [
"null"
],
"type": "fill"
}