# NOTE: hand-edited patch. Hunk line counts are updated to match the edited
# contents, but the `index` blob hashes are carried over unchanged and no
# longer correspond to the new file contents.
diff --git a/warpsrvint/docker_kapacitor/handlers/main.yaml b/warpsrvint/docker_kapacitor/handlers/main.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..04e217e8ec0446626eb8fcfb9a10df04c927263a
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/handlers/main.yaml
@@ -0,0 +1,10 @@
+---
+# Handlers for the docker_kapacitor role.
+
+# Restarts the compose project in /srv/kapacitor/; notified by the template
+# tasks when kapacitor.conf, docker-compose.yml or a TICKscript changes.
+- name: restart kapacitor docker
+  docker_service:
+    project_src: /srv/kapacitor/
+    state: present
+    restarted: true
diff --git a/warpsrvint/docker_kapacitor/tasks/main.yaml b/warpsrvint/docker_kapacitor/tasks/main.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d888c731d3eae5b9eea23955c8d7b69d0c3a1e78
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/tasks/main.yaml
@@ -0,0 +1,66 @@
+---
+# Deploys Kapacitor as a docker-compose project under /srv/kapacitor/.
+
+# Runs the shared get_secret task file once per item below.
+# NOTE(review): presumably this exposes influx_admin_pw / alerta_token to the
+# templates, and `length: -1` marks a secret that is not generated; confirm in
+# ../functions/get_secret.yml.
+- include: ../functions/get_secret.yml
+  with_items:
+    - { path: /srv/influx/influx_admin_pw, length: 24 }
+    - { path: /srv/kapacitor/alerta_token, length: -1 }
+
+- name: create folder struct for kapacitor
+  file:
+    path: "{{ item }}"
+    state: "directory"
+  with_items:
+    - /srv/kapacitor/
+    - /srv/kapacitor/data/
+    - /srv/kapacitor/load/
+    - /srv/kapacitor/load/tasks/
+    - /srv/kapacitor/load/templates/
+    - /srv/kapacitor/load/handlers/
+
+# "Create config files": compose file and kapacitor.conf.
+- name: Konfig-Dateien erstellen
+  template:
+    src: "{{ item }}"
+    dest: "/srv/kapacitor/{{ item }}"
+  with_items:
+    - docker-compose.yml
+    - kapacitor.conf
+  notify: restart kapacitor docker
+
+# "Create script files": helper scripts wrapping `docker-compose exec`.
+# Executable by everyone, writable only by the owner.
+- name: Script-Dateien erstellen
+  template:
+    src: "{{ item }}"
+    dest: "/srv/kapacitor/{{ item }}"
+    mode: "u=rwx,go=rx"
+  with_items:
+    - kapacitor_listtasks.sh
+    - kapacitor_show.sh
+    - kapacitor_watch.sh
+
+# "Create Kapacitor tasks": TICKscripts picked up from the [load] directory.
+- name: Kapacitor-Tasks erstellen
+  template:
+    src: "{{ item }}"
+    dest: "/srv/kapacitor/load/tasks/{{ item }}"
+  with_items:
+    - task_cpu_high.tick
+    - task_disk_low.tick
+    - task_load1_high.tick
+    - task_load5_high.tick
+    - task_load15_high.tick
+    - task_mem_used.tick
+    - task_notraffic_globe.tick
+    - task_notraffic_telekom.tick
+  notify: restart kapacitor docker
+
+- name: start kapacitor docker
+  docker_service:
+    project_src: /srv/kapacitor/
+    state: present
diff --git a/warpsrvint/docker_kapacitor/templates/docker-compose.yml b/warpsrvint/docker_kapacitor/templates/docker-compose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1266c2c501a5852791f4383c9db5e6371c07e248
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/templates/docker-compose.yml
@@ -0,0 +1,16 @@
+version: "3"
+
+services:
+
+  # Service name `app` is referenced by the kapacitor_*.sh helper scripts.
+  app:
+
+    image: kapacitor:1.5
+    restart: always
+    # Kapacitor HTTP API; same port as [http] bind-address in kapacitor.conf.
+    ports:
+      - "0.0.0.0:9092:9092"
+    volumes:
+      - /srv/kapacitor/kapacitor.conf:/etc/kapacitor/kapacitor.conf:ro
+      - /srv/kapacitor/data/:/var/lib/kapacitor/
+      - /srv/kapacitor/load/:/etc/kapacitor/load
diff --git a/warpsrvint/docker_kapacitor/templates/kapacitor.conf b/warpsrvint/docker_kapacitor/templates/kapacitor.conf
new file mode 100644
index 0000000000000000000000000000000000000000..65e62af556a824f9edd02363743874704024d307
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/templates/kapacitor.conf
@@ -0,0 +1,232 @@
+
+# The hostname of this node.
+# Must be resolvable by any configured InfluxDB hosts.
+hostname = "10.5.0.111"
+
+# Directory for storing a small amount of metadata about the server.
+data_dir = "/var/lib/kapacitor"
+
+# Do not apply configuration overrides during startup.
+# Useful if the configuration overrides cause Kapacitor to fail startup.
+# This option is intended as a safe guard and should not be needed in practice.
+skip-config-overrides = true
+
+# Default retention-policy, if a write is made to Kapacitor and
+# it does not have a retention policy associated with it,
+# then the retention policy will be set to this value
+default-retention-policy = ""
+
+
+[http]
+  # HTTP API Server for Kapacitor
+  # This server is always on,
+  # it serves both as a write endpoint
+  # and as the API endpoint for all other
+  # Kapacitor calls.
+  bind-address = ":9092"
+  log-enabled = true
+  write-tracing = false
+  pprof-enabled = false
+  https-enabled = false
+
+
+[config-override]
+  # Enable/Disable the service for overriding configuration via the HTTP API.
+  enabled = true
+
+
+[logging]
+  # Destination for logs
+  # Can be a path to a file or 'STDOUT', 'STDERR'.
+  file = "STDOUT"
+  # Logging level can be one of:
+  # DEBUG, INFO, ERROR
+  # HTTP logging can be disabled in the [http] config section.
+  level = "INFO"
+
+
+[load]
+  # Enable/Disable the service for loading tasks/templates/handlers
+  # from a directory
+  enabled = true
+  # Directory where task/template/handler files are set
+  dir = "/etc/kapacitor/load"
+
+
+[replay]
+  # Where to store replay files, aka recordings.
+  dir = "/var/lib/kapacitor/replay"
+
+
+[task]
+  # Where to store the tasks database
+  # DEPRECATED: This option is not needed for new installations.
+  # It is only used to determine the location of the task.db file
+  # for migrating to the new `storage` service.
+  dir = "/var/lib/kapacitor/tasks"
+  # How often to snapshot running task state.
+  snapshot-interval = "60s"
+
+
+[storage]
+  # Where to store the Kapacitor boltdb database
+  boltdb = "/var/lib/kapacitor/kapacitor.db"
+
+
+[deadman]
+  # Configure a deadman's switch
+  # Globally configure deadman's switches on all tasks.
+  # NOTE: for this to be of use you must also globally configure at least one alerting method.
+  global = false
+  # Threshold, if globally configured the alert will be triggered if the throughput in points/interval is <= threshold.
+  threshold = 5.0
+  # Interval, if globally configured the frequency at which to check the throughput.
+  interval = "10s"
+  {% raw %}
+  # Id -- the alert Id, NODE_NAME will be replaced with the name of the node being monitored.
+  id = "node 'NODE_NAME' in task '{{ .TaskName }}'"
+  # The message of the alert. INTERVAL will be replaced by the interval.
+  message = "{{ .ID }} is {{ if eq .Level \"OK\" }}alive{{ else }}dead{{ end }}: {{ index .Fields \"collected\" | printf \"%0.3f\" }} points/INTERVAL."
+  {% endraw %}
+
+# Multiple InfluxDB configurations can be defined.
+# Exactly one must be marked as the default.
+# Each one will be given a name and can be referenced in batch queries and InfluxDBOut nodes.
+[[influxdb]]
+  # Connect to an InfluxDB cluster
+  # Kapacitor can subscribe, query and write to this cluster.
+  # Using InfluxDB is not required and can be disabled.
+  enabled = true
+  default = true
+  name = "sysmon"
+  urls = [ "{{ influxdb_sysmon.url }}" ]
+  username = "admin"
+  password = "{{ influx_admin_pw }}"
+  timeout = 0
+
+  # Do not verify the TLS/SSL certificate.
+  # This is insecure.
+  insecure-skip-verify = false
+
+  # Maximum time to try and connect to InfluxDB during startup
+  startup-timeout = "5m"
+
+  # Turn off all subscriptions
+  disable-subscriptions = false
+
+  # Subscription mode is either "cluster" or "server"
+  subscription-mode = "server"
+
+  # Which protocol to use for subscriptions
+  # one of 'udp', 'http', or 'https'.
+  subscription-protocol = "http"
+
+  # Subscriptions resync time interval
+  # Useful if you want to subscribe to newly created databases
+  # without restarting Kapacitord
+  subscriptions-sync-interval = "1m0s"
+
+  # Override the global hostname option for this InfluxDB cluster.
+  # Useful if the InfluxDB cluster is in a separate network and
+  # needs special config to connect back to this Kapacitor instance.
+  # Defaults to `hostname` if empty.
+  kapacitor-hostname = ""
+
+  # Override the global http port option for this InfluxDB cluster.
+  # Useful if the InfluxDB cluster is in a separate network and
+  # needs special config to connect back to this Kapacitor instance.
+  # Defaults to the port from `[http] bind-address` if 0.
+  http-port = 0
+
+  # Host part of a bind address for UDP listeners.
+  # For example if a UDP listener is using port 1234
+  # and `udp-bind = "hostname_or_ip"`,
+  # then the UDP port will be bound to `hostname_or_ip:1234`
+  # The default empty value will bind to all addresses.
+  udp-bind = ""
+  # Subscriptions use the UDP network protocol.
+  # The following options are for the created UDP listeners for each subscription.
+  # Number of packets to buffer when reading packets off the socket.
+  udp-buffer = 1000
+  # The size in bytes of the OS read buffer for the UDP socket.
+  # A value of 0 indicates use the OS default.
+  udp-read-buffer = 0
+
+  [influxdb.subscriptions]
+    # Set of databases and retention policies to subscribe to.
+    # If empty will subscribe to all, minus the list in
+    # influxdb.excluded-subscriptions
+    #
+    # Format
+    # db_name = <list of retention policies>
+    #
+    # Example:
+    # my_database = [ "default", "longterm" ]
+  [influxdb.excluded-subscriptions]
+    # Set of databases and retention policies to exclude from the subscriptions.
+    # If influxdb.subscriptions is empty it will subscribe to all
+    # except databases listed here.
+    #
+    # Format
+    # db_name = <list of retention policies>
+    #
+    # Example:
+    # my_database = [ "default", "longterm" ]
+
+
+[alerta]
+  # Configure Alerta.
+  enabled = true
+  # The Alerta URL.
+  url = "https://alerta.warpzone.ms/api"
+  # Default authentication token.
+  token = "{{ alerta_token }}"
+  # Default token prefix
+  # If you are on older versions of alerta you may need to change this to "Key"
+  token-prefix = "Key"
+  # Default environment.
+  environment = "warpzone"
+  # Default origin.
+  origin = "Kapacitor"
+
+
+[udf]
+# Configuration for UDFs (User Defined Functions)
+[udf.functions]
+  # Example go UDF.
+  # First compile example:
+  #   go build -o avg_udf ./udf/agent/examples/moving_avg.go
+  #
+  # Use in TICKscript like:
+  #   stream.goavg()
+  #           .field('value')
+  #           .size(10)
+  #           .as('m_average')
+  #
+  # uncomment to enable
+  #[udf.functions.goavg]
+  #   prog = "./avg_udf"
+  #   args = []
+  #   timeout = "10s"
+
+  # Example python UDF.
+  # Use in TICKscript like:
+  #   stream.pyavg()
+  #           .field('value')
+  #           .size(10)
+  #           .as('m_average')
+  #
+  # uncomment to enable
+  #[udf.functions.pyavg]
+  #   prog = "/usr/bin/python2"
+  #   args = ["-u", "./udf/agent/examples/moving_avg.py"]
+  #   timeout = "10s"
+  #   [udf.functions.pyavg.env]
+  #       PYTHONPATH = "./udf/agent/py"
+
+  # Example UDF over a socket
+  #[udf.functions.myCustomUDF]
+  #   socket = "/path/to/socket"
+  #   timeout = "10s"
+
+
diff --git a/warpsrvint/docker_kapacitor/templates/kapacitor_listtasks.sh b/warpsrvint/docker_kapacitor/templates/kapacitor_listtasks.sh
new file mode 100644
index 0000000000000000000000000000000000000000..1a279f21bb703245d4356e34cefc0109466b0b22
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/templates/kapacitor_listtasks.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Show Kapacitor Tasks
+# Usage: ./kapacitor_listtasks.sh
+
+docker-compose exec app kapacitor list tasks
diff --git a/warpsrvint/docker_kapacitor/templates/kapacitor_show.sh b/warpsrvint/docker_kapacitor/templates/kapacitor_show.sh
new file mode 100644
index 0000000000000000000000000000000000000000..152f907a318e786540fc3d5c0059a494e1d917a5
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/templates/kapacitor_show.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Show Kapacitor Task
+# Usage: ./kapacitor_show.sh <task>
+
+docker-compose exec app kapacitor show "$1"
diff --git a/warpsrvint/docker_kapacitor/templates/kapacitor_watch.sh b/warpsrvint/docker_kapacitor/templates/kapacitor_watch.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f1dece53b88916da87965cebbace97f7afa948fd
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/templates/kapacitor_watch.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+# Show a Kapacitor task, reload it, then watch its logs
+# Usage: ./kapacitor_watch.sh <task>
+
+docker-compose exec app kapacitor show "$1"
+
+docker-compose exec app kapacitor reload "$1"
+docker-compose exec app kapacitor watch "$1"
diff --git a/warpsrvint/docker_kapacitor/templates/task_cpu_high.tick b/warpsrvint/docker_kapacitor/templates/task_cpu_high.tick
new file mode 100644
index 0000000000000000000000000000000000000000..7c56e993f211bd290d2c7a512ff4bb5747aedfae
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/templates/task_cpu_high.tick
@@ -0,0 +1,24 @@
+// Per-host alert when user+system+iowait CPU usage exceeds 60/80/90 (info/warn/crit).
+
+dbrp "{{ influxdb_sysmon.db }}"."autogen"
+
+
+var data = batch
+    |query(''' SELECT mean("usage_user") + mean("usage_system") + mean("usage_iowait") AS value FROM "{{ influxdb_sysmon.db }}"."autogen"."cpu" ''')
+        .period(5m)
+        .every(30s)
+        .fill('linear')
+        .groupBy('host')
+
+{% raw %}
+
+data
+    |alert()
+        .id('{{ index .Tags "host"}}/cpu_high')
+        .message('{{ .ID }}:{{ index .Fields "value" }}')
+        .info(lambda: "value" > 60 )
+        .warn(lambda: "value" > 80 )
+        .crit(lambda: "value" > 90 )
+        .alerta()
+
+{% endraw %}
diff --git a/warpsrvint/docker_kapacitor/templates/task_disk_low.tick b/warpsrvint/docker_kapacitor/templates/task_disk_low.tick
new file mode 100644
index 0000000000000000000000000000000000000000..22a59c7e33e237758e6d1fdb34f2a3d9c2dc6e39
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/templates/task_disk_low.tick
@@ -0,0 +1,24 @@
+// Per host and path alert when used disk space exceeds 80/95/98 percent (info/warn/crit).
+
+dbrp "{{ influxdb_sysmon.db }}"."autogen"
+
+
+var data = batch
+    |query(''' SELECT (mean("total") - mean("free")) / mean("total") * 100 AS value FROM "{{ influxdb_sysmon.db }}"."autogen"."disk" ''')
+        .period(5m)
+        .every(30s)
+        .fill('linear')
+        .groupBy('host','path')
+
+{% raw %}
+
+data
+    |alert()
+        .id('{{ index .Tags "host"}}::{{ index .Tags "path" }}::disk_low')
+        .message('{{ .ID }}::{{ index .Fields "value" }}')
+        .info(lambda: "value" > 80 )
+        .warn(lambda: "value" > 95 )
+        .crit(lambda: "value" > 98 )
+        .alerta()
+
+{% endraw %}
diff --git a/warpsrvint/docker_kapacitor/templates/task_load15_high.tick b/warpsrvint/docker_kapacitor/templates/task_load15_high.tick
new file mode 100644
index 0000000000000000000000000000000000000000..f35252169a1905c4e82c32937771f5ddf590fbbb
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/templates/task_load15_high.tick
@@ -0,0 +1,24 @@
+// Per-host alert when the mean load15 exceeds 4/8/16 (info/warn/crit).
+
+dbrp "{{ influxdb_sysmon.db }}"."autogen"
+
+
+var data = batch
+    |query(''' SELECT mean(load15) AS value FROM "{{ influxdb_sysmon.db }}"."autogen"."system" ''')
+        .period(5m)
+        .every(30s)
+        .fill('linear')
+        .groupBy('host')
+
+{% raw %}
+
+data
+    |alert()
+        .id('{{ index .Tags "host"}}/load15_high')
+        .message('{{ .ID }}:{{ index .Fields "value" }}')
+        .info(lambda: "value" > 4 )
+        .warn(lambda: "value" > 8 )
+        .crit(lambda: "value" > 16 )
+        .alerta()
+
+{% endraw %}
diff --git a/warpsrvint/docker_kapacitor/templates/task_load1_high.tick b/warpsrvint/docker_kapacitor/templates/task_load1_high.tick
new file mode 100644
index 0000000000000000000000000000000000000000..ebd775b7b1520f03889370eb5a65654fd76346b9
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/templates/task_load1_high.tick
@@ -0,0 +1,24 @@
+// Per-host alert when the mean load1 exceeds 6/12/24 (info/warn/crit).
+
+dbrp "{{ influxdb_sysmon.db }}"."autogen"
+
+
+var data = batch
+    |query(''' SELECT mean(load1) AS value FROM "{{ influxdb_sysmon.db }}"."autogen"."system" ''')
+        .period(5m)
+        .every(30s)
+        .fill('linear')
+        .groupBy('host')
+
+{% raw %}
+
+data
+    |alert()
+        .id('{{ index .Tags "host"}}/load1_high')
+        .message('{{ .ID }}:{{ index .Fields "value" }}')
+        .info(lambda: "value" > 6 )
+        .warn(lambda: "value" > 12 )
+        .crit(lambda: "value" > 24 )
+        .alerta()
+
+{% endraw %}
diff --git a/warpsrvint/docker_kapacitor/templates/task_load5_high.tick b/warpsrvint/docker_kapacitor/templates/task_load5_high.tick
new file mode 100644
index 0000000000000000000000000000000000000000..f44c8d3e16721db781f24c1c9bb7fff38928a8c0
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/templates/task_load5_high.tick
@@ -0,0 +1,24 @@
+// Per-host alert when the mean load5 exceeds 5/10/20 (info/warn/crit).
+
+dbrp "{{ influxdb_sysmon.db }}"."autogen"
+
+
+var data = batch
+    |query(''' SELECT mean(load5) AS value FROM "{{ influxdb_sysmon.db }}"."autogen"."system" ''')
+        .period(5m)
+        .every(30s)
+        .fill('linear')
+        .groupBy('host')
+
+{% raw %}
+
+data
+    |alert()
+        .id('{{ index .Tags "host"}}/load5_high')
+        .message('{{ .ID }}:{{ index .Fields "value" }}')
+        .info(lambda: "value" > 5 )
+        .warn(lambda: "value" > 10 )
+        .crit(lambda: "value" > 20 )
+        .alerta()
+
+{% endraw %}
diff --git a/warpsrvint/docker_kapacitor/templates/task_mem_used.tick b/warpsrvint/docker_kapacitor/templates/task_mem_used.tick
new file mode 100644
index 0000000000000000000000000000000000000000..d43f0426059cdc82cfa15bfe125ff2f442026b32
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/templates/task_mem_used.tick
@@ -0,0 +1,24 @@
+// Per-host alert when (total - free) / total memory exceeds 85/90/95 percent (info/warn/crit).
+
+dbrp "{{ influxdb_sysmon.db }}"."autogen"
+
+
+var data = batch
+    |query(''' SELECT (( mean(total) - mean(free) ) / mean(total) * 100) AS value FROM "{{ influxdb_sysmon.db }}"."autogen"."mem" ''')
+        .period(5m)
+        .every(30s)
+        .fill('linear')
+        .groupBy('host')
+
+{% raw %}
+
+data
+    |alert()
+        .id('{{ index .Tags "host"}}/mem_used')
+        .message('{{ .ID }}:{{ index .Fields "value" }}')
+        .info(lambda: "value" > 85 )
+        .warn(lambda: "value" > 90 )
+        .crit(lambda: "value" > 95 )
+        .alerta()
+
+{% endraw %}
diff --git a/warpsrvint/docker_kapacitor/templates/task_notraffic_globe.tick b/warpsrvint/docker_kapacitor/templates/task_notraffic_globe.tick
new file mode 100644
index 0000000000000000000000000000000000000000..da68de9e20d40559f58dce5b4a0c31ade436208a
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/templates/task_notraffic_globe.tick
@@ -0,0 +1,23 @@
+// Critical alert when (almost) no packets arrive on igb1 ("Globe") within 5m.
+
+dbrp "{{ influxdb_sysmon.db }}"."autogen"
+
+// packets_recv and the "interface" tag belong to the "net" measurement.
+// NOTE(review): sum() assumes packets_recv holds per-sample packet counts. If
+// it is a cumulative counter, sum() never gets small and spread() would give
+// the packets received in the window. Confirm against the collector config.
+var data = batch
+    |query(''' SELECT sum("packets_recv") AS value FROM "{{ influxdb_sysmon.db }}"."autogen"."net" WHERE ("interface" = 'igb1') ''')
+        .period(5m)
+        .every(30s)
+
+{% raw %}
+
+// Fire when at most 9 packets were seen in the window, i.e. the link is idle.
+data
+    |alert()
+        .id('No Traffic for 5m on Globe')
+        .crit(lambda: "value" <= 9 )
+        .alerta()
+
+{% endraw %}
# NOTE: hand-edited patch. The hunk line count of task_notraffic_telekom.tick
# is updated to match the edited contents, but its `index` blob hash is carried
# over unchanged and no longer corresponds to the new file contents.
# NOTE(review): the context-line indentation in the main.yml hunk is assumed
# (4 spaces); confirm against the target file before applying.
diff --git a/warpsrvint/docker_kapacitor/templates/task_notraffic_telekom.tick b/warpsrvint/docker_kapacitor/templates/task_notraffic_telekom.tick
new file mode 100644
index 0000000000000000000000000000000000000000..01bef341d14dd1cab2cfc266555d26730d985723
--- /dev/null
+++ b/warpsrvint/docker_kapacitor/templates/task_notraffic_telekom.tick
@@ -0,0 +1,23 @@
+// Critical alert when (almost) no packets arrive on igb0 ("Telekom") within 5m.
+
+dbrp "{{ influxdb_sysmon.db }}"."autogen"
+
+// packets_recv and the "interface" tag belong to the "net" measurement.
+// NOTE(review): sum() assumes packets_recv holds per-sample packet counts. If
+// it is a cumulative counter, sum() never gets small and spread() would give
+// the packets received in the window. Confirm against the collector config.
+var data = batch
+    |query(''' SELECT sum("packets_recv") AS value FROM "{{ influxdb_sysmon.db }}"."autogen"."net" WHERE ("interface" = 'igb0') ''')
+        .period(5m)
+        .every(30s)
+
+{% raw %}
+
+// Fire when at most 9 packets were seen in the window, i.e. the link is idle.
+data
+    |alert()
+        .id('No Traffic for 5m on Telekom')
+        .crit(lambda: "value" <= 9 )
+        .alerta()
+
+{% endraw %}
diff --git a/warpsrvint/main.yml b/warpsrvint/main.yml
index e8fc433e46e895a34e24fca8e088702f268c2823..796cbd87d63755b4a51789810a9b829333742e4e 100644
--- a/warpsrvint/main.yml
+++ b/warpsrvint/main.yml
@@ -11,6 +11,7 @@
     - { role: docker_grafana, tags: grafana }
     - { role: docker_influx, tags: influx }
     - { role: docker_iobroker, tags: iobroker }
+    - { role: docker_kapacitor, tags: kapacitor }
     - { role: docker_librenms, tags: librenms }
     - { role: docker_l4z0r, tags: l4z0r }
     # - { role: docker_ldap, tags: ldap }