diff --git a/common/prometheus-node/tasks/main.yml b/common/prometheus-node/tasks/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8165ba04f485a2210b27d9c0707579380915ca6a
--- /dev/null
+++ b/common/prometheus-node/tasks/main.yml
@@ -0,0 +1,18 @@
+---
+# Install packages
+- name: pakete installieren
+  apt:
+    pkg: "{{ item }}"
+    update_cache: true
+    state: present  # the old 'installed' alias is no longer accepted by newer Ansible
+  with_items:
+    - prometheus-node-exporter
+
+- name: reload systemd and enable service
+  systemd:
+    name: prometheus-node-exporter
+    enabled: true
+    daemon_reload: true
+
+- name: restart prometheus-node-exporter
+  service: name=prometheus-node-exporter state=restarted
diff --git a/host_vars/warpsrvint b/host_vars/warpsrvint
index 7920ffd32f6dcd724481accb1aa153dfd772dc59..48624fcc074506f88ecfc65627dea3e741a3e7d3 100644
--- a/host_vars/warpsrvint
+++ b/host_vars/warpsrvint
@@ -10,7 +10,7 @@ debian_sources:
   - "deb http://security.debian.org/ jessie/updates main contrib non-free"
   - "deb http://debian.uni-duisburg-essen.de/debian/ jessie-updates main contrib non-free"
   - "deb https://apt.dockerproject.org/repo debian-jessie main"
-  - "deb http://http.debian.net/debian wheezy-backports main"
+  - "deb http://http.debian.net/debian jessie-backports main"
 
 
 debian_keys:
diff --git a/warpsrvint/docker_grafana/tasks/main.yml b/warpsrvint/docker_grafana/tasks/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6078eda2b3bb6c9cc48ab1f63e81d39fa97db48f
--- /dev/null
+++ b/warpsrvint/docker_grafana/tasks/main.yml
@@ -0,0 +1,65 @@
+---
+# Some secrets are stored locally on the server and are read from there.
+# The files are read with slurp; the results are kept in the registered variables (warpinfra_secrets, grafana_secrets).
+# The required entries are then extracted from those results.
+# Data returned by slurp is Base64-encoded.
+# To be safe, whitespace is stripped so that e.g. line breaks are not carried over.
+
+- name: read ldap readonly password from server
+  slurp: src={{ item }}
+  with_items:
+    - /srv/ldap/secret/ldap_readonly_pass
+  register: warpinfra_secrets
+
+- name: set ldap_readonly_pass fact
+  set_fact:
+    ldap_readonly_pass: "{{ warpinfra_secrets.results | selectattr('item', 'equalto', '/srv/ldap/secret/ldap_readonly_pass') | map(attribute='content') | list | first | b64decode | regex_replace('\\s', '') }}"
+
+- name: read grafana admin password from server
+  slurp: src={{ item }}
+  with_items:
+    - /srv/grafana/grafana_admin_pass
+  register: grafana_secrets
+
+- name: set grafana_admin_pass fact
+  set_fact:
+    grafana_admin_pass: "{{ grafana_secrets.results | selectattr('item', 'equalto', '/srv/grafana/grafana_admin_pass') | map(attribute='content') | list | first | b64decode | regex_replace('\\s', '') }}"
+
+- name: create folder struct for grafana
+  file:
+    path: "{{ item }}"
+    state: "directory"
+  with_items:
+    - "/srv/grafana/"
+    - "/srv/grafana/config/"
+    - "/srv/grafana/data/"
+
+- name: create config files
+  template: src={{ item }} dest=/srv/grafana/config/{{ item }}
+  with_items:
+    - grafana.ini
+    - ldap.toml
+  register: config
+
+- name: stop grafana-app docker
+  docker_container:
+    name: grafana-app
+    state: absent
+  when: config.changed
+
+- name: start grafana-app docker
+  docker_container:
+    name: grafana-app
+    image: grafana/grafana:4.4.1
+    state: started
+    restart_policy: always
+    volumes:
+      - /srv/grafana/config/grafana.ini:/etc/grafana/grafana.ini
+      - /srv/grafana/config/ldap.toml:/etc/grafana/ldap.toml
+      - /srv/grafana/data/:/var/lib/grafana
+    ports:
+      - "3000:3000"
+    env:
+      GF_SERVER_ROOT_URL: "http://10.5.0.111:3000"
+      GF_SECURITY_ADMIN_PASSWORD: "{{ grafana_admin_pass }}"
+
diff --git a/warpsrvint/docker_grafana/templates/grafana.ini b/warpsrvint/docker_grafana/templates/grafana.ini
new file mode 100644
index 0000000000000000000000000000000000000000..1add827f84194d8e71598d3a77171048c290675d
--- /dev/null
+++ b/warpsrvint/docker_grafana/templates/grafana.ini
@@ -0,0 +1,137 @@
+##################### Grafana Configuration ##################################
+#
+# Everything has defaults so you only need to uncomment things you want to
+# change
+
+# possible values : production, development
+app_mode = production
+
+# instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty
+instance_name = intern
+
+#
+#################################### Server ####################################
+[server]
+# Protocol (http, https, socket)
+;protocol = http
+
+# The ip address to bind to, empty will bind to all interfaces
+;http_addr =
+
+# The http port to use
+;http_port = 3000
+
+# The public facing domain name used to access grafana from a browser
+;domain = localhost
+
+# Redirect to correct domain if host header does not match domain
+# Prevents DNS rebinding attacks
+;enforce_domain = false
+
+# The full public facing url you use in browser, used for redirects and emails
+# If you use reverse proxy and sub path specify full url (with sub path)
+;root_url = http://localhost:3000
+
+# Log web requests
+;router_logging = false
+
+# the path relative working path
+;static_root_path = public
+
+# enable gzip
+;enable_gzip = false
+
+# https certs & key file
+;cert_file =
+;cert_key =
+
+# Unix socket path
+;socket =
+
+
+#################################### Security ####################################
+[security]
+# default admin user, created on startup
+;admin_user = admin
+
+# default admin password, can be changed before first start of grafana, or in profile settings
+;admin_password = admin
+
+# used for signing
+;secret_key = SW2YcwTIb9zpOOhoPsMm
+
+# Auto-login remember days
+;login_remember_days = 7
+;cookie_username = grafana_user
+;cookie_remember_name = grafana_remember
+
+# disable gravatar profile images
+;disable_gravatar = false
+
+# data source proxy whitelist (ip_or_domain:port separated by spaces)
+;data_source_proxy_whitelist =
+
+[snapshots]
+# snapshot sharing options
+;external_enabled = true
+;external_snapshot_url = https://snapshots-origin.raintank.io
+;external_snapshot_name = Publish to snapshot.raintank.io
+
+# remove expired snapshot
+;snapshot_remove_expired = true
+
+# remove snapshots after 90 days
+;snapshot_TTL_days = 90
+
+#################################### Users ####################################
+[users]
+# disable user signup / registration
+allow_sign_up = false
+
+# Allow non admin users to create organizations
+allow_org_create = false
+
+# Set to true to automatically assign new users to the default organization (id 1)
+auto_assign_org = true
+
+# Default role new users will be automatically assigned (if disabled above is set to true)
+auto_assign_org_role = Viewer
+
+# Background text for the user field on the login page
+login_hint = infra.warpzone.ms account
+
+# Default UI theme ("dark" or "light")
+default_theme = dark
+
+[auth]
+# Set to true to disable (hide) the login form, useful if you use OAuth, defaults to false
+;disable_login_form = false
+
+# Set to true to disable the signout link in the side menu. useful if you use auth.proxy, defaults to false
+;disable_signout_menu = false
+
+#################################### Anonymous Auth ##########################
+[auth.anonymous]
+# enable anonymous access
+enabled = true
+
+# specify organization name that should be used for unauthenticated users
+org_name = Main Org.
+
+# specify role for unauthenticated users
+org_role = Viewer
+
+#################################### Auth LDAP ##########################
+[auth.ldap]
+enabled = true
+config_file = /etc/grafana/ldap.toml
+allow_sign_up = true
+
+
+#################################### Alerting ############################
+[alerting]
+# Disable alerting engine & UI features
+enabled = false
+# Makes it possible to turn off alert rule execution but alerting UI is visible
+execute_alerts = false
+
diff --git a/warpsrvint/docker_grafana/templates/ldap.toml b/warpsrvint/docker_grafana/templates/ldap.toml
new file mode 100644
index 0000000000000000000000000000000000000000..dad2a61871b7a8d71ad12cb90f51ef5a6af4a8e0
--- /dev/null
+++ b/warpsrvint/docker_grafana/templates/ldap.toml
@@ -0,0 +1,59 @@
+# Set to true to log user information returned from LDAP
+verbose_logging = false
+
+[[servers]]
+# Ldap server host (specify multiple hosts space separated)
+host = "10.0.20.2"
+# Default port is 389 or 636 if use_ssl = true
+port = 389
+# Set to true if ldap server supports TLS
+use_ssl = false
+# Set to true if connect ldap server with STARTTLS pattern (create connection in insecure, then upgrade to secure connection with TLS)
+start_tls = false
+# set to true if you want to skip ssl cert validation
+ssl_skip_verify = false
+# set to the path to your root CA certificate or leave unset to use system defaults
+# root_ca_cert = "/path/to/certificate.crt"
+
+# Search user bind dn
+bind_dn = "cn=readonly,dc=warpzone,dc=ms"
+# Search user bind password
+# If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;"""
+bind_password = '{{ldap_readonly_pass}}'
+
+# User search filter, for example "(cn=%s)" or "(sAMAccountName=%s)" or "(uid=%s)"
+search_filter = "(uid=%s)"
+
+# An array of base dns to search through
+search_base_dns = ["dc=warpzone,dc=ms"]
+
+# In POSIX LDAP schemas, without memberOf attribute a secondary query must be made for groups.
+# This is done by enabling group_search_filter below. You must also set member_of= "cn"
+# in [servers.attributes] below.
+
+## Group search filter, to retrieve the groups of which the user is a member (only set if memberOf attribute is not available)
+# group_search_filter = "(&(objectClass=posixGroup)(memberUid=%s))"
+## An array of the base DNs to search through for groups. Typically uses ou=groups
+# group_search_base_dns = ["ou=groups,dc=grafana,dc=org"]
+
+# Specify names of the ldap attributes your ldap uses
+[servers.attributes]
+name = "givenName"
+surname = "sn"
+username = "uid"
+member_of = "memberOf"
+email = "email"
+
+# Map ldap groups to grafana org roles
+[[servers.group_mappings]]
+group_dn = "cn=grafana-admin,ou=infrastructure,dc=warpzone,dc=ms"
+org_role = "Admin"
+
+[[servers.group_mappings]]
+group_dn = "cn=active,ou=groups,dc=warpzone,dc=ms"
+org_role = "Editor"
+
+[[servers.group_mappings]]
+# If you want to match all (or no ldap groups) then you can use wildcard
+group_dn = "*"
+org_role = "Viewer"
diff --git a/warpsrvint/docker_nodered/tasks/main.yml b/warpsrvint/docker_nodered/tasks/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fbb362a0ae0ac1a117266326e55a012529777394
--- /dev/null
+++ b/warpsrvint/docker_nodered/tasks/main.yml
@@ -0,0 +1,16 @@
+---
+- name: create folder struct for nodered
+  file:
+    path: "/srv/nodered/data/"
+    state: "directory"
+
+- name: start nodered docker
+  docker_container:
+    name: nodered-app
+    image: nodered/node-red-docker:0.16.2
+    state: started
+    restart_policy: always
+    volumes:
+      - /srv/nodered/data/:/data
+    ports:
+      - "1880:1880"
diff --git a/warpsrvint/docker_prometheus/tasks/main.yml b/warpsrvint/docker_prometheus/tasks/main.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a83e64c3d52d9eb5686c39ea8666ba89397502ff
--- /dev/null
+++ b/warpsrvint/docker_prometheus/tasks/main.yml
@@ -0,0 +1,90 @@
+---
+
+- name: create folder struct for prometheus
+  file:
+    path: "{{ item }}"
+    state: "directory"
+  with_items:
+    - /srv/prometheus/
+    - /srv/prometheus/config/
+    - /srv/prometheus/data/
+    - /srv/prometheus/alert-data/
+
+- name: create config files
+  template: src={{ item }} dest=/srv/prometheus/config/{{ item }}
+  with_items:
+    - alertmanager.yml
+    - prometheus.yml
+    - prometheus.rules
+  register: config
+
+- name: start prometheus blackbox-exporter docker
+  docker_container:
+    name: prometheus-blackbox-exporter
+    image: prom/blackbox-exporter:v0.5.0
+    state: started
+    restart_policy: always
+    ports:
+      - "0.0.0.0:9115:9115"
+
+- name: start prometheus snmp-exporter docker
+  docker_container:
+    name: prometheus-snmp-exporter
+    image: prom/snmp-exporter:v0.4.0
+    state: started
+    restart_policy: always
+    ports:
+      - "0.0.0.0:9116:9116"
+
+- name: start prometheus statsd-exporter docker
+  docker_container:
+    name: prometheus-statsd-exporter
+    image: prom/statsd-exporter:v0.4.0
+    state: started
+    restart_policy: always
+    ports:
+      - "0.0.0.0:9102:9102"
+      - "0.0.0.0:9125:9125/udp"
+
+- name: stop prometheus-alertmanager docker
+  docker_container:
+    name: prometheus-alert
+    state: absent
+  when: config.changed
+
+- name: start prometheus-alertmanager docker
+  docker_container:
+    name: prometheus-alert
+    image: prom/alertmanager:v0.7.1
+    state: started
+    restart_policy: always
+    volumes:
+      - /srv/prometheus/config/alertmanager.yml:/etc/alertmanager/config.yml
+      - /srv/prometheus/alert-data/:/alertmanager
+    ports:
+      - "0.0.0.0:9093:9093"
+
+- name: stop prometheus docker
+  docker_container:
+    name: prometheus-app
+    state: absent
+  when: config.changed
+
+- name: start prometheus docker
+  docker_container:
+    name: prometheus-app
+    image: prom/prometheus:v1.7.1
+    state: started
+    restart_policy: always
+    volumes:
+      - /srv/prometheus/config/prometheus.yml:/etc/prometheus/prometheus.yml
+      - /srv/prometheus/config/prometheus.rules:/etc/prometheus/prometheus.rules
+      - /srv/prometheus/data/:/prometheus
+    ports:
+      - "0.0.0.0:9090:9090"
+    links:
+      - prometheus-blackbox-exporter:blackbox-exporter
+      - prometheus-snmp-exporter:snmp-exporter
+      - prometheus-statsd-exporter:statsd-exporter
+      - prometheus-alert:alertmanager
+
diff --git a/warpsrvint/docker_prometheus/templates/alertmanager.yml b/warpsrvint/docker_prometheus/templates/alertmanager.yml
new file mode 100644
index 0000000000000000000000000000000000000000..102205e12bc02bdee00652f1d2d8f5580fb006d0
--- /dev/null
+++ b/warpsrvint/docker_prometheus/templates/alertmanager.yml
@@ -0,0 +1,49 @@
+global:
+  # The smarthost and SMTP sender used for mail notifications.
+  smtp_smarthost: 'mail.warpzone.ms:25'
+  smtp_from: 'alert@warpzone.ms'
+  # smtp_auth_username: 'alertmanager'
+  # smtp_auth_password: 'password'
+
+# The root route on which each incoming alert enters.
+route:
+  # The labels by which incoming alerts are grouped together. For example,
+  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
+  # be batched into a single group.
+  group_by: ['alertname', 'cluster', 'service']
+
+  # When a new group of alerts is created by an incoming alert, wait at
+  # least 'group_wait' to send the initial notification.
+  # This way ensures that you get multiple alerts for the same group that start
+  # firing shortly after another are batched together on the first
+  # notification.
+  group_wait: 30s
+
+  # When the first notification was sent, wait 'group_interval' to send a batch
+  # of new alerts that started firing for that group.
+  group_interval: 5m
+
+  # If an alert has successfully been sent, wait 'repeat_interval' to
+  # resend them.
+  repeat_interval: 3h
+
+  # A default receiver
+  receiver: mail
+
+
+# Inhibition rules allow to mute a set of alerts given that another alert is
+# firing.
+# We use this to mute any warning-level notifications if the same alert is
+# already critical.
+inhibit_rules:
+- source_match:
+    severity: 'critical'
+  target_match:
+    severity: 'warning'
+  # Apply inhibition if the alertname is the same.
+  equal: ['alertname', 'cluster', 'service']
+
+receivers:
+- name: 'mail'
+  email_configs:
+  - to: 'void@members.warpzone.ms'
diff --git a/warpsrvint/docker_prometheus/templates/prometheus.rules b/warpsrvint/docker_prometheus/templates/prometheus.rules
new file mode 100644
index 0000000000000000000000000000000000000000..43e8146a55b33d6a268e6f42af2b44d046208c8b
--- /dev/null
+++ b/warpsrvint/docker_prometheus/templates/prometheus.rules
@@ -0,0 +1,18 @@
+
+ALERT MEM_FULL_99P
+  IF ((node_memory_MemTotal - node_memory_MemFree - node_memory_Buffers - node_memory_Cached) / node_memory_MemTotal * 100) > 99
+  FOR 15m
+
+ALERT HIGH_CPU
+  IF rate(node_cpu{mode = "idle"}[5m]) < 0.1
+  FOR 15m
+
+ALERT HIGH_DISK_IO
+  IF node_disk_io_now > 10
+  FOR 5m
+
+ALERT DISK_FULL_95P
+  IF ( 100 *(1 - (node_filesystem_free / node_filesystem_size) ) ) > 95
+
+ALERT DISK_FULL_4H
+  IF predict_linear(node_filesystem_free[1h],4*3600) < 0
diff --git a/warpsrvint/docker_prometheus/templates/prometheus.yml b/warpsrvint/docker_prometheus/templates/prometheus.yml
new file mode 100644
index 0000000000000000000000000000000000000000..052f6c14e563aa035b8f831a735bf804a23f0808
--- /dev/null
+++ b/warpsrvint/docker_prometheus/templates/prometheus.yml
@@ -0,0 +1,101 @@
+# my global config
+global:
+  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
+  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
+  # scrape_timeout is set to the global default (10s).
+
+  # Attach these labels to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+    monitor: 'prometheus'
+
+# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
+rule_files:
+  - "/etc/prometheus/prometheus.rules"
+
+alerting:
+  alertmanagers:
+    - scheme: http
+      static_configs:
+        - targets:
+            - "alertmanager:9093"
+
+# A scrape configuration containing exactly one endpoint to scrape:
+# Here it's Prometheus itself.
+scrape_configs:
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+  - job_name: 'prometheus'
+
+    static_configs:
+      - targets: ['localhost:9090']
+        labels:
+          group: 'service'
+
+
+  - job_name: 'node'
+
+    static_configs:
+      - targets: ['warpsrvint:9100']
+        labels:
+          group: 'server'
+
+
+  - job_name: 'snmp'
+
+    metrics_path: /snmp
+    params:
+      module: [default]
+    static_configs:
+      - targets:
+          - warpfire
+    relabel_configs:
+      - source_labels: [__address__]
+        target_label: __param_target
+      - source_labels: [__param_target]
+        target_label: instance
+      - target_label: __address__
+        replacement: snmp-exporter:9116
+
+
+  - job_name: 'ping'
+    metrics_path: /probe
+    params:
+      module: [icmp]
+    static_configs:
+      - targets:
+          - 212.124.34.241 # Next Hop Globe
+          - 2001:470:1f0a:a3b::1 # HE Tunnel Endpoint
+          - 8.8.8.8 # Google DNS
+          - 217.79.181.126 # Server MyLoc IPv4
+          - 2001:4ba0:ffff:7c::1 # Server MyLoc IPv6
+          - 10.5.0.1 # warpfire
+          - 192.168.0.100 # Switch HP
+          - 192.168.0.101 # Switch Brocade
+    relabel_configs:
+      - source_labels: [__address__]
+        target_label: __param_target
+      - source_labels: [__param_target]
+        target_label: instance
+      - target_label: __address__
+        replacement: blackbox-exporter:9115
+
+
+  - job_name: 'http'
+    metrics_path: /probe
+    params:
+      module: [http_2xx]
+    static_configs:
+      - targets:
+          - https://warpzone.ms
+          - https://gitlab.warpzone.ms
+          - https://infra.warpzone.ms
+          - https://mattermost.warpzone.ms
+          - https://pad.warpzone.ms
+          - https://wiki.warpzone.ms
+    relabel_configs:
+      - source_labels: [__address__]
+        target_label: __param_target
+      - source_labels: [__param_target]
+        target_label: instance
+      - target_label: __address__
+        replacement: blackbox-exporter:9115
diff --git a/warpsrvint/main.yml b/warpsrvint/main.yml
index 0de88b384169126857c6f17e9590e30c442dd38d..44ea6e99b5061dd57a596f7677177981d74b42ad 100644
--- a/warpsrvint/main.yml
+++ b/warpsrvint/main.yml
@@ -3,10 +3,14 @@
 - hosts: warpsrvint
   remote_user: root
   roles:
+    - { role: "../common/prometheus-node", tags: prometheus-node }
     - { role: nginx, tags: nginx }
     - { role: docker, tags: docker }
+    - { role: docker_grafana, tags: grafana }
 #    - { role: docker_ldap, tags: ldap }
     - { role: docker_mqtt, tags: mqtt }
+    - { role: docker_nodered, tags: nodered }
+    - { role: docker_prometheus, tags: prometheus }
     - { role: docker_warpinfra, tags: warpinfra }
     - { role: docker_warpinfratest, tags: warpinfratest }
 