From 6ad4c172d23b88a0e0bace8488e4f53f6ee4ce6e Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Tue, 1 Sep 2020 20:10:10 +0200 Subject: ops --- ansible/host_vars/birgitte/ops-agent.yml | 17 +++ ansible/host_vars/malabaricus/ops-agent.yml | 29 ++++ ansible/plays/ops-agent.yml | 36 +++++ ansible/plays/ops-server.yml | 34 +++++ .../plays/templates/ops-agent/docker-compose.yml | 27 ++++ ansible/plays/templates/ops-agent/telegraf.conf | 55 ++++++++ .../plays/templates/ops-server/docker-compose.yml | 25 ++++ ansible/plays/templates/ops-server/influxdb.conf | 155 +++++++++++++++++++++ ansible/roles/docker-service/defaults/main.yml | 5 + ansible/roles/docker-service/handlers/main.yml | 7 + ansible/roles/docker-service/tasks/main.yml | 64 +++++++++ docker/birgitte/ops/docker-compose.yml | 25 ++++ docker/birgitte/ops/influxdb.conf | 155 +++++++++++++++++++++ docker/birgitte/owncloud/docker-compose.yml | 1 + 14 files changed, 635 insertions(+) create mode 100644 ansible/host_vars/birgitte/ops-agent.yml create mode 100644 ansible/host_vars/malabaricus/ops-agent.yml create mode 100644 ansible/plays/ops-agent.yml create mode 100644 ansible/plays/ops-server.yml create mode 100644 ansible/plays/templates/ops-agent/docker-compose.yml create mode 100644 ansible/plays/templates/ops-agent/telegraf.conf create mode 100644 ansible/plays/templates/ops-server/docker-compose.yml create mode 100644 ansible/plays/templates/ops-server/influxdb.conf create mode 100644 ansible/roles/docker-service/defaults/main.yml create mode 100644 ansible/roles/docker-service/handlers/main.yml create mode 100644 ansible/roles/docker-service/tasks/main.yml create mode 100644 docker/birgitte/ops/docker-compose.yml create mode 100644 docker/birgitte/ops/influxdb.conf diff --git a/ansible/host_vars/birgitte/ops-agent.yml b/ansible/host_vars/birgitte/ops-agent.yml new file mode 100644 index 0000000..5c2c3cf --- /dev/null +++ b/ansible/host_vars/birgitte/ops-agent.yml @@ -0,0 +1,17 @@ 
+telegraf_extra_mount_points:  # host paths bind-mounted :ro into telegraf
+  - /dev/disk
+  - /dev/sda
+
+telegraf_sensors_enable: true
+telegraf_smart_enable: true
+# Names below come from: ls -1 /dev/disk/by-id/|grep ^ata|grep -v -- -part
+telegraf_smart_devices:
+  - ata-APPLE_SSD_SM128_DFKUA04844EC992
+  - ata-Corsair_CSSD-F120GB2_10446506330009980010
+  - ata-HGST_HUS728T8TALE6L4_VDJA4N6K
+  - ata-HGST_HUS728T8TALE6L4_VDJARDYK
+  - ata-INTEL_SSDSC2CT240A4_CVKI304304XQ240DGN
+  - ata-INTEL_SSDSC2CT240A4_CVKI319001LL240DGN
+  - ata-INTEL_SSDSC2CW240A3_CVCV24100044240CGN
+  - ata-ST4000DM005-2DP166_WDH12006
+  - ata-ST4000DM005-2DP166_WDH1209R
diff --git a/ansible/host_vars/malabaricus/ops-agent.yml b/ansible/host_vars/malabaricus/ops-agent.yml
new file mode 100644
index 0000000..72c221b
--- /dev/null
+++ b/ansible/host_vars/malabaricus/ops-agent.yml
@@ -0,0 +1,29 @@
+telegraf_extra_config: |
+  [[inputs.multifile]]
+    base_dir = "/sys/class/thermal"
+
+    ## If true discard all data when a single file can't be read.
+    ## Else, Telegraf omits the field generated from this file.
+    # fail_early = true
+
+    ## Files to parse each interval.
+    [[inputs.multifile.file]]
+      file = "thermal_zone0/temp"
+      dest = "zone0"
+      conversion = "float(3)"
+    [[inputs.multifile.file]]
+      file = "thermal_zone1/temp"
+      dest = "zone1"
+      conversion = "float(3)"
+    [[inputs.multifile.file]]
+      file = "thermal_zone2/temp"
+      dest = "zone2"
+      conversion = "float(3)"
+    [[inputs.multifile.file]]
+      file = "thermal_zone3/temp"
+      dest = "zone3"
+      conversion = "float(3)"
+    [[inputs.multifile.file]]
+      file = "thermal_zone4/temp"
+      dest = "zone4"
+      conversion = "float(3)"
diff --git a/ansible/plays/ops-agent.yml b/ansible/plays/ops-agent.yml
new file mode 100644
index 0000000..992123a
--- /dev/null
+++ b/ansible/plays/ops-agent.yml
@@ -0,0 +1,36 @@
+- hosts:
+    - birgitte
+    - malabaricus
+  tasks:
+# How to generate a starting point for templates/ops-agent/telegraf.conf:
+#   docker run --rm telegraf:1.14 bash
+# Full config:
+#
+#   telegraf config
+#
+# Smarter config (selected inputs only, comments stripped):
+#
+#   telegraf --input-filter cpu:disk:diskio:kernel:mem:processes:system:swap config|grep -v '^ *#'|uniq
+#
+
+    - become: true
+      name: mkdir /etc/docker-service/ops-agent
+      file:
+        dest: /etc/docker-service/ops-agent
+        state: directory
+        owner: root
+        group: root
+        mode: "u=rx"
+    - become: true
+      template:
+        src: "templates/ops-agent/telegraf.conf"
+        dest: "/etc/docker-service/ops-agent/telegraf.conf"
+
+    - import_role:
+        name: docker-service
+      tags: docker-service
+      vars:
+        service: ops-agent
+        template: templates/ops-agent/docker-compose.yml
+#        systemd_enabled: no
+#        systemd_state: stopped
diff --git a/ansible/plays/ops-server.yml b/ansible/plays/ops-server.yml
new file mode 100644
index 0000000..adfcfc6
--- /dev/null
+++ b/ansible/plays/ops-server.yml
@@ -0,0 +1,34 @@
+- hosts:
+    - birgitte
+  vars:
+    grafana_basedir: /pool0/ops/grafana
+  tasks:
+    - become: true
+      name: mkdir /etc/docker-service/ops-server
+      file:
+        dest: /etc/docker-service/ops-server
+        state: directory
+        owner: root
+        group: root
+        mode: "u=rx"
+    - become: true
+      template:
+        src: "templates/ops-server/influxdb.conf"
+        dest:
"/etc/docker-service/ops-server/influxdb.conf"
+
+    - become: true
+      name: permissions
+      file:
+        dest: "{{ grafana_basedir }}"
+        owner: "472"
+        recurse: true
+
+    - import_role:
+        name: docker-service
+      tags: docker-service
+      vars:
+        service: ops-server
+        template: templates/ops-server/docker-compose.yml
+#        systemd_enabled: no
+#        systemd_state: stopped
+
diff --git a/ansible/plays/templates/ops-agent/docker-compose.yml b/ansible/plays/templates/ops-agent/docker-compose.yml
new file mode 100644
index 0000000..c5a4ecc
--- /dev/null
+++ b/ansible/plays/templates/ops-agent/docker-compose.yml
@@ -0,0 +1,27 @@
+version: "3"
+services:
+  telegraf:
+    image: telegraf:1.15
+    privileged: true
+    network_mode: host
+    volumes:
+      - /etc/docker-service/ops-agent/telegraf.conf:/etc/telegraf/telegraf.conf:ro
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - /sys:/rootfs/sys:ro
+      - /proc:/rootfs/proc:ro
+      - /etc:/rootfs/etc:ro
+{% for mp in telegraf_extra_mount_points|default([]) %}
+      - "{{ mp }}:{{ mp }}:ro"
+{% endfor %}
+
+    command:  # installs smartmontools at every container start, then execs telegraf
+      - sh
+      - -c
+      - apt update && apt install -y --install-recommends=no smartmontools; exec telegraf
+    environment:
+      INFLUX_URL: "http://birgitte.vpn.trygvis.io:8086"
+      INFLUX_SKIP_DATABASE_CREATION: "true"
+      HOST_PROC: "/rootfs/proc"
+      HOST_SYS: "/rootfs/sys"
+      HOST_ETC: "/rootfs/etc"
+      HOST_MOUNT_PREFIX: "/rootfs"
diff --git a/ansible/plays/templates/ops-agent/telegraf.conf b/ansible/plays/templates/ops-agent/telegraf.conf
new file mode 100644
index 0000000..a408ef6
--- /dev/null
+++ b/ansible/plays/templates/ops-agent/telegraf.conf
@@ -0,0 +1,55 @@
+[global_tags]
+
+[agent]
+  interval = "10s"
+  round_interval = true
+  metric_batch_size = 1000
+  metric_buffer_limit = 10000
+  collection_jitter = "0s"
+  flush_interval = "10s"
+  flush_jitter = "0s"
+  precision = ""
+  hostname = "{{ ansible_hostname }}"
+  omit_hostname = false
+
+[[inputs.cpu]]
+  percpu = true
+  totalcpu = true
+  collect_cpu_time = false
+  report_active = false
+
+[[inputs.disk]]
+  ignore_fs = ["tmpfs",
"devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
+
+[[inputs.diskio]]
+[[inputs.kernel]]
+[[inputs.mem]]
+[[inputs.processes]]
+[[inputs.swap]]
+[[inputs.system]]
+[[inputs.docker]]
+
+{% if telegraf_sensors_enable|default(false) %}
+[[inputs.sensors]]
+{% endif %}
+
+{% if telegraf_smart_enable|default(false) %}
+[[inputs.smart]]  # telegraf_smart_devices holds bare names from /dev/disk/by-id
+{% set devs=telegraf_smart_devices|default([]) %}
+{% if devs|length > 0 %}
+devices = [
+{% for dev in devs %}
+  "/dev/disk/by-id/{{ dev }}",
+{% endfor %}
+]
+{% endif %}
+{% endif %}
+
+[[outputs.influxdb]]
+  urls = ["$INFLUX_URL"]
+  skip_database_creation = false  # NOTE(review): compose sets INFLUX_SKIP_DATABASE_CREATION but it is unused here
+
+{% set cfg=telegraf_extra_config|default("") %}
+{% if cfg|length > 0%}
+{{ cfg }}
+{% endif %}
diff --git a/ansible/plays/templates/ops-server/docker-compose.yml b/ansible/plays/templates/ops-server/docker-compose.yml
new file mode 100644
index 0000000..a3d7033
--- /dev/null
+++ b/ansible/plays/templates/ops-server/docker-compose.yml
@@ -0,0 +1,25 @@
+version: "3"
+services:
+  grafana:
+    image: grafana/grafana:7.1.5
+    networks:
+      - influx
+    environment:
+      GF_SERVER_ROOT_URL: https://grafana.trygvis.io
+    ports:
+      - "3000:3000"
+    volumes:
+      - "{{ grafana_basedir }}:/var/lib/grafana"
+
+  idb:
+    image: influxdb:1.8.1
+    ports:
+      - "8086:8086"
+    networks:
+      - influx
+    volumes:
+      - /etc/docker-service/ops-server/influxdb.conf:/etc/influxdb/influxdb.conf:ro  # rendered by plays/ops-server.yml
+      - /pool0/ops/influxdb:/var/lib/influxdb
+
+networks:
+  influx:
diff --git a/ansible/plays/templates/ops-server/influxdb.conf b/ansible/plays/templates/ops-server/influxdb.conf
new file mode 100644
index 0000000..9e41635
--- /dev/null
+++ b/ansible/plays/templates/ops-server/influxdb.conf
@@ -0,0 +1,155 @@
+reporting-disabled = false
+bind-address = "127.0.0.1:8088"
+
+[meta]
+  dir = "/var/lib/influxdb/meta"
+  retention-autocreate = true
+  logging-enabled = true
+
+[data]
+  dir = "/var/lib/influxdb/data"
+  index-version = "inmem"
+  wal-dir = "/var/lib/influxdb/wal"
+  wal-fsync-delay = "0s"
+  validate-keys
= false
+  query-log-enabled = true
+  cache-max-memory-size = 1073741824
+  cache-snapshot-memory-size = 26214400
+  cache-snapshot-write-cold-duration = "10m0s"
+  compact-full-write-cold-duration = "4h0m0s"
+  compact-throughput = 50331648
+  compact-throughput-burst = 50331648
+  max-series-per-database = 1000000
+  max-values-per-tag = 100000
+  max-concurrent-compactions = 0
+  max-index-log-file-size = 1048576
+  series-id-set-cache-size = 100
+  series-file-max-concurrent-snapshot-compactions = 0
+  trace-logging-enabled = false
+  tsm-use-madv-willneed = false
+
+[coordinator]
+  write-timeout = "10s"
+  max-concurrent-queries = 0
+  query-timeout = "0s"
+  log-queries-after = "0s"
+  max-select-point = 0
+  max-select-series = 0
+  max-select-buckets = 0
+
+[retention]
+  enabled = true
+  check-interval = "30m0s"
+
+[shard-precreation]
+  enabled = true
+  check-interval = "10m0s"
+  advance-period = "30m0s"
+
+[monitor]
+  store-enabled = true
+  store-database = "_internal"
+  store-interval = "10s"
+
+[subscriber]
+  enabled = true
+  http-timeout = "30s"
+  insecure-skip-verify = false
+  ca-certs = ""
+  write-concurrency = 40
+  write-buffer-size = 1000
+
+[http]
+  enabled = true
+  bind-address = ":8086"
+  auth-enabled = false
+  log-enabled = true
+  suppress-write-log = false
+  write-tracing = false
+  flux-enabled = false
+  flux-log-enabled = false
+  pprof-enabled = true
+  pprof-auth-enabled = false
+  debug-pprof-enabled = false
+  ping-auth-enabled = false
+  https-enabled = false
+  https-certificate = "/etc/ssl/influxdb.pem"
+  https-private-key = ""
+  max-row-limit = 0
+  max-connection-limit = 0
+  shared-secret = ""
+  realm = "InfluxDB"
+  unix-socket-enabled = false
+  unix-socket-permissions = "0777"
+  bind-socket = "/var/run/influxdb.sock"
+  max-body-size = 25000000
+  access-log-path = ""
+  max-concurrent-write-limit = 0
+  max-enqueued-write-limit = 0
+  enqueued-write-timeout = 30000000000
+
+[logging]
+  format = "auto"
+  level = "info"
+  suppress-logo = false
+
+[[graphite]]
+  enabled = false
+  bind-address = ":2003"
+  database = "graphite"
+  retention-policy = ""
+  protocol = "tcp"
+  batch-size = 5000
+  batch-pending = 10
+  batch-timeout = "1s"
+  consistency-level = "one"
+  separator = "."
+  udp-read-buffer = 0
+
+[[collectd]]
+  enabled = false
+  bind-address = ":25826"
+  database = "collectd"
+  retention-policy = ""
+  batch-size = 5000
+  batch-pending = 10
+  batch-timeout = "10s"
+  read-buffer = 0
+  typesdb = "/usr/share/collectd/types.db"
+  security-level = "none"
+  auth-file = "/etc/collectd/auth_file"
+  parse-multivalue-plugin = "split"
+
+[[opentsdb]]
+  enabled = false
+  bind-address = ":4242"
+  database = "opentsdb"
+  retention-policy = ""
+  consistency-level = "one"
+  tls-enabled = false
+  certificate = "/etc/ssl/influxdb.pem"
+  batch-size = 1000
+  batch-pending = 5
+  batch-timeout = "1s"
+  log-point-errors = true
+
+[[udp]]
+  enabled = false
+  bind-address = ":8089"
+  database = "udp"
+  retention-policy = ""
+  batch-size = 5000
+  batch-pending = 10
+  read-buffer = 0
+  batch-timeout = "1s"
+  precision = ""
+
+[continuous_queries]
+  log-enabled = true
+  enabled = true
+  query-stats-enabled = false
+  run-interval = "1s"
+
+[tls]
+  min-version = ""
+  max-version = ""
diff --git a/ansible/roles/docker-service/defaults/main.yml b/ansible/roles/docker-service/defaults/main.yml
new file mode 100644
index 0000000..e07508c
--- /dev/null
+++ b/ansible/roles/docker-service/defaults/main.yml
@@ -0,0 +1,5 @@
+docker_service__etc: /etc/docker-service  # each service gets a subdirectory here
+systemd_unit: "docker-service-{{ service }}"
+systemd_enabled: "yes"
+systemd_state: "started"
+docker_service__restart: []  # unit names the restart handler loops over
diff --git a/ansible/roles/docker-service/handlers/main.yml b/ansible/roles/docker-service/handlers/main.yml
new file mode 100644
index 0000000..475c016
--- /dev/null
+++ b/ansible/roles/docker-service/handlers/main.yml
@@ -0,0 +1,7 @@
+- name: systemctl restart
+  become: yes
+  systemd:
+    name: "{{ item }}"
+    state: restarted
+  loop: "{{ docker_service__restart }}"
+#  debug:
var=docker_service__restart
diff --git a/ansible/roles/docker-service/tasks/main.yml b/ansible/roles/docker-service/tasks/main.yml
new file mode 100644
index 0000000..3b4f703
--- /dev/null
+++ b/ansible/roles/docker-service/tasks/main.yml
@@ -0,0 +1,74 @@
+# Installs one docker-compose project as a systemd unit.
+# Uses the variables `service` (project name) and `template` (compose template).
+- name: apt install docker
+  tags:
+    - never
+    - packages
+  become: true
+  apt:
+    name:
+      - docker.io
+      - docker-compose
+    install_recommends: false
+
+- name: "{{ docker_service__etc }}/{{ service }}"
+  become: true
+  file:
+    path: "{{ docker_service__etc }}/{{ service }}"
+    state: directory
+
+- name: "{{ docker_service__etc }}/{{ service }}/docker-compose.yml"
+  become: true
+  template:
+    dest: "{{ docker_service__etc }}/{{ service }}/docker-compose.yml"
+    src: "{{ template }}"
+  notify: systemctl restart
+  register: docker_descriptor
+
+# `docker-compose up -d` exits as soon as the containers are detached, so the
+# unit is a oneshot that stays active afterwards. Without that systemd marks it
+# dead and every `state: started` run would stop and start the containers again.
+- name: "/etc/systemd/system/{{ systemd_unit }}.service"
+  become: true
+  copy:
+    dest: "/etc/systemd/system/{{ systemd_unit }}.service"
+    content: |
+      [Unit]
+      Description=Docker service: {{ service }}
+      After=network-online.target
+      After=docker.service
+      Requires=docker.service
+
+      [Service]
+      Type=oneshot
+      RemainAfterExit=yes
+      WorkingDirectory={{ docker_service__etc }}/{{ service }}/
+      ExecStartPre=-/usr/bin/docker-compose stop
+      ExecStartPre=-/usr/bin/docker-compose pull
+      ExecStart=/usr/bin/docker-compose up -d
+      ExecReload=/usr/bin/docker-compose up -d
+      ExecStop=/usr/bin/docker-compose stop
+
+      [Install]
+      WantedBy=multi-user.target
+  notify: systemctl restart
+  register: systemd_descriptor
+
+- name: systemd daemon-reload
+  become: true
+  systemd:
+    daemon_reload: true
+  when: systemd_descriptor.changed
+
+- name: systemd start "{{ systemd_unit }}"
+  become: true
+  systemd:
+    name: "{{ systemd_unit }}"
+    enabled: "{{ systemd_enabled }}"
+    state: "{{ systemd_state }}"
+
+# Queue this unit for the "systemctl restart" handler, which loops over
+# docker_service__restart.
+- set_fact:
+    docker_service__restart: "{{ docker_service__restart + [systemd_unit] }}"
+  when: (systemd_descriptor.changed or docker_descriptor.changed) and systemd_state == 'started'
diff --git a/docker/birgitte/ops/docker-compose.yml b/docker/birgitte/ops/docker-compose.yml
new file
mode 100644
index 0000000..fea214d
--- /dev/null
+++ b/docker/birgitte/ops/docker-compose.yml
@@ -0,0 +1,25 @@
+version: "3"
+services:
+  grafana:
+    image: grafana/grafana:7.1.5
+    networks:
+      - influx
+    environment:
+      GF_SERVER_ROOT_URL: https://grafana.trygvis.io
+    ports:
+      - "3000:3000"
+    volumes:
+      - /pool0/ops/grafana:/var/lib/grafana
+
+  idb:
+    image: influxdb:1.8.1
+    ports:
+      - "8086:8086"
+    networks:
+      - influx
+    volumes:
+      - /home/trygvis/dev/io.trygvis/infra/docker/birgitte/ops/influxdb.conf:/etc/influxdb/influxdb.conf:ro
+      - /pool0/ops/influxdb:/var/lib/influxdb
+
+networks:
+  influx:
diff --git a/docker/birgitte/ops/influxdb.conf b/docker/birgitte/ops/influxdb.conf
new file mode 100644
index 0000000..9e41635
--- /dev/null
+++ b/docker/birgitte/ops/influxdb.conf
@@ -0,0 +1,155 @@
+reporting-disabled = false
+bind-address = "127.0.0.1:8088"
+
+[meta]
+  dir = "/var/lib/influxdb/meta"
+  retention-autocreate = true
+  logging-enabled = true
+
+[data]
+  dir = "/var/lib/influxdb/data"
+  index-version = "inmem"
+  wal-dir = "/var/lib/influxdb/wal"
+  wal-fsync-delay = "0s"
+  validate-keys = false
+  query-log-enabled = true
+  cache-max-memory-size = 1073741824
+  cache-snapshot-memory-size = 26214400
+  cache-snapshot-write-cold-duration = "10m0s"
+  compact-full-write-cold-duration = "4h0m0s"
+  compact-throughput = 50331648
+  compact-throughput-burst = 50331648
+  max-series-per-database = 1000000
+  max-values-per-tag = 100000
+  max-concurrent-compactions = 0
+  max-index-log-file-size = 1048576
+  series-id-set-cache-size = 100
+  series-file-max-concurrent-snapshot-compactions = 0
+  trace-logging-enabled = false
+  tsm-use-madv-willneed = false
+
+[coordinator]
+  write-timeout = "10s"
+  max-concurrent-queries = 0
+  query-timeout = "0s"
+  log-queries-after = "0s"
+  max-select-point = 0
+  max-select-series = 0
+  max-select-buckets = 0
+
+[retention]
+  enabled = true
+  check-interval = "30m0s"
+
+[shard-precreation]
+  enabled = true
+  check-interval = "10m0s"
+  advance-period = "30m0s"
+
+[monitor]
+  store-enabled = true
+  store-database = "_internal"
+  store-interval = "10s"
+
+[subscriber]
+  enabled = true
+  http-timeout = "30s"
+  insecure-skip-verify = false
+  ca-certs = ""
+  write-concurrency = 40
+  write-buffer-size = 1000
+
+[http]
+  enabled = true
+  bind-address = ":8086"
+  auth-enabled = false
+  log-enabled = true
+  suppress-write-log = false
+  write-tracing = false
+  flux-enabled = false
+  flux-log-enabled = false
+  pprof-enabled = true
+  pprof-auth-enabled = false
+  debug-pprof-enabled = false
+  ping-auth-enabled = false
+  https-enabled = false
+  https-certificate = "/etc/ssl/influxdb.pem"
+  https-private-key = ""
+  max-row-limit = 0
+  max-connection-limit = 0
+  shared-secret = ""
+  realm = "InfluxDB"
+  unix-socket-enabled = false
+  unix-socket-permissions = "0777"
+  bind-socket = "/var/run/influxdb.sock"
+  max-body-size = 25000000
+  access-log-path = ""
+  max-concurrent-write-limit = 0
+  max-enqueued-write-limit = 0
+  enqueued-write-timeout = 30000000000
+
+[logging]
+  format = "auto"
+  level = "info"
+  suppress-logo = false
+
+[[graphite]]
+  enabled = false
+  bind-address = ":2003"
+  database = "graphite"
+  retention-policy = ""
+  protocol = "tcp"
+  batch-size = 5000
+  batch-pending = 10
+  batch-timeout = "1s"
+  consistency-level = "one"
+  separator = "."
+  udp-read-buffer = 0
+
+[[collectd]]
+  enabled = false
+  bind-address = ":25826"
+  database = "collectd"
+  retention-policy = ""
+  batch-size = 5000
+  batch-pending = 10
+  batch-timeout = "10s"
+  read-buffer = 0
+  typesdb = "/usr/share/collectd/types.db"
+  security-level = "none"
+  auth-file = "/etc/collectd/auth_file"
+  parse-multivalue-plugin = "split"
+
+[[opentsdb]]
+  enabled = false
+  bind-address = ":4242"
+  database = "opentsdb"
+  retention-policy = ""
+  consistency-level = "one"
+  tls-enabled = false
+  certificate = "/etc/ssl/influxdb.pem"
+  batch-size = 1000
+  batch-pending = 5
+  batch-timeout = "1s"
+  log-point-errors = true
+
+[[udp]]
+  enabled = false
+  bind-address = ":8089"
+  database = "udp"
+  retention-policy = ""
+  batch-size = 5000
+  batch-pending = 10
+  read-buffer = 0
+  batch-timeout = "1s"
+  precision = ""
+
+[continuous_queries]
+  log-enabled = true
+  enabled = true
+  query-stats-enabled = false
+  run-interval = "1s"
+
+[tls]
+  min-version = ""
+  max-version = ""
diff --git a/docker/birgitte/owncloud/docker-compose.yml b/docker/birgitte/owncloud/docker-compose.yml
index 3e5a599..b286847 100644
--- a/docker/birgitte/owncloud/docker-compose.yml
+++ b/docker/birgitte/owncloud/docker-compose.yml
@@ -36,6 +36,7 @@ services:
     volumes:
 #      - files:/mnt/data
 #      - /disk1/owncloud/data:/mnt/data
+#      - /etc/owncloud/config.php:/mnt/data/config/config.php
       - /pool0/owncloud:/mnt/data
     extra_hosts:
       database: 172.17.0.1
-- 
cgit v1.2.3