about | summary | refs | log | tree | commit | diff
path: root/ansible
diff options
context:
space:
mode:
Diffstat (limited to 'ansible')
-rw-r--r-- ansible/host_vars/birgitte/ops-agent.yml | 17
-rw-r--r-- ansible/host_vars/malabaricus/ops-agent.yml | 29
-rw-r--r-- ansible/plays/ops-agent.yml | 36
-rw-r--r-- ansible/plays/ops-server.yml | 34
-rw-r--r-- ansible/plays/templates/ops-agent/docker-compose.yml | 27
-rw-r--r-- ansible/plays/templates/ops-agent/telegraf.conf | 55
-rw-r--r-- ansible/plays/templates/ops-server/docker-compose.yml | 25
-rw-r--r-- ansible/plays/templates/ops-server/influxdb.conf | 155
-rw-r--r-- ansible/roles/docker-service/defaults/main.yml | 5
-rw-r--r-- ansible/roles/docker-service/handlers/main.yml | 7
-rw-r--r-- ansible/roles/docker-service/tasks/main.yml | 64
11 files changed, 454 insertions, 0 deletions
diff --git a/ansible/host_vars/birgitte/ops-agent.yml b/ansible/host_vars/birgitte/ops-agent.yml
new file mode 100644
index 0000000..5c2c3cf
--- /dev/null
+++ b/ansible/host_vars/birgitte/ops-agent.yml
@@ -0,0 +1,17 @@
+# Telegraf agent settings for host "birgitte". These variables are read by the
+# ops-agent docker-compose.yml and telegraf.conf templates.
+
+# Extra host paths that the compose template bind-mounts read-only into the
+# Telegraf container (same path inside and outside).
+telegraf_extra_mount_points:
+  - /dev/disk
+  - /dev/sda
+
+# Switch on the [[inputs.sensors]] and [[inputs.smart]] sections of
+# telegraf.conf.
+telegraf_sensors_enable: true
+telegraf_smart_enable: true
+
+# Disks handed to [[inputs.smart]], named as they appear in /dev/disk/by-id/.
+# Regenerate the list with:
+#   ls -1 /dev/disk/by-id/|grep ^ata|grep -v -- -part
+telegraf_smart_devices:
+  - ata-APPLE_SSD_SM128_DFKUA04844EC992
+  - ata-Corsair_CSSD-F120GB2_10446506330009980010
+  - ata-HGST_HUS728T8TALE6L4_VDJA4N6K
+  - ata-HGST_HUS728T8TALE6L4_VDJARDYK
+  - ata-INTEL_SSDSC2CT240A4_CVKI304304XQ240DGN
+  - ata-INTEL_SSDSC2CT240A4_CVKI319001LL240DGN
+  - ata-INTEL_SSDSC2CW240A3_CVCV24100044240CGN
+  - ata-ST4000DM005-2DP166_WDH12006
+  - ata-ST4000DM005-2DP166_WDH1209R
diff --git a/ansible/host_vars/malabaricus/ops-agent.yml b/ansible/host_vars/malabaricus/ops-agent.yml
new file mode 100644
index 0000000..72c221b
--- /dev/null
+++ b/ansible/host_vars/malabaricus/ops-agent.yml
@@ -0,0 +1,29 @@
+# Extra Telegraf configuration for host "malabaricus". The ops-agent
+# telegraf.conf template appends this text at the end of the rendered file
+# whenever it is non-empty.
+# The snippet reads thermal_zone0..thermal_zone4 "temp" files below
+# /sys/class/thermal into the fields zone0..zone4.
+# NOTE(review): the ops-agent container mounts the host's /sys at /rootfs/sys;
+# confirm that base_dir "/sys/class/thermal" resolves to the host's thermal
+# zones inside the container - TODO verify.
+telegraf_extra_config: |
+ [[inputs.multifile]]
+ base_dir = "/sys/class/thermal"
+
+ ## If true discard all data when a single file can't be read.
+ ## Else, Telegraf omits the field generated from this file.
+ # fail_early = true
+
+ ## Files to parse each interval.
+ [[inputs.multifile.file]]
+ file = "thermal_zone0/temp"
+ dest = "zone0"
+ conversion = "float(3)"
+ [[inputs.multifile.file]]
+ file = "thermal_zone1/temp"
+ dest = "zone1"
+ conversion = "float(3)"
+ [[inputs.multifile.file]]
+ file = "thermal_zone2/temp"
+ dest = "zone2"
+ conversion = "float(3)"
+ [[inputs.multifile.file]]
+ file = "thermal_zone3/temp"
+ dest = "zone3"
+ conversion = "float(3)"
+ [[inputs.multifile.file]]
+ file = "thermal_zone4/temp"
+ dest = "zone4"
+ conversion = "float(3)"
diff --git a/ansible/plays/ops-agent.yml b/ansible/plays/ops-agent.yml
new file mode 100644
index 0000000..992123a
--- /dev/null
+++ b/ansible/plays/ops-agent.yml
@@ -0,0 +1,36 @@
+# Deploys the Telegraf metrics agent ("ops-agent") as a docker-compose service
+# managed by the docker-service role.
+- hosts:
+    - birgitte
+    - malabaricus
+  tasks:
+    # How the template telegraf.conf was generated (keep the image tag in sync
+    # with templates/ops-agent/docker-compose.yml):
+    #
+    #   docker run --rm telegraf:1.15 bash
+    #
+    # Full config:
+    #
+    #   telegraf config
+    #
+    # Smarter config:
+    #
+    #   telegraf --input-filter cpu:disk:diskio:kernel:mem:processes:system:swap config|grep -v '^ *#'|uniq
+    #
+
+    - name: mkdir /etc/docker-service/ops-agent
+      become: true
+      file:
+        dest: /etc/docker-service/ops-agent
+        state: directory
+        owner: root
+        group: root
+        mode: u=rx
+
+    # The result is registered so the service can be restarted below when the
+    # rendered configuration changes.
+    - name: /etc/docker-service/ops-agent/telegraf.conf
+      become: true
+      template:
+        src: "templates/ops-agent/telegraf.conf"
+        dest: "/etc/docker-service/ops-agent/telegraf.conf"
+      register: telegraf_conf
+
+    - import_role:
+        name: docker-service
+      tags: docker-service
+      vars:
+        service: ops-agent
+        template: templates/ops-agent/docker-compose.yml
+#        systemd_enabled: no
+#        systemd_state: stopped
+
+    # The role only restarts the unit when docker-compose.yml or the unit file
+    # changed, so a changed telegraf.conf would otherwise never be picked up.
+    # Skipped when the role already queued the unit for its restart handler.
+    # The unit name follows the role's default "docker-service-<service>".
+    - name: restart ops-agent after telegraf.conf change
+      become: true
+      systemd:
+        name: docker-service-ops-agent
+        state: restarted
+      when:
+        - telegraf_conf is changed
+        - "'docker-service-ops-agent' not in (docker_service__restart | default([]))"
diff --git a/ansible/plays/ops-server.yml b/ansible/plays/ops-server.yml
new file mode 100644
index 0000000..adfcfc6
--- /dev/null
+++ b/ansible/plays/ops-server.yml
@@ -0,0 +1,34 @@
+# Deploys Grafana and InfluxDB ("ops-server") as a docker-compose service
+# managed by the docker-service role.
+- hosts:
+    - birgitte
+  vars:
+    # Host directory that the compose template mounts at /var/lib/grafana.
+    grafana_basedir: /pool0/ops/grafana
+  tasks:
+    - name: mkdir /etc/docker-service/ops-server
+      become: true
+      file:
+        dest: /etc/docker-service/ops-server
+        state: directory
+        owner: root
+        group: root
+        mode: u=rx
+
+    - name: /etc/docker-service/ops-server/influxdb.conf
+      become: true
+      template:
+        src: "templates/ops-server/influxdb.conf"
+        dest: "/etc/docker-service/ops-server/influxdb.conf"
+
+    # state: directory makes the task create the directory on a first deploy
+    # instead of failing, and is the state that `recurse` applies to.
+    # NOTE(review): 472 is presumably the UID Grafana runs as inside the
+    # grafana/grafana image - confirm.
+    - name: permissions
+      become: true
+      file:
+        dest: "{{ grafana_basedir }}"
+        state: directory
+        owner: "472"
+        recurse: true
+
+    - import_role:
+        name: docker-service
+      tags: docker-service
+      vars:
+        service: ops-server
+        template: templates/ops-server/docker-compose.yml
+#        systemd_enabled: no
+#        systemd_state: stopped
diff --git a/ansible/plays/templates/ops-agent/docker-compose.yml b/ansible/plays/templates/ops-agent/docker-compose.yml
new file mode 100644
index 0000000..c5a4ecc
--- /dev/null
+++ b/ansible/plays/templates/ops-agent/docker-compose.yml
@@ -0,0 +1,27 @@
+# docker-compose template for the "ops-agent" service (Telegraf).
+# Optional host variable: telegraf_extra_mount_points - list of host paths
+# that are bind-mounted read-only at the same path inside the container.
+version: "3"
+services:
+  telegraf:
+    image: telegraf:1.15
+    privileged: true
+    network_mode: host
+    volumes:
+      - /etc/docker-service/ops-agent/telegraf.conf:/etc/telegraf/telegraf.conf:ro
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - /sys:/rootfs/sys:ro
+      - /proc:/rootfs/proc:ro
+      - /etc:/rootfs/etc:ro
+{% for mp in telegraf_extra_mount_points|default([]) %}
+      - "{{ mp }}:{{ mp }}:ro"
+{% endfor %}
+
+    # Installs smartmontools on every container start, then replaces the
+    # shell with telegraf.
+    # NOTE(review): only smartmontools is installed here; if [[inputs.sensors]]
+    # needs an additional package inside the container, it is missing -
+    # confirm.
+    command:
+      - sh
+      - -c
+      - apt update && apt install -y --install-recommends=no smartmontools; exec telegraf
+    environment:
+      INFLUX_URL: "http://birgitte.vpn.trygvis.io:8086"
+      # NOTE(review): telegraf.conf hard-codes skip_database_creation, so this
+      # variable is currently not referenced by the template - confirm.
+      INFLUX_SKIP_DATABASE_CREATION: "true"
+      # These paths match the read-only host mounts declared under volumes.
+      HOST_PROC: "/rootfs/proc"
+      HOST_SYS: "/rootfs/sys"
+      HOST_ETC: "/rootfs/etc"
+      HOST_MOUNT_PREFIX: "/rootfs"
diff --git a/ansible/plays/templates/ops-agent/telegraf.conf b/ansible/plays/templates/ops-agent/telegraf.conf
new file mode 100644
index 0000000..a408ef6
--- /dev/null
+++ b/ansible/plays/templates/ops-agent/telegraf.conf
@@ -0,0 +1,55 @@
+{#
+  Telegraf agent configuration, rendered per host by plays/ops-agent.yml.
+  Optional host variables read by this template:
+    telegraf_sensors_enable - emit the [[inputs.sensors]] section
+    telegraf_smart_enable   - emit the [[inputs.smart]] section
+    telegraf_smart_devices  - disks listed under [[inputs.smart]]
+    telegraf_extra_config   - text appended at the end of the file
+#}
+[global_tags]
+
+[agent]
+  interval = "10s"
+  round_interval = true
+  metric_batch_size = 1000
+  metric_buffer_limit = 10000
+  collection_jitter = "0s"
+  flush_interval = "10s"
+  flush_jitter = "0s"
+  precision = ""
+  hostname = "{{ ansible_hostname }}"
+  omit_hostname = false
+
+[[inputs.cpu]]
+  percpu = true
+  totalcpu = true
+  collect_cpu_time = false
+  report_active = false
+
+[[inputs.disk]]
+  ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]
+
+[[inputs.diskio]]
+[[inputs.kernel]]
+[[inputs.mem]]
+[[inputs.processes]]
+[[inputs.swap]]
+[[inputs.system]]
+[[inputs.docker]]
+
+{% if telegraf_sensors_enable|default(false) %}
+[[inputs.sensors]]
+{% endif %}
+
+{% if telegraf_smart_enable|default(false) %}
+[[inputs.smart]]
+{% set devs=telegraf_smart_devices|default([]) %}
+{% if devs|length > 0 %}
+{# The plugin needs device paths: bare names from /dev/disk/by-id/ get that prefix, absolute paths are kept as given. #}
+  devices = [
+{% for dev in devs %}
+    "{{ dev if dev.startswith('/') else '/dev/disk/by-id/' ~ dev }}",
+{% endfor %}
+  ]
+{% endif %}
+{% endif %}
+
+[[outputs.influxdb]]
+  urls = ["$INFLUX_URL"]
+{# NOTE(review): the ops-agent docker-compose.yml sets INFLUX_SKIP_DATABASE_CREATION="true", but the value below is hard-coded, so that variable is unused - confirm which one is intended. #}
+  skip_database_creation = false
+
+{% set cfg=telegraf_extra_config|default("") %}
+{% if cfg|length > 0 %}
+{{ cfg }}
+{% endif %}
diff --git a/ansible/plays/templates/ops-server/docker-compose.yml b/ansible/plays/templates/ops-server/docker-compose.yml
new file mode 100644
index 0000000..a3d7033
--- /dev/null
+++ b/ansible/plays/templates/ops-server/docker-compose.yml
@@ -0,0 +1,25 @@
+# docker-compose template for the "ops-server" service: Grafana plus InfluxDB.
+# Required variable: grafana_basedir - host directory for Grafana's data.
+version: "3"
+services:
+  grafana:
+    image: grafana/grafana:7.1.5
+    networks:
+      - influx
+    environment:
+      GF_SERVER_ROOT_URL: https://grafana.trygvis.io
+    ports:
+      - "3000:3000"
+    volumes:
+      - "{{ grafana_basedir }}:/var/lib/grafana"
+
+  idb:
+    image: influxdb:1.8.1
+    ports:
+      - "8086:8086"
+    networks:
+      - influx
+    volumes:
+      # Mount the copy that plays/ops-server.yml installs on the target host,
+      # not a file from a developer's home directory checkout.
+      - /etc/docker-service/ops-server/influxdb.conf:/etc/influxdb/influxdb.conf:ro
+      - /pool0/ops/influxdb:/var/lib/influxdb
+
+networks:
+  influx: {}
diff --git a/ansible/plays/templates/ops-server/influxdb.conf b/ansible/plays/templates/ops-server/influxdb.conf
new file mode 100644
index 0000000..9e41635
--- /dev/null
+++ b/ansible/plays/templates/ops-server/influxdb.conf
@@ -0,0 +1,155 @@
+# InfluxDB configuration that plays/ops-server.yml installs into
+# /etc/docker-service/ops-server/. It contains no template expressions.
+reporting-disabled = false
+bind-address = "127.0.0.1:8088"
+
+# All storage directories below live under /var/lib/influxdb, which the
+# ops-server docker-compose.yml bind-mounts from /pool0/ops/influxdb.
+[meta]
+ dir = "/var/lib/influxdb/meta"
+ retention-autocreate = true
+ logging-enabled = true
+
+[data]
+ dir = "/var/lib/influxdb/data"
+ index-version = "inmem"
+ wal-dir = "/var/lib/influxdb/wal"
+ wal-fsync-delay = "0s"
+ validate-keys = false
+ query-log-enabled = true
+ # 1073741824 = 1024^3 and 26214400 = 25 * 1024^2 (presumably byte counts).
+ cache-max-memory-size = 1073741824
+ cache-snapshot-memory-size = 26214400
+ cache-snapshot-write-cold-duration = "10m0s"
+ compact-full-write-cold-duration = "4h0m0s"
+ # 50331648 = 48 * 1024^2.
+ compact-throughput = 50331648
+ compact-throughput-burst = 50331648
+ max-series-per-database = 1000000
+ max-values-per-tag = 100000
+ max-concurrent-compactions = 0
+ # 1048576 = 1024^2.
+ max-index-log-file-size = 1048576
+ series-id-set-cache-size = 100
+ series-file-max-concurrent-snapshot-compactions = 0
+ trace-logging-enabled = false
+ tsm-use-madv-willneed = false
+
+[coordinator]
+ write-timeout = "10s"
+ max-concurrent-queries = 0
+ query-timeout = "0s"
+ log-queries-after = "0s"
+ max-select-point = 0
+ max-select-series = 0
+ max-select-buckets = 0
+
+[retention]
+ enabled = true
+ check-interval = "30m0s"
+
+[shard-precreation]
+ enabled = true
+ check-interval = "10m0s"
+ advance-period = "30m0s"
+
+[monitor]
+ store-enabled = true
+ store-database = "_internal"
+ store-interval = "10s"
+
+[subscriber]
+ enabled = true
+ http-timeout = "30s"
+ insecure-skip-verify = false
+ ca-certs = ""
+ write-concurrency = 40
+ write-buffer-size = 1000
+
+# HTTP API. Port 8086 is the port the ops-server docker-compose.yml publishes
+# and the one used in the agents' INFLUX_URL.
+[http]
+ enabled = true
+ bind-address = ":8086"
+ # NOTE(review): authentication is off while docker-compose publishes port
+ # 8086 - confirm that access is restricted elsewhere (firewall/VPN).
+ auth-enabled = false
+ log-enabled = true
+ suppress-write-log = false
+ write-tracing = false
+ flux-enabled = false
+ flux-log-enabled = false
+ pprof-enabled = true
+ pprof-auth-enabled = false
+ debug-pprof-enabled = false
+ ping-auth-enabled = false
+ # HTTPS is off, so the certificate path below is presumably unused - confirm.
+ https-enabled = false
+ https-certificate = "/etc/ssl/influxdb.pem"
+ https-private-key = ""
+ max-row-limit = 0
+ max-connection-limit = 0
+ shared-secret = ""
+ realm = "InfluxDB"
+ unix-socket-enabled = false
+ unix-socket-permissions = "0777"
+ bind-socket = "/var/run/influxdb.sock"
+ max-body-size = 25000000
+ access-log-path = ""
+ max-concurrent-write-limit = 0
+ max-enqueued-write-limit = 0
+ # 30000000000 is presumably nanoseconds, i.e. 30 s - TODO confirm.
+ enqueued-write-timeout = 30000000000
+
+[logging]
+ format = "auto"
+ level = "info"
+ suppress-logo = false
+
+# The graphite, collectd, opentsdb and udp listeners below are all configured
+# with enabled = false.
+[[graphite]]
+ enabled = false
+ bind-address = ":2003"
+ database = "graphite"
+ retention-policy = ""
+ protocol = "tcp"
+ batch-size = 5000
+ batch-pending = 10
+ batch-timeout = "1s"
+ consistency-level = "one"
+ separator = "."
+ udp-read-buffer = 0
+
+[[collectd]]
+ enabled = false
+ bind-address = ":25826"
+ database = "collectd"
+ retention-policy = ""
+ batch-size = 5000
+ batch-pending = 10
+ batch-timeout = "10s"
+ read-buffer = 0
+ typesdb = "/usr/share/collectd/types.db"
+ security-level = "none"
+ auth-file = "/etc/collectd/auth_file"
+ parse-multivalue-plugin = "split"
+
+[[opentsdb]]
+ enabled = false
+ bind-address = ":4242"
+ database = "opentsdb"
+ retention-policy = ""
+ consistency-level = "one"
+ tls-enabled = false
+ certificate = "/etc/ssl/influxdb.pem"
+ batch-size = 1000
+ batch-pending = 5
+ batch-timeout = "1s"
+ log-point-errors = true
+
+[[udp]]
+ enabled = false
+ bind-address = ":8089"
+ database = "udp"
+ retention-policy = ""
+ batch-size = 5000
+ batch-pending = 10
+ read-buffer = 0
+ batch-timeout = "1s"
+ precision = ""
+
+[continuous_queries]
+ log-enabled = true
+ enabled = true
+ query-stats-enabled = false
+ run-interval = "1s"
+
+# Empty strings leave both TLS version bounds unset in this file.
+[tls]
+ min-version = ""
+ max-version = ""
diff --git a/ansible/roles/docker-service/defaults/main.yml b/ansible/roles/docker-service/defaults/main.yml
new file mode 100644
index 0000000..e07508c
--- /dev/null
+++ b/ansible/roles/docker-service/defaults/main.yml
@@ -0,0 +1,5 @@
+# Defaults for the docker-service role. Callers must also pass `service` (the
+# service name) and `template` (the docker-compose.yml template to render);
+# neither has a default here.
+
+# Base directory; each service gets its own sub-directory below it.
+docker_service__etc: /etc/docker-service
+# Name of the generated systemd unit, without the ".service" suffix.
+systemd_unit: "docker-service-{{ service }}"
+# Passed to the systemd module's `enabled` and `state` parameters.
+systemd_enabled: true
+systemd_state: "started"
+# Units queued for the "systemctl restart" handler; appended to by the tasks.
+docker_service__restart: []
diff --git a/ansible/roles/docker-service/handlers/main.yml b/ansible/roles/docker-service/handlers/main.yml
new file mode 100644
index 0000000..475c016
--- /dev/null
+++ b/ansible/roles/docker-service/handlers/main.yml
@@ -0,0 +1,7 @@
+# Restarts every systemd unit listed in docker_service__restart; with an empty
+# list the loop has nothing to do.
+- name: systemctl restart
+  become: true
+  systemd:
+    state: restarted
+    name: "{{ item }}"
+  loop: "{{ docker_service__restart }}"
+#  debug: var=docker_service__restart
diff --git a/ansible/roles/docker-service/tasks/main.yml b/ansible/roles/docker-service/tasks/main.yml
new file mode 100644
index 0000000..3b4f703
--- /dev/null
+++ b/ansible/roles/docker-service/tasks/main.yml
@@ -0,0 +1,64 @@
+# Opt-in package installation: because of the special "never" tag this task is
+# skipped unless one of its tags is requested explicitly, e.g. --tags packages.
+- name: apt install docker
+  become: true
+  tags: [never, packages]
+  apt:
+    install_recommends: false
+    name:
+      - docker.io
+      - docker-compose
+
+# Per-service configuration directory below docker_service__etc.
+- name: "{{ docker_service__etc }}/{{ service }}"
+  become: true
+  file:
+    state: directory
+    path: "{{ docker_service__etc }}/{{ service }}"
+
+# Renders the caller-supplied compose template. The registered result is used
+# at the end of the role to decide whether the unit is queued for a restart.
+- name: "{{ docker_service__etc }}/{{ service }}/docker-compose.yml"
+  become: true
+  template:
+    src: "{{ template }}"
+    dest: "{{ docker_service__etc }}/{{ service }}/docker-compose.yml"
+  register: docker_descriptor
+  notify: systemctl restart
+
+# Writes the systemd unit that wraps `docker-compose` for this service.
+#
+# `docker-compose up -d` exits as soon as the containers are started. With the
+# implicit Type=simple the unit would turn inactive right away, so every later
+# `state: started` would run stop/pull/up again. Type=oneshot together with
+# RemainAfterExit=yes keeps the unit active after a successful start, and
+# ExecStop lets `systemctl stop` actually stop the containers.
+- name: "/etc/systemd/system/{{ systemd_unit }}.service"
+  become: true
+  copy:
+    dest: "/etc/systemd/system/{{ systemd_unit }}.service"
+    content: |
+      [Unit]
+      Description=Docker service: {{ service }}
+      After=network-online.target
+      After=docker.service
+      Requires=docker.service
+
+      [Service]
+      Type=oneshot
+      RemainAfterExit=yes
+      WorkingDirectory={{ docker_service__etc }}/{{ service }}/
+      ExecStartPre=-/usr/bin/docker-compose stop
+      ExecStartPre=-/usr/bin/docker-compose pull
+      ExecStart=/usr/bin/docker-compose up -d
+      ExecReload=/usr/bin/docker-compose up -d
+      ExecStop=/usr/bin/docker-compose stop
+
+      [Install]
+      WantedBy=multi-user.target
+  notify: systemctl restart
+  register: systemd_descriptor
+
+# Make systemd re-read unit files, but only when the unit file was rewritten.
+- name: systemd daemon-reload
+  become: true
+  when: systemd_descriptor.changed
+  systemd:
+    daemon_reload: true
+
+# Apply the requested enabled/state values to the service's unit.
+- name: systemd start "{{ systemd_unit }}"
+  become: true
+  systemd:
+    state: "{{ systemd_state }}"
+    enabled: "{{ systemd_enabled }}"
+    name: "{{ systemd_unit }}"
+
+# Queue the unit for the "systemctl restart" handler when either descriptor
+# changed and the service is meant to be running.
+- when: (systemd_descriptor.changed or docker_descriptor.changed) and systemd_state == 'started'
+  set_fact:
+    docker_service__restart: "{{ docker_service__restart + [systemd_unit] }}"