commit b875c663c921be76517fe302c74e173d1e69a6c0
Author: Matan Horovitz
Date:   Mon Feb 28 20:49:01 2022 +0200

    Inital Commit - Prometheus + Grafana

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c896de9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*
+!docker-compose.yml
diff --git a/config/alertmanager.yml b/config/alertmanager.yml
new file mode 100644
index 0000000..62cde79
--- /dev/null
+++ b/config/alertmanager.yml
@@ -0,0 +1,25 @@
+# Alertmanager: groups alerts sent by Prometheus and mails them to one receiver.
+global:
+  resolve_timeout: 5m
+
+route:
+  # Label names are case-sensitive; Prometheus puts the alert name in `alertname`.
+  group_by: ['alertname']
+  group_wait: 10s
+  group_interval: 10s
+  repeat_interval: 24h
+  receiver: 'email'
+receivers:
+- name: 'email'
+  email_configs:
+  - to: 'matanhorovitz@protonmail.com'
+    from: 'pukekoxyz@yahoo.com'
+    smarthost: smtp.mail.yahoo.com:587
+    auth_username: 'pukekoxyz@yahoo.com'
+    auth_identity: 'pukekoxyz@yahoo.com'
+    # The SMTP password is read from a file mounted by docker-compose so it is
+    # never committed (auth_password_file needs Alertmanager >= 0.25).
+    # NOTE(review): rotate the password that was previously stored inline here.
+    auth_password_file: /run/secrets/smtp_password
+    require_tls: true
+    send_resolved: true
diff --git a/config/alerts.yml b/config/alerts.yml
new file mode 100755
index 0000000..fb4c6b0
--- /dev/null
+++ b/config/alerts.yml
@@ -0,0 +1,20 @@
+# Prometheus alerting rules (loaded through rule_files in prometheus.yml).
+groups:
+  - name: Uptime
+    rules:
+      # Fires when any scrape target has been down for 5 minutes. No job
+      # filter: a job="services" selector matches none of the scrape jobs
+      # defined in prometheus.yml (prometheus, takahe, cadvisor).
+      - alert: InstanceDown
+        expr: up == 0
+        for: 5m
+  - name: Usage
+    rules:
+      # Root filesystem more than 80% used for 1 minute.
+      - alert: HighRootFSDiskUsage
+        expr: 100 - ((node_filesystem_avail_bytes{mountpoint="/",fstype!="rootfs"} * 100) / node_filesystem_size_bytes{mountpoint="/",fstype!="rootfs"}) > 80
+        for: 1m
+      # /Red-Vol more than 70% used for 1 minute.
+      - alert: HighRedVolDiskUsage
+        expr: 100 - ((node_filesystem_avail_bytes{mountpoint="/Red-Vol",fstype!="rootfs"} * 100) / node_filesystem_size_bytes{mountpoint="/Red-Vol",fstype!="rootfs"}) > 70
+        for: 1m
diff --git a/config/prometheus.yml b/config/prometheus.yml
new file mode 100755
index 0000000..6532a60
--- /dev/null
+++ b/config/prometheus.yml
@@ -0,0 +1,24 @@
+# Prometheus server: global intervals, Alertmanager target, rules, scrape jobs.
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+
+alerting:
+  alertmanagers:
+    - static_configs:
+        - targets: ['alertmanager:9093']
+rule_files:
+  - alerts.yml
+
+scrape_configs:
+  - job_name: prometheus
+    static_configs:
+      - targets: ['prometheus:9090']
+  # NOTE(review): port 9100 suggests a node_exporter on this host - confirm.
+  - job_name: takahe
+    static_configs:
+      - targets: ['192.168.0.66:9100']
+  - job_name: cadvisor
+    scrape_interval: 5s
+    static_configs:
+      - targets: ['cadvisor:8080']
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100755
index 0000000..64b7c1a
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,99 @@
+# Monitoring stack: Prometheus, Alertmanager, cAdvisor, Redis and Grafana.
+version: '3.2'
+services:
+  # Metrics server; the Traefik labels route monitor.pukeko.xyz to it.
+  prometheus:
+    image: prom/prometheus:latest
+    container_name: prometheus
+    ports:
+      - '9090:9090'
+    command:
+      - --config.file=/etc/prometheus/prometheus.yml
+    volumes:
+      - ./config/prometheus.yml:/etc/prometheus/prometheus.yml:ro
+      - ./config/alerts.yml:/etc/prometheus/alerts.yml:ro
+    depends_on:
+      - cadvisor
+    restart: unless-stopped
+    networks:
+      - network
+      - internal
+    labels:
+      - "traefik.enable=true"
+      - "traefik.docker.network=prometheus_network"
+      - "traefik.http.routers.prometheus.entrypoints=websecure"
+      - "traefik.http.routers.prometheus.rule=Host(`monitor.pukeko.xyz`)"
+      - "traefik.http.routers.prometheus.tls.certresolver=pukekoresolver"
+      - "traefik.http.routers.prometheus.middlewares=authelia@docker"
+
+  # Alert router. The prom/alertmanager image reads
+  # /etc/alertmanager/alertmanager.yml by default, so the config is mounted
+  # there; a mount under /etc/prometheus is not loaded without --config.file.
+  alertmanager:
+    container_name: alertmanager
+    image: prom/alertmanager
+    volumes:
+      - ./config/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
+      # File holding the SMTP password (see auth_password_file); it must
+      # exist before `docker compose up` and must not be committed.
+      - ./secrets/smtp_password:/run/secrets/smtp_password:ro
+    ports:
+      - '9093:9093'
+    restart: unless-stopped
+    networks:
+      - internal
+  # Container metrics exporter; Prometheus scrapes it at cadvisor:8080.
+  cadvisor:
+    image: gcr.io/cadvisor/cadvisor:latest
+    container_name: cadvisor
+    ports:
+      - '1010:8080'
+    volumes:
+      - /:/rootfs:ro
+      - /var/run:/var/run:rw
+      - /sys:/sys:ro
+      - /var/lib/docker/:/var/lib/docker:ro
+    depends_on:
+      - redis
+    restart: unless-stopped
+    networks:
+      - internal
+  # NOTE(review): only cadvisor's depends_on references redis - confirm it is needed.
+  redis:
+    image: redis:alpine
+    container_name: redis
+    ports:
+      - '6379:6379'
+    restart: unless-stopped
+    networks:
+      - internal
+  # Dashboards; the Traefik labels route flight.pukeko.xyz to port 3000.
+  grafana:
+    container_name: grafana
+    image: grafana/grafana
+    depends_on:
+      - prometheus
+    ports:
+      - '1000:3000'
+    volumes:
+      - './grafana/data:/var/lib/grafana'
+      - './grafana/provisioning/:/etc/grafana/provisioning/'
+      - './grafana/config/:/etc/grafana/'
+    restart: unless-stopped
+    user: '1000'
+    networks:
+      - network
+      - internal
+    labels:
+      - "traefik.enable=true"
+      - "traefik.docker.network=prometheus_network"
+      - "traefik.http.routers.grafana.entrypoints=websecure"
+      - "traefik.http.services.grafana.loadbalancer.server.port=3000"
+      - "traefik.http.routers.grafana.rule=Host(`flight.pukeko.xyz`)"
+      - "traefik.http.routers.grafana.tls.certresolver=pukekoresolver"
+      - "traefik.http.routers.grafana.middlewares=authelia@docker"
+networks:
+  network:
+    driver: bridge
+  internal:
+    driver: bridge
diff --git a/grafana/config/grafana.ini b/grafana/config/grafana.ini
new file mode 100644
index 0000000..c4a445b
--- /dev/null
+++ b/grafana/config/grafana.ini
@@ -0,0 +1,1111 @@
+##################### Grafana Configuration Defaults #####################
+#
+# Do not modify this file in grafana installs
+#
+
+# possible values : production, development
+app_mode = production
+
+# instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty
+instance_name = ${HOSTNAME}
+
+#################################### Paths ###############################
+[paths]
+# Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used)
+data = data
+
+# Temporary files in `data` directory older than given duration will be removed
+temp_data_lifetime = 24h
+
+# Directory where grafana can store logs
+logs = data/log
+
+# Directory where grafana will automatically scan and look for plugins
+plugins = data/plugins
+
+# folder that contains provisioning config files that grafana will apply on startup and while running.
+provisioning = conf/provisioning + +#################################### Server ############################## +[server] +# Protocol (http, https, h2, socket) +protocol = http + +# The ip address to bind to, empty will bind to all interfaces +http_addr = + +# The http port to use +http_port = 3000 + +# The public facing domain name used to access grafana from a browser +domain = localhost + +# Redirect to correct domain if host header does not match domain +# Prevents DNS rebinding attacks +enforce_domain = false + +# The full public facing url +root_url = %(protocol)s://%(domain)s:%(http_port)s/ + +# Serve Grafana from subpath specified in `root_url` setting. By default it is set to `false` for compatibility reasons. +serve_from_sub_path = false + +# Log web requests +router_logging = false + +# the path relative to the working path +static_root_path = public + +# enable gzip +enable_gzip = false + +# https certs & key file +cert_file = +cert_key = + +# Unix socket path +socket = /tmp/grafana.sock + +# CDN Url +cdn_url = + +# Sets the maximum time in minutes before timing out read of an incoming request and closing idle connections. +# `0` means there is no timeout for reading the request. +read_timeout = 0 + +#################################### Database ############################ +[database] +# You can configure the database connection by specifying type, host, name, user and password +# as separate properties or as one string using the url property. + +# Either "mysql", "postgres" or "sqlite3", it's your choice +type = sqlite3 +host = 127.0.0.1:3306 +name = grafana +user = root +# If the password contains # or ; you have to wrap it with triple quotes. 
Ex """#password;""" +password = +# Use either URL or the previous fields to configure the database +# Example: mysql://user:secret@host:port/database +url = + +# Max idle conn setting default is 2 +max_idle_conn = 2 + +# Max conn setting default is 0 (mean not set) +max_open_conn = + +# Connection Max Lifetime default is 14400 (means 14400 seconds or 4 hours) +conn_max_lifetime = 14400 + +# Set to true to log the sql calls and execution times. +log_queries = + +# For "postgres", use either "disable", "require" or "verify-full" +# For "mysql", use either "true", "false", or "skip-verify". +ssl_mode = disable + +# Database drivers may support different transaction isolation levels. +# Currently, only "mysql" driver supports isolation levels. +# If the value is empty - driver's default isolation level is applied. +# For "mysql" use "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ" or "SERIALIZABLE". +isolation_level = + +ca_cert_path = +client_key_path = +client_cert_path = +server_cert_name = + +# For "sqlite3" only, path relative to data_path setting +path = grafana.db + +# For "sqlite3" only. cache mode setting used for connecting to the database +cache_mode = private + +#################################### Cache server ############################# +[remote_cache] +# Either "redis", "memcached" or "database" default is "database" +type = database + +# cache connectionstring options +# database: will use Grafana primary database. +# redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=0,ssl=false`. Only addr is required. ssl may be 'true', 'false', or 'insecure'. +# memcache: 127.0.0.1:11211 +connstr = + +#################################### Data proxy ########################### +[dataproxy] + +# This enables data proxy logging, default is false +logging = false + +# How long the data proxy waits to read the headers of the response before timing out, default is 30 seconds. 
+# This setting also applies to core backend HTTP data sources where query requests use an HTTP client with timeout set. +timeout = 30 + +# How long the data proxy waits to establish a TCP connection before timing out, default is 10 seconds. +dialTimeout = 10 + +# How many seconds the data proxy waits before sending a keepalive request. +keep_alive_seconds = 30 + +# How many seconds the data proxy waits for a successful TLS Handshake before timing out. +tls_handshake_timeout_seconds = 10 + +# How many seconds the data proxy will wait for a server's first response headers after +# fully writing the request headers if the request has an "Expect: 100-continue" +# header. A value of 0 will result in the body being sent immediately, without +# waiting for the server to approve. +expect_continue_timeout_seconds = 1 + +# Optionally limits the total number of connections per host, including connections in the dialing, +# active, and idle states. On limit violation, dials will block. +# A value of zero (0) means no limit. +max_conns_per_host = 0 + +# The maximum number of idle connections that Grafana will keep alive. +max_idle_connections = 100 + +# How many seconds the data proxy keeps an idle connection open before timing out. +idle_conn_timeout_seconds = 90 + +# If enabled and user is not anonymous, data proxy will add X-Grafana-User header with username into the request. +send_user_header = false + +# Limit the amount of bytes that will be read/accepted from responses of outgoing HTTP requests. +response_limit = 0 + +# Limits the number of rows that Grafana will process from SQL data sources. +row_limit = 1000000 + +#################################### Analytics ########################### +[analytics] +# Server reporting, sends usage counters to stats.grafana.org every 24 hours. +# No ip addresses are being tracked, only simple counters to track +# running instances, dashboard and error counts. It is very helpful to us. 
+# Change this option to false to disable reporting. +reporting_enabled = true + +# The name of the distributor of the Grafana instance. Ex hosted-grafana, grafana-labs +reporting_distributor = grafana-labs + +# Set to false to disable all checks to https://grafana.com +# for new versions (grafana itself and plugins), check is used +# in some UI views to notify that grafana or plugin update exists +# This option does not cause any auto updates, nor send any information +# only a GET request to https://grafana.com to get latest versions +check_for_updates = true + +# Google Analytics universal tracking code, only enabled if you specify an id here +google_analytics_ua_id = + +# Google Tag Manager ID, only enabled if you specify an id here +google_tag_manager_id = + +# Rudderstack write key, enabled only if rudderstack_data_plane_url is also set +rudderstack_write_key = + +# Rudderstack data plane url, enabled only if rudderstack_write_key is also set +rudderstack_data_plane_url = + +# Rudderstack SDK url, optional, only valid if rudderstack_write_key and rudderstack_data_plane_url is also set +rudderstack_sdk_url = + +# Rudderstack Config url, optional, used by Rudderstack SDK to fetch source config +rudderstack_config_url = + +# Application Insights connection string. Specify an URL string to enable this feature. +application_insights_connection_string = + +# Optional. Specifies an Application Insights endpoint URL where the endpoint string is wrapped in backticks ``. 
+application_insights_endpoint_url = + +#################################### Security ############################ +[security] +# disable creation of admin user on first start of grafana +disable_initial_admin_creation = false + +# default admin user, created on startup +admin_user = admin + +# default admin password, can be changed before first start of grafana, or in profile settings +admin_password = admin + +# used for signing +secret_key = SW2YcwTIb9zpOOhoPsMm + +# current key provider used for envelope encryption, default to static value specified by secret_key +encryption_provider = secretKey + +# list of configured key providers, space separated (Enterprise only): e.g., awskms.v1 azurekv.v1 +available_encryption_providers = + +# disable gravatar profile images +disable_gravatar = false + +# data source proxy whitelist (ip_or_domain:port separated by spaces) +data_source_proxy_whitelist = + +# disable protection against brute force login attempts +disable_brute_force_login_protection = false + +# set to true if you host Grafana behind HTTPS. default is false. +cookie_secure = false + +# set cookie SameSite attribute. defaults to `lax`. can be set to "lax", "strict", "none" and "disabled" +cookie_samesite = lax + +# set to true if you want to allow browsers to render Grafana in a ,