Deploying a Grafana + Prometheus configuration with Docker

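This article shows how to deploy a Grafana + Prometheus monitoring configuration using Docker. The Compose file and the Alertmanager, Prometheus, and alert-rule configuration files are listed below.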

docker-compose-monitor.yml

# Compose file format version.
version: "2"

# Bridge network named "monitor", referenced by services via `networks:`.
networks:
  monitor:
    driver: bridge

services:
  # InfluxDB time-series database; the telegraf service below links to it.
  influxdb:
    image: influxdb:latest
    container_name: tig-influxdb
    ports:
      - "18083:8083"
      - "18086:8086"
      - "18090:8090"
    env_file:
      - 'env.influxdb'
    volumes:
      # Data persistence; create the host directory first:
      #   sudo mkdir -p ./influxdb/data
      # NOTE(review): with the `latest` tag this may resolve to InfluxDB 2.x,
      # which keeps its data under /var/lib/influxdb2 - confirm, or pin a 1.x tag.
      - ./influxdb/data:/var/lib/influxdb
      # Set the time zone inside the container to UTC+8 (East 8 zone).
      - ./timezone:/etc/timezone:ro
      - ./localtime:/etc/localtime:ro
    # Restart automatically unless the container was explicitly stopped.
    restart: unless-stopped

  # Telegraf agent; its configuration is mounted read-only from ./telegraf.conf.
  telegraf:
    image: telegraf:latest
    container_name: tig-telegraf
    links:
      - influxdb
    volumes:
      - ./telegraf.conf:/etc/telegraf/telegraf.conf:ro
      # Same time-zone files as the influxdb service.
      - ./timezone:/etc/timezone:ro
      - ./localtime:/etc/localtime:ro
    restart: unless-stopped
  # Prometheus server; its main config and the alert-rule file are
  # bind-mounted from /home/qa/docker/grafana on the host.
  prometheus:
    image: prom/prometheus
    hostname: prometheus
    container_name: prometheus
    restart: always
    networks:
      - monitor
    ports:
      - "9090:9090"
    volumes:
      - /home/qa/docker/grafana/prometheus.yml:/etc/prometheus/prometheus.yml
      - /home/qa/docker/grafana/node_down.yml:/etc/prometheus/node_down.yml

  # Alertmanager; its config file is bind-mounted from the host.
  alertmanager:
    image: prom/alertmanager
    hostname: alertmanager
    container_name: alertmanager
    restart: always
    networks:
      - monitor
    ports:
      - "9093:9093"
    volumes:
      - /home/qa/docker/grafana/alertmanager.yml:/etc/alertmanager/alertmanager.yml

  # Grafana, pinned to 6.7.4; container port 3000 is published on host port 13000.
  grafana:
    image: grafana/grafana:6.7.4
    hostname: grafana
    container_name: grafana
    restart: always
    networks:
      - monitor
    ports:
      - "13000:3000"

  # node-exporter; published on port 9100.
  node-exporter:
    image: quay.io/prometheus/node-exporter
    hostname: node-exporter
    container_name: node-exporter
    restart: always
    networks:
      - monitor
    ports:
      - "9100:9100"

  # cAdvisor; container port 8080 is published on host port 18080.
  # NOTE(review): the google/cadvisor image appears to be deprecated in favour
  # of gcr.io/cadvisor/cadvisor - confirm before relying on `latest`.
  cadvisor:
    image: google/cadvisor:latest
    hostname: cadvisor
    container_name: cadvisor
    restart: always
    networks:
      - monitor
    ports:
      - "18080:8080"
    volumes:
      # Host paths mounted into the container; all read-only except /var/run.
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro

alertmanager.yml

# Global settings, including the SMTP account used for e-mail notifications.
# 'email' and 'password' are placeholders to be replaced with real values.
global:
  resolve_timeout: 5m
  smtp_smarthost: "smtp.exmail.qq.com:25"
  smtp_hello: "qq.com"
  smtp_from: "email"
  smtp_auth_username: "email"
  smtp_auth_password: "password"
  smtp_require_tls: false

# Every alert is grouped by alert name and sent to the 'email' receiver.
route:
  receiver: "email"
  group_by:
    - alertname
  group_wait: 5s
  group_interval: 5s
  repeat_interval: 5m

receivers:
  - name: "email"
    email_configs:
      # 'Email address' is a placeholder for the recipient.
      - to: "Email address"
        send_resolved: true

# Inhibition between 'critical' (source) and 'warning' (target) alerts that
# share the same alertname, dev, and instance labels.
inhibit_rules:
  - source_match:
      severity: "critical"
    target_match:
      severity: "warning"
    equal:
      - alertname
      - dev
      - instance

prometheus.yml

global:
  # Scrape targets every 15 seconds (the default is every 1 minute).
  scrape_interval: 15s
  # Evaluate rules every 15 seconds (the default is every 1 minute).
  evaluation_interval: 15s
  # scrape_timeout is left at the global default (10s).

# Alertmanager instance(s) that alerts are sent to.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - "192.168.32.117:9093"
            # - alertmanager:9093

# Rule files are loaded once and then evaluated periodically according to
# the global 'evaluation_interval'.
rule_files:
  - 'node_down.yml'
  # - 'node-exporter-alert-rules.yml'
  # - 'first_rules.yml'
  # - 'second_rules.yml'

# Scrape configuration: one job for the node-exporter endpoints and one for
# the cAdvisor endpoints. The job name is added as a label `job=<job_name>`
# to any time series scraped from that job.
scrape_configs:
  # IO storage node group
  - job_name: 'io'
    scrape_interval: 8s
    static_configs:
      # The port is the port on which node-exporter is started.
      - targets: ['192.168.32.117:9100']
      - targets: ['192.168.32.196:9100']
      - targets: ['192.168.32.136:9100']
      - targets: ['192.168.32.193:9100']
      - targets: ['192.168.32.153:9100']
      - targets: ['192.168.32.185:9100']
      - targets: ['192.168.32.190:19100']
      - targets: ['192.168.32.192:9100']

  - job_name: 'cadvisor'
    static_configs:
      # The port is the port on which cAdvisor is started.
      - targets: ['192.168.32.117:18080']
      - targets: ['192.168.32.193:8080']
      - targets: ['192.168.32.153:8080']
      - targets: ['192.168.32.185:8080']
      - targets: ['192.168.32.190:18080']
      - targets: ['192.168.32.192:18080']

node_down.yml

# Alerting rules: instance availability, memory, disk space, and CPU load.
groups:
  - name: node_down
    rules:
      # A scrape target has reported up == 0 for at least 1 minute.
      - alert: "InstanceDown"
        expr: "up == 0"
        for: 1m
        labels:
          user: "test"
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."

      # Available memory has been below 10% of total for 2 minutes.
      - alert: "Remaining memory is less than 10%"
        expr: >-
          node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
        for: 2m
        labels:
          severity: "warning"
        annotations:
          summary: "Host out of memory (instance {{ $labels.instance }})"
          description: "Node memory is filling up (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Available space on a non-read-only filesystem has been below 10% for 2 minutes.
      - alert: "The remaining disk space is less than 10%."
        expr: >-
          (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10
          and ON (instance, device, mountpoint) node_filesystem_readonly == 0
        for: 2m
        labels:
          severity: "warning"
        annotations:
          summary: "Host out of disk space (instance {{ $labels.instance }})"
          description: "Disk is almost full (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Non-idle CPU share, averaged per instance over 2 minutes, exceeds 80%;
      # `for: 0m` means no additional waiting period.
      - alert: "CPU load > 80%"
        expr: >-
          100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80
        for: 0m
        labels:
          severity: "warning"
        annotations:
          summary: "Host high CPU load (instance {{ $labels.instance }})"
          description: "CPU load is > 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

More alert rule examples: https://awesome-prometheus-alerts.grep.to/rules#prometheus-self-monitoring

Official dashboard: https://grafana.com/grafana/dashboards/

This concludes the article on deploying a Grafana + Prometheus configuration with Docker. For more content on deploying Grafana and Prometheus with Docker, please search the previous articles on 123WORDPRESS.COM or browse the related articles below. We hope you will continue to support 123WORDPRESS.COM!

You may also be interested in:
  • Detailed explanation of performance monitoring of MySQL server using Prometheus and Grafana
  • Detailed tutorial on grafana installation and usage
  • Use Grafana to display monitoring charts of Docker containers and set email alert rules (illustration)
  • Detailed explanation of the tutorial on monitoring Springboot applications using Prometheus+Grafana
  • Use Grafana+Prometheus to monitor MySQL service performance
  • Detailed explanation of the process of building Prometheus+Grafana based on docker
  • Summary of influx+grafana custom python data collection and some pitfalls
  • Detailed steps for SpringBoot+Prometheus+Grafana to implement application monitoring and alarm
  • How to install grafana and add influxdb monitoring under Linux
  • Analyze the method of prometheus+grafana monitoring nginx
  • Prometheus monitors MySQL using grafana display
  • How to monitor Docker using Grafana on Ubuntu
  • Detailed tutorial on building a JMeter+Grafana+Influxdb monitoring platform with Docker
  • Tutorial on building a JMeter+Grafana+influxdb visual performance monitoring platform in docker environment
  • ELK and Grafana jointly create visual monitoring to analyze nginx logs
  • It doesn’t matter if you forget your Grafana password. 2 ways to reset your Grafana admin password

<<:  Use the html-webpack-plugin plugin to generate HTML pages in memory

>>:  HTML5+CSS3 header creation example and update

Recommended

Detailed steps to store emoji expressions in MySQL

Caused by: java.sql.SQLException: Incorrect strin...

mysql workbench installation and configuration tutorial under centOS

This article shares the MySQL Workbench installat...

How to implement on-demand import and global import in element-plus

Table of contents Import on demand: Global Import...

The difference between html block-level tags and inline tags

1. Block-level element: refers to the ability to e...

React's method of realizing secondary linkage

This article shares the specific code of React to...

Complete steps for Docker to pull images

1. Docker pull pulls the image When using $ docke...

How to operate the check box in HTML page

Checkboxes are very common on web pages. Whether ...

Nginx configuration PC site mobile site separation to achieve redirection

Use nginx to configure the separation of PC site ...

Design reference WordPress website building success case

Each of these 16 sites is worth reading carefully,...

In-depth understanding of MySQL various locks

Table of contents Lock Overview Lock classificati...

Vue sample code for implementing two-column horizontal timeline

Table of contents 1. Implement the component time...

Example code for using @media in CSS3 to achieve web page adaptation

Nowadays, the screen resolution of computer monit...

Solution to changing the data storage location of the database in MySQL 5.7

As the data stored in the MySQL database graduall...

Element Plus implements Affix

Table of contents 1. Component Introduction 2. So...

Web design skills: iframe adaptive height problem

Maybe some people have not come across this issue ...