From 008ae3a2e772a8a91f6acba26cb45369082edff6 Mon Sep 17 00:00:00 2001 From: Vladimir Vitkov Date: Fri, 26 Oct 2018 15:02:07 +0300 Subject: [PATCH] monitoring bits --- monitoring/collectd/collectd.conf | 239 ++++++++++++++ monitoring/collectd/openwrt.db | 4 + monitoring/grafana/grafana.ini | 302 ++++++++++++++++++ monitoring/graphite/conf/carbon.conf | 113 +++++++ monitoring/graphite/conf/graphite.wsgi | 25 ++ monitoring/graphite/conf/relay-rules.conf | 21 ++ monitoring/graphite/conf/rewrite-rules.conf | 36 +++ .../graphite/conf/storage-aggregation.conf | 32 ++ monitoring/graphite/conf/storage-schemas.conf | 20 ++ .../systemd-units/carbon-aggregator@.service | 20 ++ .../systemd-units/carbon-cache@.service | 20 ++ .../systemd-units/carbon-relay@.service | 19 ++ monitoring/graphite/uwsgi/apps-enabled/README | 10 + .../uwsgi/apps-enabled/graphite-web.ini | 10 + 14 files changed, 871 insertions(+) create mode 100644 monitoring/collectd/collectd.conf create mode 100644 monitoring/collectd/openwrt.db create mode 100644 monitoring/grafana/grafana.ini create mode 100644 monitoring/graphite/conf/carbon.conf create mode 100644 monitoring/graphite/conf/graphite.wsgi create mode 100644 monitoring/graphite/conf/relay-rules.conf create mode 100644 monitoring/graphite/conf/rewrite-rules.conf create mode 100644 monitoring/graphite/conf/storage-aggregation.conf create mode 100644 monitoring/graphite/conf/storage-schemas.conf create mode 100644 monitoring/graphite/systemd-units/carbon-aggregator@.service create mode 100644 monitoring/graphite/systemd-units/carbon-cache@.service create mode 100644 monitoring/graphite/systemd-units/carbon-relay@.service create mode 100644 monitoring/graphite/uwsgi/apps-enabled/README create mode 100644 monitoring/graphite/uwsgi/apps-enabled/graphite-web.ini diff --git a/monitoring/collectd/collectd.conf b/monitoring/collectd/collectd.conf new file mode 100644 index 0000000..f3efc2b --- /dev/null +++ b/monitoring/collectd/collectd.conf @@ -0,0 +1,239 
@@ +############################################################################## +# Global # +#----------------------------------------------------------------------------# +# Global settings for the daemon. # +############################################################################## + +#Hostname "localhost" +FQDNLookup false +#BaseDir "/var/lib/collectd" +#PluginDir "/usr/lib/collectd" +TypesDB "/usr/share/collectd/types.db" "/etc/collectd/openwrt.db" + +#----------------------------------------------------------------------------# +# Interval at which to query values. This may be overwritten on a per-plugin # +# base by using the 'Interval' option of the LoadPlugin block: # +# # +# Interval 60 # +# # +#----------------------------------------------------------------------------# +Interval 30 + +#Timeout 2 +#ReadThreads 5 +# avoid race conditions if multiple datapoints are packed in a single packet +WriteThreads 1 + +# Logging +LoadPlugin syslog + + LogLevel info + + +# now start collecting +LoadPlugin cpu + +# free space +LoadPlugin df + + # Ignore listed filesystems + IgnoreSelected true + # and select all virtual crap + FSType bdev + FSType binfmt_misc + FSType cgroup + FSType cpuset + FSType debugfs + FSType devpts + FSType devtmpfs + FSType hugetlbfs + FSType mqueue + FSType pipefs + FSType proc + FSType pstore + FSType ramfs + FSType rootfs + FSType rpc_pipefs + FSType securityfs + FSType sockfs + FSType sysfs + FSType tmpfs + + +# IOPS +LoadPlugin disk + +# LOADAVG +LoadPlugin load + +# Ram utilisation +LoadPlugin memory + +# SWAP +LoadPlugin swap + +# Logged in +LoadPlugin users + +# traffic +LoadPlugin interface + +# temp, voltage, current, fans via ipmi +# all sensors monitored by default +LoadPlugin ipmi + + # disable some sensors collection + IgnoreSelected true + Sensor "VRM 2 processor_module (9.2)" + Sensor "VRM 1 processor_module (9.1)" + Sensor "Power Supply 1 power_supply (10.1)" + Sensor "Power Supply 2 power_supply (10.2)" + Sensor 
"Power Supplies power_supply (10.3)" + Sensor "UID Light system_chassis (23.1)" + Sensor "Int. Health LED system_chassis (23.2)" + Sensor "Ext. Health LED system_chassis (23.3)" + + +# pick up the lm_sensors stuff +LoadPlugin sensors + +# IRQ +LoadPlugin irq + +# nginx stats +LoadPlugin nginx + + URL "http://localhost/of2016status" + + +# now let's listen/send some data here and there +# default port 25826 +LoadPlugin network + + # multicast + Listen "239.192.74.66" + Listen "ff18::efc0:4a42" + # All local interfaces + #Listen "0.0.0.0" + # listen on test vlans + #Listen "10.200.0.1" + #Listen "10.203.0.1" + #Listen "10.204.0.1" + + # listen on prod vlans + #Listen "10.100.0.1" + #Listen "10.103.0.1" + #Listen "10.104.0.1" + Listen "*" + ReportStats true + + +# entropy statistics +LoadPlugin entropy + +# and finally save the data in graphite +LoadPlugin write_graphite + + + Host "localhost" + Port "10001" + Protocol "tcp" + Prefix "received." + LogSendErrors true + # store raw numbers and let frontend deal with them + StoreRates false + # be verbose with metrics store + AlwaysAppendDS true + + + +LoadPlugin ConnTrack +LoadPlugin ContextSwitch + + +### try to monitor the switches too (SNMP) +LoadPlugin snmp + + # data points + + Type "if_octets" + Table true + Instance "IF-MIB::ifName" + Values "IF-MIB::ifHCInOctets" "IF-MIB::ifHCOutOctets" + + + + Type "if_errors" + Table true + Instance "IF-MIB::ifName" + Values "IF-MIB::ifInErrors" "IF-MIB::ifOutErrors" + + + # now gather my minions + + Address "10.20.0.11" + Version 2 + Community "password" + Collect "std_traffic" "std_errors" + Interval 60 + + + + Address "10.20.0.21" + Version 2 + Community "password" + Collect "std_traffic" "std_errors" + Interval 60 + + + + Address "10.20.0.22" + Version 2 + Community "password" + Collect "std_traffic" "std_errors" + Interval 60 + + + + Address "10.20.0.26" + Version 2 + Community "password" + Collect "std_traffic" "std_errors" + Interval 60 + + + + Address "10.20.0.23" + Version
2 + Community "password" + Collect "std_traffic" "std_errors" + Interval 60 + + + + Address "172.31.42.64" + Version 2 + Community "password" + Collect "std_traffic" "std_errors" + Interval 60 + + + +# monitor unbound by exec +####LoadPlugin exec +#### +#### Exec "unbound" "/usr/local/bin/unbound-collectd.sh" +#### + +# See collectd.conf(5) +LoadPlugin unixsock + + + SocketFile "/tmp/collectd.sock" + SocketGroup "nagios" + SocketPerms "0770" + + +LoadPlugin uptime +LoadPlugin processes + diff --git a/monitoring/collectd/openwrt.db b/monitoring/collectd/openwrt.db new file mode 100644 index 0000000..be918b6 --- /dev/null +++ b/monitoring/collectd/openwrt.db @@ -0,0 +1,4 @@ +stations value:GAUGE:0:256 +signal_noise_mw value:GAUGE:0:U +signal_power_mw value:GAUGE:0:U + diff --git a/monitoring/grafana/grafana.ini b/monitoring/grafana/grafana.ini new file mode 100644 index 0000000..56c8d56 --- /dev/null +++ b/monitoring/grafana/grafana.ini @@ -0,0 +1,302 @@ +# possible values : production, development +app_mode = development + +#################################### Paths #################################### +[paths] +# Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) +;data = /var/lib/grafana +# +# Directory where grafana can store logs +;logs = /var/log/grafana +# +# Directory where grafana will automatically scan and look for plugins +;plugins = /var/lib/grafana/plugins + +#################################### Server #################################### +[server] +# Protocol (http or https) +;protocol = http + +# The ip address to bind to, empty will bind to all interfaces +http_addr = 127.0.0.1 + +# The http port to use +;http_port = 80 + +# The public facing domain name used to access grafana from a browser +domain = eric.openfest.org + +# Redirect to correct domain if host header does not match domain +# Prevents DNS rebinding attacks +enforce_domain = true + +# The full public facing url +#root_url = 
%(protocol)s://%(domain)s:%(http_port)s/grafana/ +root_url = %(protocol)s://%(domain)s/grafana/ + +# Log web requests +;router_logging = false + +# the path relative working path +;static_root_path = public + +# enable gzip +;enable_gzip = false + +# https certs & key file +;cert_file = +;cert_key = + +#################################### Database #################################### +[database] +# Either "mysql", "postgres" or "sqlite3", it's your choice +;type = sqlite3 +;host = 127.0.0.1:3306 +;name = grafana +;user = root +;password = + +# For "postgres" only, either "disable", "require" or "verify-full" +;ssl_mode = disable + +# For "sqlite3" only, path relative to data_path setting +;path = grafana.db + +#################################### Session #################################### +[session] +# Either "memory", "file", "redis", "mysql", "postgres", default is "file" +;provider = file + +# Provider config options +# memory: not have any config yet +# file: session dir path, is relative to grafana data_path +# redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=grafana` +# mysql: go-sql-driver/mysql dsn config string, e.g. `user:password@tcp(127.0.0.1:3306)/database_name` +# postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable +;provider_config = sessions + +# Session cookie name +;cookie_name = grafana_sess + +# If you use session in https only, default is false +;cookie_secure = false + +# Session life time, default is 86400 +;session_life_time = 86400 + +#################################### Analytics #################################### +[analytics] +# Server reporting, sends usage counters to stats.grafana.org every 24 hours. +# No ip addresses are being tracked, only simple counters to track +# running instances, dashboard and error counts. It is very helpful to us. +# Change this option to false to disable reporting. 
+reporting_enabled = false + +# Set to false to disable all checks to https://grafana.net +# for new versions (grafana itself and plugins), check is used +# in some UI views to notify that grafana or plugin update exists +# This option does not cause any auto updates, nor send any information +# only a GET request to http://grafana.net to get latest versions +check_for_updates = false + +# Google Analytics universal tracking code, only enabled if you specify an id here +;google_analytics_ua_id = + +#################################### Security #################################### +[security] +# default admin user, created on startup +;admin_user = admin + +# default admin password, can be changed before first start of grafana, or in profile settings +;admin_password = admin + +# used for signing +secret_key = aepuTheic8ierahY8Oochae7xop7eeni + +# Auto-login remember days +;login_remember_days = 7 +;cookie_username = grafana_user +;cookie_remember_name = grafana_remember + +# disable gravatar profile images +disable_gravatar = true + +# data source proxy whitelist (ip_or_domain:port separated by spaces) +;data_source_proxy_whitelist = + +[snapshots] +# snapshot sharing options +external_enabled = false +;external_snapshot_url = https://snapshots-origin.raintank.io +;external_snapshot_name = Publish to snapshot.raintank.io + +#################################### Users #################################### +[users] +# disable user signup / registration +allow_sign_up = false + +# Allow non admin users to create organizations +allow_org_create = false + +# Set to true to automatically assign new users to the default organization (id 1) +;auto_assign_org = true + +# Default role new users will be automatically assigned (if disabled above is set to true) +;auto_assign_org_role = Viewer + +# Background text for the user field on the login page +;login_hint = email or username + +# Default UI theme ("dark" or "light") +;default_theme = light +
+#################################### Anonymous Auth ########################## +[auth.anonymous] +# enable anonymous access +enabled = true + +# specify organization name that should be used for unauthenticated users +org_name = OpenFest 2018 + +# specify role for unauthenticated users +org_role = Viewer + +#################################### Github Auth ########################## +[auth.github] +enabled = false +;allow_sign_up = false +;client_id = some_id +;client_secret = some_secret +;scopes = user:email,read:org +;auth_url = https://github.com/login/oauth/authorize +;token_url = https://github.com/login/oauth/access_token +;api_url = https://api.github.com/user +;team_ids = +;allowed_organizations = + +#################################### Google Auth ########################## +[auth.google] +enabled = false +;allow_sign_up = false +;client_id = some_client_id +;client_secret = some_client_secret +;scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email +;auth_url = https://accounts.google.com/o/oauth2/auth +;token_url = https://accounts.google.com/o/oauth2/token +;api_url = https://www.googleapis.com/oauth2/v1/userinfo +;allowed_domains = + +#################################### Auth Proxy ########################## +[auth.proxy] +enabled = false +;header_name = X-WEBAUTH-USER +;header_property = username +;auto_sign_up = true + +#################################### Basic Auth ########################## +[auth.basic] +enabled = false + +#################################### Auth LDAP ########################## +[auth.ldap] +enabled = false +;config_file = /etc/grafana/ldap.toml + +#################################### SMTP / Emailing ########################## +[smtp] +enabled = false +;host = localhost:25 +;user = +;password = +;cert_file = +;key_file = +;skip_verify = false +;from_address = admin@grafana.localhost + +[emails] +welcome_email_on_sign_up = false + +#################################### Logging 
########################## +[log] +# Either "console", "file", "syslog". Default is console and file +# Use space to separate multiple modes, e.g. "console file" +;mode = console, file + +# Either "trace", "debug", "info", "warn", "error", "critical", default is "info" +;level = info + +# For "console" mode only +[log.console] +;level = + +# log line format, valid options are text, console and json +;format = console + +# For "file" mode only +[log.file] +;level = + +# log line format, valid options are text, console and json +;format = text + +# This enables automated log rotate(switch of following options), default is true +;log_rotate = true + +# Max line number of single file, default is 1000000 +;max_lines = 1000000 + +# Max size shift of single file, default is 28 means 1 << 28, 256MB +;max_size_shift = 28 + +# Segment log daily, default is true +;daily_rotate = true + +# Expired days of log file(delete after max days), default is 7 +;max_days = 7 + +[log.syslog] +;level = + +# log line format, valid options are text, console and json +;format = text + +# Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used. +;network = +;address = + +# Syslog facility. user, daemon and local0 through local7 are valid. +;facility = + +# Syslog tag. By default, the process' argv[0] is used. 
+;tag = + + +#################################### AMQP Event Publisher ########################## +[event_publisher] +;enabled = false +;rabbitmq_url = amqp://localhost/ +;exchange = grafana_events + +;#################################### Dashboard JSON files ########################## +[dashboards.json] +;enabled = false +;path = /var/lib/grafana/dashboards + +#################################### Internal Grafana Metrics ########################## +# Metrics available at HTTP API Url /api/metrics +[metrics] +# Disable / Enable internal metrics +enabled = true + +# Publish interval +interval_seconds = 10 + +# Send internal metrics to Graphite +[metrics.graphite] +address = localhost:2003 +prefix = grafana.%(instance_name)s. + +#################################### Grafana.net ########################## +# Url used to import dashboards directly from Grafana.net +[grafana_net] +url = https://grafana.net diff --git a/monitoring/graphite/conf/carbon.conf b/monitoring/graphite/conf/carbon.conf new file mode 100644 index 0000000..06faa2e --- /dev/null +++ b/monitoring/graphite/conf/carbon.conf @@ -0,0 +1,113 @@ +[aggregator:top] +# filesystem organization +GRAPHITE_ROOT = /opt/graphite +GRAPHITE_CONF_DIR = /opt/graphite/conf +CONF_DIR = /opt/graphite/conf +GRAPHITE_STORAGE_DIR = /opt/graphite/storage +PID_DIR = /run/graphite +LOG_DIR = /var/log/graphite +LOCAL_DATA_DIR = /opt/graphite/storage +USER = graphite + +# listeners +# echo ... | telnet +LINE_RECEIVER_INTERFACE = 127.0.0.1 +LINE_RECEIVER_PORT = 10001 +# programmatic +PICKLE_RECEIVER_INTERFACE = 127.0.0.1 +PICKLE_RECEIVER_PORT = 10101 + +#UDP Listener +ENABLE_UDP_LISTENER = True +UDP_RECEIVER_INTERFACE = 127.0.0.1 +UDP_RECEIVER_PORT = 10001 + +# log connections?
+LOG_LISTENER_CONNECTIONS = True + +# how to relay: rules|consistent-hashing|aggregated-consistent-hashing +RELAY_METHOD = consistent-hashing +REPLICATION_FACTOR = 1 + +AGGREGATION_RULES = aggregation-rules.conf +REWRITE_RULES = rewrite-rules.conf +FORWARD_ALL = True + +DESTINATIONS = 127.0.0.1:2004:a + +# tuning/queue/capacity +# This defines the maximum "message size" between carbon daemons. +# You shouldn't need to tune this unless you really know what you're doing. +MAX_DATAPOINTS_PER_MESSAGE = 500 +MAX_QUEUE_SIZE = 20000 +# This is the percentage that the queue must be empty before it will accept +# more messages. For a larger site, if the queue is very large it makes sense +# to tune this to allow for incoming stats. So if you have an average +# flow of 100k stats/minute, and a MAX_QUEUE_SIZE of 3,000,000, it makes sense +# to allow stats to start flowing when you've cleared the queue to 95% since +# you should have space to accommodate the next minute's worth of stats +# even before the relay incrementally clears more of the queue +QUEUE_LOW_WATERMARK_PCT = 0.8 + +# Set this to False to drop datapoints when any send queue (sending datapoints +# to a downstream carbon daemon) hits MAX_QUEUE_SIZE. If this is True (the +# default) then sockets over which metrics are received will temporarily stop accepting +# data until the send queues fall below QUEUE_LOW_WATERMARK_PCT * MAX_QUEUE_SIZE. +USE_FLOW_CONTROL = True + +# Set this to True to enable whitelisting and blacklisting of metrics in +# CONF_DIR/whitelist and CONF_DIR/blacklist. If the whitelist is missing or +# empty, all metrics will pass through +# USE_WHITELIST = False + +# By default, carbon itself will log statistics (such as a count, +# metricsReceived) with the top level prefix of 'carbon' at an interval of 60 +# seconds. 
Set CARBON_METRIC_INTERVAL to 0 to disable instrumentation +CARBON_METRIC_PREFIX = carbon +CARBON_METRIC_INTERVAL = 10 + +[cache:a] +# filesystem organization +GRAPHITE_ROOT = /opt/graphite +GRAPHITE_CONF_DIR = /opt/graphite/conf +CONF_DIR = /opt/graphite/conf +GRAPHITE_STORAGE_DIR = /opt/graphite/storage +PID_DIR = /run/graphite +LOG_DIR = /var/log/graphite +LOCAL_DATA_DIR = /opt/graphite/storage +USER = graphite + +# cache / mem sizing +MAX_CACHE_SIZE = 500000 +MAX_UPDATES_PER_SECOND = 500 +MAX_UPDATES_PER_SECOND_ON_SHUTDOWN = 1500 +MAX_CREATES_PER_MINUTE = 120 +# WHISPER_SPARSE_CREATE = False +# delegate creation work to kernel (if available) falls back on posix calls +WHISPER_FALLOCATE_CREATE = True +# if running on shared disk and multiple caches write to same file +# WHISPER_LOCK_WRITES = False + +# listeners +LINE_RECEIVER_INTERFACE = localhost +LINE_RECEIVER_PORT = 2003 +ENABLE_UDP_LISTENER = False +UDP_RECEIVER_INTERFACE = localhost +UDP_RECEIVER_PORT = 2003 +PICKLE_RECEIVER_INTERFACE = localhost +PICKLE_RECEIVER_PORT = 2004 +# interface for bulk queries +CACHE_QUERY_INTERFACE = localhost +CACHE_QUERY_PORT = 2005 + +#some logging +LOG_LISTENER_CONNECTIONS = False +LOG_UPDATES = False +LOG_CACHE_HITS = False +LOG_CACHE_QUEUE_SORTS = False + +CACHE_WRITE_STRATEGY = sorted +WHISPER_AUTOFLUSH = False + +CARBON_METRIC_PREFIX = carbon +CARBON_METRIC_INTERVAL = 10 diff --git a/monitoring/graphite/conf/graphite.wsgi b/monitoring/graphite/conf/graphite.wsgi new file mode 100644 index 0000000..a76efe0 --- /dev/null +++ b/monitoring/graphite/conf/graphite.wsgi @@ -0,0 +1,25 @@ +import os, sys +sys.path.append('/mnt/graphite/webapp') +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'graphite.settings') + +import django + +if django.VERSION < (1, 4): + from django.core.handlers.wsgi import WSGIHandler + application = WSGIHandler() +else: + # From 1.4 wsgi support was improved and since 1.7 old style WSGI script + # causes AppRegistryNotReady exception + # 
https://docs.djangoproject.com/en/dev/releases/1.7/#wsgi-scripts + from django.core.wsgi import get_wsgi_application + application = get_wsgi_application() + + +# READ THIS +# Initializing the search index can be very expensive, please include +# the WSGIImportScript directive pointing to this script in your vhost +# config to ensure the index is preloaded before any requests are handed +# to the process. +from graphite.logger import log +log.info("graphite.wsgi - pid %d - reloading search index" % os.getpid()) +import graphite.metrics.search diff --git a/monitoring/graphite/conf/relay-rules.conf b/monitoring/graphite/conf/relay-rules.conf new file mode 100644 index 0000000..36cabdb --- /dev/null +++ b/monitoring/graphite/conf/relay-rules.conf @@ -0,0 +1,21 @@ +# Relay destination rules for carbon-relay. Entries are scanned in order, +# and the first pattern a metric matches will cause processing to cease after sending +# unless `continue` is set to true +# +# [name] +# pattern = +# destinations = +# continue = # default: False +# +# name: Arbitrary unique name to identify the rule +# pattern: Regex pattern to match against the metric name +# destinations: Comma-separated list of destinations. +# ex: 127.0.0.1, 10.1.2.3:2004, 10.1.2.4:2004:a, myserver.mydomain.com +# continue: Continue processing rules if this rule matches (default: False) + +# You must have exactly one section with 'default = true' +# Note that all destinations listed must also exist in carbon.conf +# in the DESTINATIONS setting in the [relay] section +[default] +default = true +destinations = 127.0.0.1:2004:a diff --git a/monitoring/graphite/conf/rewrite-rules.conf b/monitoring/graphite/conf/rewrite-rules.conf new file mode 100644 index 0000000..9f45629 --- /dev/null +++ b/monitoring/graphite/conf/rewrite-rules.conf @@ -0,0 +1,36 @@ +# This file defines regular expression patterns that can be used to +# rewrite metric names in a search & replace fashion. 
It consists of two +# sections, [pre] and [post]. The rules in the pre section are applied to +# metric names as soon as they are received. The post rules are applied +# after aggregation has taken place. +# +# The general form of each rule is as follows: +# +# regex-pattern = replacement-text +# +# For example: +# +# [post] +# _sum$ = +# _avg$ = +# +# These rules would strip off a suffix of _sum or _avg from any metric names +# after aggregation. +[pre] +\) = _ +\( = _ +# temperature-Temp_1_system_internal_expansion_board_16_5_/value.wsp +temperature-Temp_([0-9]+)_([a-zA-Z0-9_]+)__(.*)\. = Temp-\1_\2. + +# nest under common port +if_errors-(.*).(rx|tx)$ = \1.errors.\2 +if_octets-(.*).(rx|tx)$ = \1.octets.\2 + +# expand cisco naming +Vl([0-9]+) = Vlan\1 + +# rename tplink ports +port_([0-9]+)__Gigabit_(Fiber|Copper) = Gi\1 + +# rename tplink vlans +Vlan-interface([0-9]+) = Vlan\1 diff --git a/monitoring/graphite/conf/storage-aggregation.conf b/monitoring/graphite/conf/storage-aggregation.conf new file mode 100644 index 0000000..989146c --- /dev/null +++ b/monitoring/graphite/conf/storage-aggregation.conf @@ -0,0 +1,32 @@ +# Aggregation methods for whisper files. Entries are scanned in order, +# and first match wins. 
This file is scanned for changes every 60 seconds +# +# [name] +# pattern = +# xFilesFactor = +# aggregationMethod = +# +# name: Arbitrary unique name for the rule +# pattern: Regex pattern to match against the metric name +# xFilesFactor: Ratio of valid data points required for aggregation to the next retention to occur +# aggregationMethod: function to apply to data points for aggregation +# +[min] +pattern = \.min$ +xFilesFactor = 0.1 +aggregationMethod = min + +[max] +pattern = \.max$ +xFilesFactor = 0.1 +aggregationMethod = max + +[sum] +pattern = \.count$ +xFilesFactor = 0 +aggregationMethod = sum + +[default_average] +pattern = .* +xFilesFactor = 0.5 +aggregationMethod = average diff --git a/monitoring/graphite/conf/storage-schemas.conf b/monitoring/graphite/conf/storage-schemas.conf new file mode 100644 index 0000000..4a44faa --- /dev/null +++ b/monitoring/graphite/conf/storage-schemas.conf @@ -0,0 +1,20 @@ +# Schema definitions for Whisper files. Entries are scanned in order, +# and first match wins. This file is scanned for changes every 60 seconds. +# +# [name] +# pattern = regex +# retentions = timePerPoint:timeToStore, timePerPoint:timeToStore, ... + +# Carbon's internal metrics. This entry should match what is specified in +# CARBON_METRIC_PREFIX and CARBON_METRIC_INTERVAL settings +[carbon] +pattern = ^carbon\. +retentions = 10:90d + +[switches_snmp] +pattern = sw\.snmp\. 
+retentions = 60s:30d + +[default_30s_6months] +pattern = .* +retentions = 30s:30d diff --git a/monitoring/graphite/systemd-units/carbon-aggregator@.service b/monitoring/graphite/systemd-units/carbon-aggregator@.service new file mode 100644 index 0000000..9d383d3 --- /dev/null +++ b/monitoring/graphite/systemd-units/carbon-aggregator@.service @@ -0,0 +1,20 @@ +[Unit] +Description=carbon-aggregator instance %i (graphite) +After=network.target + +[Service] +User=graphite +Group=graphite +PrivateTmp=yes +# Run ExecStartPre with root-permissions +PermissionsStartOnly=true +ExecStartPre=-/bin/mkdir -p /run/graphite +ExecStartPre=/bin/chown -R graphite:graphite /run/graphite +ExecStartPre=/bin/rm -f /run/graphite/carbon-aggregator-%i.pid +ExecStart=/opt/graphite/bin/carbon-aggregator.py --instance=%i start +Type=forking +PIDFile=/run/graphite/carbon-aggregator-%i.pid +LimitNOFILE=128000 + +[Install] +WantedBy=multi-user.target diff --git a/monitoring/graphite/systemd-units/carbon-cache@.service b/monitoring/graphite/systemd-units/carbon-cache@.service new file mode 100644 index 0000000..8d320b0 --- /dev/null +++ b/monitoring/graphite/systemd-units/carbon-cache@.service @@ -0,0 +1,20 @@ +[Unit] +Description=carbon-cache instance %i (graphite) +After=network.target + +[Service] +User=graphite +Group=graphite +PrivateTmp=yes +# Run ExecStartPre with root-permissions +PermissionsStartOnly=true +ExecStartPre=-/bin/mkdir -p /run/graphite +ExecStartPre=/bin/chown -R graphite:graphite /run/graphite +ExecStartPre=/bin/rm -f /run/graphite/carbon-cache-%i.pid +ExecStart=/opt/graphite/bin/carbon-cache.py --instance=%i start +Type=forking +PIDFile=/run/graphite/carbon-cache-%i.pid +LimitNOFILE=128000 + +[Install] +WantedBy=multi-user.target diff --git a/monitoring/graphite/systemd-units/carbon-relay@.service b/monitoring/graphite/systemd-units/carbon-relay@.service new file mode 100644 index 0000000..a8c1e82 --- /dev/null +++ b/monitoring/graphite/systemd-units/carbon-relay@.service 
@@ -0,0 +1,19 @@ +[Unit] +Description=carbon-relay instance %i (graphite) +After=network.target + +[Service] +User=graphite +Group=graphite +PrivateTmp=yes +# Run ExecStartPre with root-permissions +PermissionsStartOnly=true +ExecStartPre=-/bin/mkdir -p /run/graphite +ExecStartPre=/bin/chown -R graphite:graphite /run/graphite +ExecStartPre=/bin/rm -f /run/graphite/carbon-relay-%i.pid +ExecStart=/opt/graphite/bin/carbon-relay.py --instance=%i start +Type=forking +PIDFile=/run/graphite/carbon-relay-%i.pid + +[Install] +WantedBy=multi-user.target diff --git a/monitoring/graphite/uwsgi/apps-enabled/README b/monitoring/graphite/uwsgi/apps-enabled/README new file mode 100644 index 0000000..c3126c4 --- /dev/null +++ b/monitoring/graphite/uwsgi/apps-enabled/README @@ -0,0 +1,10 @@ +Some files found in this directory are processed by uWSGI init.d script as +uWSGI configuration files. + +On system boot for each configuration file new uWSGI daemon instance is started +with additional option. Name of this option is based on configuration file +extension. Path to configuration files is passed as option value. + +See more detailed information at: + * /usr/share/doc/uwsgi/README.Debian.gz + * /etc/default/uwsgi diff --git a/monitoring/graphite/uwsgi/apps-enabled/graphite-web.ini b/monitoring/graphite/uwsgi/apps-enabled/graphite-web.ini new file mode 100644 index 0000000..ab2e0fc --- /dev/null +++ b/monitoring/graphite/uwsgi/apps-enabled/graphite-web.ini @@ -0,0 +1,10 @@ +[uwsgi] +processes = 2 +socket = 127.0.0.1:3031 +gid = www-data +uid = www-data +chdir = /opt/graphite/conf +#wsgi-file = /opt/graphite/conf/graphite.wsgi +pythonpath = /opt/graphite/webapp +env = DJANGO_SETTINGS_MODULE=graphite.settings +module = django.core.wsgi:get_wsgi_application()