From 76de7a31ef8d235ec7ff252b0f752925d3307820 Mon Sep 17 00:00:00 2001 From: Vladimir Vitkov Date: Mon, 19 Oct 2015 11:43:38 +0300 Subject: [PATCH] Graphite - config * Readme * Graphite config for the ingestion chain --- monitoring/graphite/README.md | 22 ++++ monitoring/graphite/graphite-conf/carbon.conf | 113 ++++++++++++++++++ .../graphite/graphite-conf/graphite.wsgi | 25 ++++ .../graphite/graphite-conf/relay-rules.conf | 21 ++++ .../graphite/graphite-conf/rewrite-rules.conf | 26 ++++ .../graphite-conf/storage-aggregation.conf | 32 +++++ .../graphite-conf/storage-schemas.conf | 17 +++ 7 files changed, 256 insertions(+) create mode 100644 monitoring/graphite/README.md create mode 100644 monitoring/graphite/graphite-conf/carbon.conf create mode 100644 monitoring/graphite/graphite-conf/graphite.wsgi create mode 100644 monitoring/graphite/graphite-conf/relay-rules.conf create mode 100644 monitoring/graphite/graphite-conf/rewrite-rules.conf create mode 100644 monitoring/graphite/graphite-conf/storage-aggregation.conf create mode 100644 monitoring/graphite/graphite-conf/storage-schemas.conf diff --git a/monitoring/graphite/README.md b/monitoring/graphite/README.md new file mode 100644 index 0000000..7a805b2 --- /dev/null +++ b/monitoring/graphite/README.md @@ -0,0 +1,22 @@ +# Graphite config +Configuration for graphite/carbon + +# Design +System is designed to receive data from collectd, pass it through aggregation/relay and finally stuff it into cache + +``` ++----------+ +| collectd | ++----------+ + || + \/ ++------------+ +| aggregator | ++------------+ + || + \/ + +-------+ + | cache | + +-------+ + +``` diff --git a/monitoring/graphite/graphite-conf/carbon.conf b/monitoring/graphite/graphite-conf/carbon.conf new file mode 100644 index 0000000..767b33d --- /dev/null +++ b/monitoring/graphite/graphite-conf/carbon.conf @@ -0,0 +1,113 @@ +[aggregator:top] +# filesystem organization +GRAPHITE_ROOT = /opt/graphite +GRAPHITE_CONF_DIR = /opt/graphite/conf +CONF_DIR = 
/opt/graphite/conf +GRAPHITE_STORAGE_DIR = /opt/graphite/storage +PID_DIR = /run/graphite +LOG_DIR = /var/log/graphite +LOCAL_DATA_DIR = /opt/graphite/storage +USER = graphite + +# listeners +# echo ... | telnet +LINE_RECEIVER_INTERFACE = 0.0.0.0 +LINE_RECEIVER_PORT = 10001 +# programmatic +PICKLE_RECEIVER_INTERFACE = 0.0.0.0 +PICKLE_RECEIVER_PORT = 10101 + +#UDP Listener +ENABLE_UDP_LISTENER = True +UDP_RECEIVER_INTERFACE = 127.0.0.1 +UDP_RECEIVER_PORT = 10001 + +# log connections? +LOG_LISTENER_CONNECTIONS = True + +# how to relay: rules|consistent-hashing|aggregated-consistent-hashing +RELAY_METHOD = consistent-hashing +REPLICATION_FACTOR = 1 + +AGGREGATION_RULES = aggregation-rules.conf +REWRITE_RULES = rewrite-rules.conf +FORWARD_ALL = True + +DESTINATIONS = 127.0.0.1:2004:a + +# tuning/queue/capacity +# This defines the maximum "message size" between carbon daemons. +# You shouldn't need to tune this unless you really know what you're doing. +MAX_DATAPOINTS_PER_MESSAGE = 500 +MAX_QUEUE_SIZE = 20000 +# This is the percentage that the queue must be empty before it will accept +# more messages. For a larger site, if the queue is very large it makes sense +# to tune this to allow for incoming stats. So if you have an average +# flow of 100k stats/minute, and a MAX_QUEUE_SIZE of 3,000,000, it makes sense +# to allow stats to start flowing when you've cleared the queue to 95% since +# you should have space to accommodate the next minute's worth of stats +# even before the relay incrementally clears more of the queue +QUEUE_LOW_WATERMARK_PCT = 0.8 + +# Set this to False to drop datapoints when any send queue (sending datapoints +# to a downstream carbon daemon) hits MAX_QUEUE_SIZE. If this is True (the +# default) then sockets over which metrics are received will temporarily stop accepting +# data until the send queues fall below QUEUE_LOW_WATERMARK_PCT * MAX_QUEUE_SIZE. 
+USE_FLOW_CONTROL = True + +# Set this to True to enable whitelisting and blacklisting of metrics in +# CONF_DIR/whitelist and CONF_DIR/blacklist. If the whitelist is missing or +# empty, all metrics will pass through +# USE_WHITELIST = False + +# By default, carbon itself will log statistics (such as a count, +# metricsReceived) with the top level prefix of 'carbon' at an interval of 60 +# seconds. Set CARBON_METRIC_INTERVAL to 0 to disable instrumentation +CARBON_METRIC_PREFIX = carbon +CARBON_METRIC_INTERVAL = 10 + +[cache:a] +# filesystem organization +GRAPHITE_ROOT = /opt/graphite +GRAPHITE_CONF_DIR = /opt/graphite/conf +CONF_DIR = /opt/graphite/conf +GRAPHITE_STORAGE_DIR = /opt/graphite/storage +PID_DIR = /run/graphite +LOG_DIR = /var/log/graphite +LOCAL_DATA_DIR = /opt/graphite/storage +USER = graphite + +# cache / mem sizing +MAX_CACHE_SIZE = 500000 +MAX_UPDATES_PER_SECOND = 500 +MAX_UPDATES_PER_SECOND_ON_SHUTDOWN = 1500 +MAX_CREATES_PER_MINUTE = 120 +# WHISPER_SPARSE_CREATE = False +# delegate creation work to kernel (if available) falls back on posix calls +WHISPER_FALLOCATE_CREATE = True +# if running on shared disk and multiple caches write to same file +# WHISPER_LOCK_WRITES = False + +# listeners +LINE_RECEIVER_INTERFACE = localhost +LINE_RECEIVER_PORT = 2003 +ENABLE_UDP_LISTENER = False +UDP_RECEIVER_INTERFACE = localhost +UDP_RECEIVER_PORT = 2003 +PICKLE_RECEIVER_INTERFACE = localhost +PICKLE_RECEIVER_PORT = 2004 +# interface for bulk queries +CACHE_QUERY_INTERFACE = localhost +CACHE_QUERY_PORT = 2005 + +#some logging +LOG_LISTENER_CONNECTIONS = True +LOG_UPDATES = True +LOG_CACHE_HITS = False +LOG_CACHE_QUEUE_SORTS = False + +CACHE_WRITE_STRATEGY = sorted +WHISPER_AUTOFLUSH = False + +CARBON_METRIC_PREFIX = carbon +CARBON_METRIC_INTERVAL = 10 diff --git a/monitoring/graphite/graphite-conf/graphite.wsgi b/monitoring/graphite/graphite-conf/graphite.wsgi new file mode 100644 index 0000000..a76efe0 --- /dev/null +++ 
b/monitoring/graphite/graphite-conf/graphite.wsgi @@ -0,0 +1,25 @@ +import os, sys +sys.path.append('/mnt/graphite/webapp') +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'graphite.settings') + +import django + +if django.VERSION < (1, 4): + from django.core.handlers.wsgi import WSGIHandler + application = WSGIHandler() +else: + # From 1.4 wsgi support was improved and since 1.7 old style WSGI script + # causes AppRegistryNotReady exception + # https://docs.djangoproject.com/en/dev/releases/1.7/#wsgi-scripts + from django.core.wsgi import get_wsgi_application + application = get_wsgi_application() + + +# READ THIS +# Initializing the search index can be very expensive, please include +# the WSGIImportScript directive pointing to this script in your vhost +# config to ensure the index is preloaded before any requests are handed +# to the process. +from graphite.logger import log +log.info("graphite.wsgi - pid %d - reloading search index" % os.getpid()) +import graphite.metrics.search diff --git a/monitoring/graphite/graphite-conf/relay-rules.conf b/monitoring/graphite/graphite-conf/relay-rules.conf new file mode 100644 index 0000000..36cabdb --- /dev/null +++ b/monitoring/graphite/graphite-conf/relay-rules.conf @@ -0,0 +1,21 @@ +# Relay destination rules for carbon-relay. Entries are scanned in order, +# and the first pattern a metric matches will cause processing to cease after sending +# unless `continue` is set to true +# +# [name] +# pattern = +# destinations = +# continue = # default: False +# +# name: Arbitrary unique name to identify the rule +# pattern: Regex pattern to match against the metric name +# destinations: Comma-separated list of destinations. 
+# ex: 127.0.0.1, 10.1.2.3:2004, 10.1.2.4:2004:a, myserver.mydomain.com +# continue: Continue processing rules if this rule matches (default: False) + +# You must have exactly one section with 'default = true' +# Note that all destinations listed must also exist in carbon.conf +# in the DESTINATIONS setting in the [relay] section +[default] +default = true +destinations = 127.0.0.1:2004:a diff --git a/monitoring/graphite/graphite-conf/rewrite-rules.conf b/monitoring/graphite/graphite-conf/rewrite-rules.conf new file mode 100644 index 0000000..c3167ed --- /dev/null +++ b/monitoring/graphite/graphite-conf/rewrite-rules.conf @@ -0,0 +1,26 @@ +# This file defines regular expression patterns that can be used to +# rewrite metric names in a search & replace fashion. It consists of two +# sections, [pre] and [post]. The rules in the pre section are applied to +# metric names as soon as they are received. The post rules are applied +# after aggregation has taken place. +# +# The general form of each rule is as follows: +# +# regex-pattern = replacement-text +# +# For example: +# +# [post] +# _sum$ = +# _avg$ = +# +# These rules would strip off a suffix of _sum or _avg from any metric names +# after aggregation. +[pre] +# fix parens to underscore +\) = _ +\( = _ + +# Rewrite temperature sensors to be less ugly +# temperature-Temp_1_system_internal_expansion_board_16_5_/value.wsp +temperature-Temp_([0-9]+)_([a-zA-Z0-9_]+)__(.*)\. = Temp-\1_\2. diff --git a/monitoring/graphite/graphite-conf/storage-aggregation.conf b/monitoring/graphite/graphite-conf/storage-aggregation.conf new file mode 100644 index 0000000..989146c --- /dev/null +++ b/monitoring/graphite/graphite-conf/storage-aggregation.conf @@ -0,0 +1,32 @@ +# Aggregation methods for whisper files. Entries are scanned in order, +# and first match wins. 
This file is scanned for changes every 60 seconds +# +# [name] +# pattern = +# xFilesFactor = +# aggregationMethod = +# +# name: Arbitrary unique name for the rule +# pattern: Regex pattern to match against the metric name +# xFilesFactor: Ratio of valid data points required for aggregation to the next retention to occur +# aggregationMethod: function to apply to data points for aggregation +# +[min] +pattern = \.min$ +xFilesFactor = 0.1 +aggregationMethod = min + +[max] +pattern = \.max$ +xFilesFactor = 0.1 +aggregationMethod = max + +[sum] +pattern = \.count$ +xFilesFactor = 0 +aggregationMethod = sum + +[default_average] +pattern = .* +xFilesFactor = 0.5 +aggregationMethod = average diff --git a/monitoring/graphite/graphite-conf/storage-schemas.conf b/monitoring/graphite/graphite-conf/storage-schemas.conf new file mode 100644 index 0000000..47cf1a4 --- /dev/null +++ b/monitoring/graphite/graphite-conf/storage-schemas.conf @@ -0,0 +1,17 @@ +# Schema definitions for Whisper files. Entries are scanned in order, +# and first match wins. This file is scanned for changes every 60 seconds. +# +# [name] +# pattern = regex +# retentions = timePerPoint:timeToStore, timePerPoint:timeToStore, ... + +# Carbon's internal metrics. This entry should match what is specified in +# CARBON_METRIC_PREFIX and CARBON_METRIC_INTERVAL settings +[carbon] +pattern = ^carbon\. +retentions = 10:30d + +# final defaults +[default_10s_6months] +pattern = .* +retentions = 10s:30d