2 * collectd - src/statsd.c
3 * Copyright (C) 2013 Florian octo Forster
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 * Florian octo Forster <octo at collectd.org>
31 #include "utils_avltree.h"
32 #include "utils_latency.h"
36 #include <sys/types.h>
/* AIX doesn't have MSG_DONTWAIT; fall back to its MSG_NONBLOCK spelling. */
#if !defined(MSG_DONTWAIT)
#define MSG_DONTWAIT MSG_NONBLOCK
#endif

/* Node handed to getaddrinfo() when no "Host" option was configured. */
#if !defined(STATSD_DEFAULT_NODE)
#define STATSD_DEFAULT_NODE NULL
#endif

/* Service handed to getaddrinfo() when no "Port" option was configured. */
#if !defined(STATSD_DEFAULT_SERVICE)
#define STATSD_DEFAULT_SERVICE "8125"
#endif
/* The kinds of statsd metrics this plugin tells apart. */
typedef enum metric_type_e {
  STATSD_COUNTER,
  STATSD_TIMER,
  STATSD_GAUGE,
  STATSD_SET
} metric_type_t;
54 struct statsd_metric_s {
58 latency_counter_t *latency;
60 unsigned long updates_num;
62 typedef struct statsd_metric_s statsd_metric_t;
64 static c_avl_tree_t *metrics_tree = NULL;
65 static pthread_mutex_t metrics_lock = PTHREAD_MUTEX_INITIALIZER;
67 static pthread_t network_thread;
68 static _Bool network_thread_running = 0;
69 static _Bool network_thread_shutdown = 0;
71 static char *conf_node = NULL;
72 static char *conf_service = NULL;
74 static _Bool conf_delete_counters = 0;
75 static _Bool conf_delete_timers = 0;
76 static _Bool conf_delete_gauges = 0;
77 static _Bool conf_delete_sets = 0;
79 static double *conf_timer_percentile = NULL;
80 static size_t conf_timer_percentile_num = 0;
82 static _Bool conf_counter_sum = 0;
83 static _Bool conf_timer_lower = 0;
84 static _Bool conf_timer_upper = 0;
85 static _Bool conf_timer_sum = 0;
86 static _Bool conf_timer_count = 0;
88 /* Must hold metrics_lock when calling this function. */
89 static statsd_metric_t *statsd_metric_lookup_unsafe(char const *name, /* {{{ */
91 char key[DATA_MAX_NAME_LEN + 2];
93 statsd_metric_t *metric;
114 sstrncpy(&key[2], name, sizeof(key) - 2);
116 status = c_avl_get(metrics_tree, key, (void *)&metric);
120 key_copy = strdup(key);
121 if (key_copy == NULL) {
122 ERROR("statsd plugin: strdup failed.");
126 metric = calloc(1, sizeof(*metric));
127 if (metric == NULL) {
128 ERROR("statsd plugin: calloc failed.");
134 metric->latency = NULL;
137 status = c_avl_insert(metrics_tree, key_copy, metric);
139 ERROR("statsd plugin: c_avl_insert failed.");
146 } /* }}} statsd_metric_lookup_unsafe */
148 static int statsd_metric_set(char const *name, double value, /* {{{ */
149 metric_type_t type) {
150 statsd_metric_t *metric;
152 pthread_mutex_lock(&metrics_lock);
154 metric = statsd_metric_lookup_unsafe(name, type);
155 if (metric == NULL) {
156 pthread_mutex_unlock(&metrics_lock);
160 metric->value = value;
161 metric->updates_num++;
163 pthread_mutex_unlock(&metrics_lock);
166 } /* }}} int statsd_metric_set */
168 static int statsd_metric_add(char const *name, double delta, /* {{{ */
169 metric_type_t type) {
170 statsd_metric_t *metric;
172 pthread_mutex_lock(&metrics_lock);
174 metric = statsd_metric_lookup_unsafe(name, type);
175 if (metric == NULL) {
176 pthread_mutex_unlock(&metrics_lock);
180 metric->value += delta;
181 metric->updates_num++;
183 pthread_mutex_unlock(&metrics_lock);
186 } /* }}} int statsd_metric_add */
188 static void statsd_metric_free(statsd_metric_t *metric) /* {{{ */
193 if (metric->latency != NULL) {
194 latency_counter_destroy(metric->latency);
195 metric->latency = NULL;
198 if (metric->set != NULL) {
202 while (c_avl_pick(metric->set, &key, &value) == 0) {
204 assert(value == NULL);
207 c_avl_destroy(metric->set);
212 } /* }}} void statsd_metric_free */
214 static int statsd_parse_value(char const *str, value_t *ret_value) /* {{{ */
218 ret_value->gauge = (gauge_t)strtod(str, &endptr);
219 if ((str == endptr) || ((endptr != NULL) && (*endptr != 0)))
223 } /* }}} int statsd_parse_value */
225 static int statsd_handle_counter(char const *name, /* {{{ */
226 char const *value_str, char const *extra) {
231 if ((extra != NULL) && (extra[0] != '@'))
236 status = statsd_parse_value(extra + 1, &scale);
240 if (!isfinite(scale.gauge) || (scale.gauge <= 0.0) || (scale.gauge > 1.0))
245 status = statsd_parse_value(value_str, &value);
249 /* Changes to the counter are added to (statsd_metric_t*)->value. ->counter is
250 * only updated in statsd_metric_submit_unsafe(). */
251 return statsd_metric_add(name, (double)(value.gauge / scale.gauge),
253 } /* }}} int statsd_handle_counter */
255 static int statsd_handle_gauge(char const *name, /* {{{ */
256 char const *value_str) {
261 status = statsd_parse_value(value_str, &value);
265 if ((value_str[0] == '+') || (value_str[0] == '-'))
266 return statsd_metric_add(name, (double)value.gauge, STATSD_GAUGE);
268 return statsd_metric_set(name, (double)value.gauge, STATSD_GAUGE);
269 } /* }}} int statsd_handle_gauge */
271 static int statsd_handle_timer(char const *name, /* {{{ */
272 char const *value_str, char const *extra) {
273 statsd_metric_t *metric;
279 if ((extra != NULL) && (extra[0] != '@'))
284 status = statsd_parse_value(extra + 1, &scale);
288 if (!isfinite(scale.gauge) || (scale.gauge <= 0.0) || (scale.gauge > 1.0))
293 status = statsd_parse_value(value_str, &value_ms);
297 value = MS_TO_CDTIME_T(value_ms.gauge / scale.gauge);
299 pthread_mutex_lock(&metrics_lock);
301 metric = statsd_metric_lookup_unsafe(name, STATSD_TIMER);
302 if (metric == NULL) {
303 pthread_mutex_unlock(&metrics_lock);
307 if (metric->latency == NULL)
308 metric->latency = latency_counter_create();
309 if (metric->latency == NULL) {
310 pthread_mutex_unlock(&metrics_lock);
314 latency_counter_add(metric->latency, value);
315 metric->updates_num++;
317 pthread_mutex_unlock(&metrics_lock);
319 } /* }}} int statsd_handle_timer */
321 static int statsd_handle_set(char const *name, /* {{{ */
322 char const *set_key_orig) {
323 statsd_metric_t *metric = NULL;
327 pthread_mutex_lock(&metrics_lock);
329 metric = statsd_metric_lookup_unsafe(name, STATSD_SET);
330 if (metric == NULL) {
331 pthread_mutex_unlock(&metrics_lock);
335 /* Make sure metric->set exists. */
336 if (metric->set == NULL)
337 metric->set = c_avl_create((int (*)(const void *, const void *))strcmp);
339 if (metric->set == NULL) {
340 pthread_mutex_unlock(&metrics_lock);
341 ERROR("statsd plugin: c_avl_create failed.");
345 set_key = strdup(set_key_orig);
346 if (set_key == NULL) {
347 pthread_mutex_unlock(&metrics_lock);
348 ERROR("statsd plugin: strdup failed.");
352 status = c_avl_insert(metric->set, set_key, /* value = */ NULL);
354 pthread_mutex_unlock(&metrics_lock);
356 ERROR("statsd plugin: c_avl_insert (\"%s\") failed with status %i.",
360 } else if (status > 0) /* key already exists */
365 metric->updates_num++;
367 pthread_mutex_unlock(&metrics_lock);
369 } /* }}} int statsd_handle_set */
/* Parses one statsd line of the form "<name>:<value>|<type>[|<extra>]" and
 * forwards it to the handler for its type. The buffer is modified in place:
 * the separators are overwritten with NUL bytes. Returns 0 on success and
 * non-zero if the line is malformed or the handler fails. */
static int statsd_parse_line(char *buffer) /* {{{ */
{
  char *name = buffer;
  char *type_sep;
  char *value_sep;
  char *type;
  char *value;
  char *extra;

  /* Cut off "|<type>..." first, so that the search for ':' below only looks
   * at "<name>:<value>". */
  type_sep = strchr(name, '|');
  if (type_sep == NULL)
    return -1;
  *type_sep = 0;
  type = type_sep + 1;

  /* The last ':' separates the value; the name itself may contain colons. */
  value_sep = strrchr(name, ':');
  if (value_sep == NULL)
    return -1;
  *value_sep = 0;
  value = value_sep + 1;

  extra = strchr(type, '|');
  if (extra != NULL) {
    *extra = 0;
    extra++;
  }

  if (strcmp("c", type) == 0)
    return statsd_handle_counter(name, value, extra);
  if (strcmp("ms", type) == 0)
    return statsd_handle_timer(name, value, extra);

  /* extra is only valid for counters and timers */
  if (extra != NULL)
    return -1;

  if (strcmp("g", type) == 0)
    return statsd_handle_gauge(name, value);
  if (strcmp("s", type) == 0)
    return statsd_handle_set(name, value);

  return -1;
} /* }}} int statsd_parse_line */
/* Splits a NUL-terminated buffer at newlines and parses every non-empty line.
 * Lines that fail to parse are logged and skipped. The buffer is modified in
 * place. */
static void statsd_parse_buffer(char *buffer) /* {{{ */
{
  char *next;

  for (char *line = buffer; line != NULL; line = next) {
    char orig[64];

    next = strchr(line, '\n');
    if (next != NULL) {
      *next = 0;
      next++;
    }

    if (*line == 0)
      continue;

    /* statsd_parse_line() writes into the line, so keep a copy of its
     * beginning for the error message. */
    sstrncpy(orig, line, sizeof(orig));

    if (statsd_parse_line(line) != 0)
      ERROR("statsd plugin: Unable to parse line: \"%s\"", orig);
  }
} /* }}} void statsd_parse_buffer */
/* Receives one datagram from "fd" without blocking and parses its content.
 * Having nothing to read is not an error; other recv(2) failures are
 * logged. */
static void statsd_network_read(int fd) /* {{{ */
{
  char buffer[4096];
  size_t len;
  ssize_t received;

  received = recv(fd, buffer, sizeof(buffer), /* flags = */ MSG_DONTWAIT);
  if (received < 0) {
    if ((errno != EAGAIN) && (errno != EWOULDBLOCK)) {
      char errbuf[1024];

      ERROR("statsd plugin: recv(2) failed: %s",
            sstrerror(errno, errbuf, sizeof(errbuf)));
    }
    return;
  }

  /* Always leave room for the terminating NUL byte; a datagram that fills
   * the whole buffer loses its last byte. */
  len = (size_t)received;
  if (len > (sizeof(buffer) - 1))
    len = sizeof(buffer) - 1;
  buffer[len] = 0;

  statsd_parse_buffer(buffer);
} /* }}} void statsd_network_read */
/* Opens and binds one datagram socket for every address getaddrinfo() returns
 * for the configured (or default) node and service.
 *
 * ret_fds:     receives a heap-allocated array of pollfd entries; the caller
 *              owns the array and the sockets in it.
 * ret_fds_num: receives the number of entries in that array.
 *
 * Returns 0 if at least one socket could be bound. Otherwise returns the
 * getaddrinfo() status or ENOENT and leaves the output arguments untouched.
 * Addresses that fail individually are logged and skipped. */
static int statsd_network_init(struct pollfd **ret_fds, /* {{{ */
                               size_t *ret_fds_num) {
  struct pollfd *fds = NULL;
  size_t fds_num = 0;

  struct addrinfo *ai_list;
  int status;

  char const *node = (conf_node != NULL) ? conf_node : STATSD_DEFAULT_NODE;
  char const *service =
      (conf_service != NULL) ? conf_service : STATSD_DEFAULT_SERVICE;
  /* "node" may legitimately be NULL (wildcard address). Never hand NULL to a
   * "%s" conversion; print the wildcard as "::" instead. */
  char const *node_str = (node != NULL) ? node : "::";

  struct addrinfo ai_hints = {.ai_family = AF_UNSPEC,
                              .ai_flags = AI_PASSIVE | AI_ADDRCONFIG,
                              .ai_socktype = SOCK_DGRAM};

  status = getaddrinfo(node, service, &ai_hints, &ai_list);
  if (status != 0) {
    ERROR("statsd plugin: getaddrinfo (\"%s\", \"%s\") failed: %s", node_str,
          service, gai_strerror(status));
    return status;
  }

  for (struct addrinfo *ai_ptr = ai_list; ai_ptr != NULL;
       ai_ptr = ai_ptr->ai_next) {
    int fd;
    struct pollfd *tmp;

    char dbg_node[NI_MAXHOST];
    char dbg_service[NI_MAXSERV];

    fd = socket(ai_ptr->ai_family, ai_ptr->ai_socktype, ai_ptr->ai_protocol);
    if (fd < 0) {
      char errbuf[1024];
      ERROR("statsd plugin: socket(2) failed: %s",
            sstrerror(errno, errbuf, sizeof(errbuf)));
      continue;
    }

    /* The buffers are only defined if getnameinfo() succeeds; fall back to a
     * placeholder so the debug message never prints uninitialized memory. */
    if (getnameinfo(ai_ptr->ai_addr, ai_ptr->ai_addrlen, dbg_node,
                    sizeof(dbg_node), dbg_service, sizeof(dbg_service),
                    NI_DGRAM | NI_NUMERICHOST | NI_NUMERICSERV) != 0) {
      sstrncpy(dbg_node, "?", sizeof(dbg_node));
      sstrncpy(dbg_service, "?", sizeof(dbg_service));
    }
    DEBUG("statsd plugin: Trying to bind to [%s]:%s ...", dbg_node,
          dbg_service);

    status = bind(fd, ai_ptr->ai_addr, ai_ptr->ai_addrlen);
    if (status != 0) {
      char errbuf[1024];
      ERROR("statsd plugin: bind(2) failed: %s",
            sstrerror(errno, errbuf, sizeof(errbuf)));
      close(fd);
      continue;
    }

    tmp = realloc(fds, sizeof(*fds) * (fds_num + 1));
    if (tmp == NULL) {
      ERROR("statsd plugin: realloc failed.");
      close(fd);
      continue;
    }
    fds = tmp;
    tmp = fds + fds_num;
    fds_num++;

    memset(tmp, 0, sizeof(*tmp));
    tmp->fd = fd;
    tmp->events = POLLIN | POLLPRI;
  }

  freeaddrinfo(ai_list);

  if (fds_num == 0) {
    ERROR("statsd plugin: Unable to create listening socket for [%s]:%s.",
          node_str, service);
    return ENOENT;
  }

  *ret_fds = fds;
  *ret_fds_num = fds_num;
  return 0;
} /* }}} int statsd_network_init */
549 static void *statsd_network_thread(void *args) /* {{{ */
551 struct pollfd *fds = NULL;
555 status = statsd_network_init(&fds, &fds_num);
557 ERROR("statsd plugin: Unable to open listening sockets.");
558 pthread_exit((void *)0);
561 while (!network_thread_shutdown) {
562 status = poll(fds, (nfds_t)fds_num, /* timeout = */ -1);
566 if ((errno == EINTR) || (errno == EAGAIN))
569 ERROR("statsd plugin: poll(2) failed: %s",
570 sstrerror(errno, errbuf, sizeof(errbuf)));
574 for (size_t i = 0; i < fds_num; i++) {
575 if ((fds[i].revents & (POLLIN | POLLPRI)) == 0)
578 statsd_network_read(fds[i].fd);
581 } /* while (!network_thread_shutdown) */
584 for (size_t i = 0; i < fds_num; i++)
589 } /* }}} void *statsd_network_thread */
591 static int statsd_config_timer_percentile(oconfig_item_t *ci) /* {{{ */
593 double percent = NAN;
597 status = cf_util_get_double(ci, &percent);
601 if ((percent <= 0.0) || (percent >= 100)) {
602 ERROR("statsd plugin: The value for \"%s\" must be between 0 and 100, "
609 realloc(conf_timer_percentile,
610 sizeof(*conf_timer_percentile) * (conf_timer_percentile_num + 1));
612 ERROR("statsd plugin: realloc failed.");
615 conf_timer_percentile = tmp;
616 conf_timer_percentile[conf_timer_percentile_num] = percent;
617 conf_timer_percentile_num++;
620 } /* }}} int statsd_config_timer_percentile */
622 static int statsd_config(oconfig_item_t *ci) /* {{{ */
624 for (int i = 0; i < ci->children_num; i++) {
625 oconfig_item_t *child = ci->children + i;
627 if (strcasecmp("Host", child->key) == 0)
628 cf_util_get_string(child, &conf_node);
629 else if (strcasecmp("Port", child->key) == 0)
630 cf_util_get_service(child, &conf_service);
631 else if (strcasecmp("DeleteCounters", child->key) == 0)
632 cf_util_get_boolean(child, &conf_delete_counters);
633 else if (strcasecmp("DeleteTimers", child->key) == 0)
634 cf_util_get_boolean(child, &conf_delete_timers);
635 else if (strcasecmp("DeleteGauges", child->key) == 0)
636 cf_util_get_boolean(child, &conf_delete_gauges);
637 else if (strcasecmp("DeleteSets", child->key) == 0)
638 cf_util_get_boolean(child, &conf_delete_sets);
639 else if (strcasecmp("CounterSum", child->key) == 0)
640 cf_util_get_boolean(child, &conf_counter_sum);
641 else if (strcasecmp("TimerLower", child->key) == 0)
642 cf_util_get_boolean(child, &conf_timer_lower);
643 else if (strcasecmp("TimerUpper", child->key) == 0)
644 cf_util_get_boolean(child, &conf_timer_upper);
645 else if (strcasecmp("TimerSum", child->key) == 0)
646 cf_util_get_boolean(child, &conf_timer_sum);
647 else if (strcasecmp("TimerCount", child->key) == 0)
648 cf_util_get_boolean(child, &conf_timer_count);
649 else if (strcasecmp("TimerPercentile", child->key) == 0)
650 statsd_config_timer_percentile(child);
652 ERROR("statsd plugin: The \"%s\" config option is not valid.",
657 } /* }}} int statsd_config */
659 static int statsd_init(void) /* {{{ */
661 pthread_mutex_lock(&metrics_lock);
662 if (metrics_tree == NULL)
663 metrics_tree = c_avl_create((int (*)(const void *, const void *))strcmp);
665 if (!network_thread_running) {
668 status = pthread_create(&network_thread,
669 /* attr = */ NULL, statsd_network_thread,
673 pthread_mutex_unlock(&metrics_lock);
674 ERROR("statsd plugin: pthread_create failed: %s",
675 sstrerror(errno, errbuf, sizeof(errbuf)));
679 network_thread_running = 1;
681 pthread_mutex_unlock(&metrics_lock);
684 } /* }}} int statsd_init */
686 /* Must hold metrics_lock when calling this function. */
687 static int statsd_metric_clear_set_unsafe(statsd_metric_t *metric) /* {{{ */
692 if ((metric == NULL) || (metric->type != STATSD_SET))
695 if (metric->set == NULL)
698 while (c_avl_pick(metric->set, &key, &value) == 0) {
704 } /* }}} int statsd_metric_clear_set_unsafe */
706 /* Must hold metrics_lock when calling this function. */
707 static int statsd_metric_submit_unsafe(char const *name,
708 statsd_metric_t *metric) /* {{{ */
710 value_list_t vl = VALUE_LIST_INIT;
712 vl.values = &(value_t){.gauge = NAN};
714 sstrncpy(vl.plugin, "statsd", sizeof(vl.plugin));
716 if (metric->type == STATSD_GAUGE)
717 sstrncpy(vl.type, "gauge", sizeof(vl.type));
718 else if (metric->type == STATSD_TIMER)
719 sstrncpy(vl.type, "latency", sizeof(vl.type));
720 else if (metric->type == STATSD_SET)
721 sstrncpy(vl.type, "objects", sizeof(vl.type));
722 else /* if (metric->type == STATSD_COUNTER) */
723 sstrncpy(vl.type, "derive", sizeof(vl.type));
725 sstrncpy(vl.type_instance, name, sizeof(vl.type_instance));
727 if (metric->type == STATSD_GAUGE)
728 vl.values[0].gauge = (gauge_t)metric->value;
729 else if (metric->type == STATSD_TIMER) {
730 _Bool have_events = (metric->updates_num > 0);
732 /* Make sure all timer metrics share the *same* timestamp. */
735 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-average", name);
738 ? CDTIME_T_TO_DOUBLE(latency_counter_get_average(metric->latency))
740 plugin_dispatch_values(&vl);
742 if (conf_timer_lower) {
743 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-lower", name);
746 ? CDTIME_T_TO_DOUBLE(latency_counter_get_min(metric->latency))
748 plugin_dispatch_values(&vl);
751 if (conf_timer_upper) {
752 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-upper", name);
755 ? CDTIME_T_TO_DOUBLE(latency_counter_get_max(metric->latency))
757 plugin_dispatch_values(&vl);
760 if (conf_timer_sum) {
761 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-sum", name);
764 ? CDTIME_T_TO_DOUBLE(latency_counter_get_sum(metric->latency))
766 plugin_dispatch_values(&vl);
769 for (size_t i = 0; i < conf_timer_percentile_num; i++) {
770 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-percentile-%.0f",
771 name, conf_timer_percentile[i]);
773 have_events ? CDTIME_T_TO_DOUBLE(latency_counter_get_percentile(
774 metric->latency, conf_timer_percentile[i]))
776 plugin_dispatch_values(&vl);
779 /* Keep this at the end, since vl.type is set to "gauge" here. The
780 * vl.type's above are implicitly set to "latency". */
781 if (conf_timer_count) {
782 sstrncpy(vl.type, "gauge", sizeof(vl.type));
783 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-count", name);
784 vl.values[0].gauge = latency_counter_get_num(metric->latency);
785 plugin_dispatch_values(&vl);
788 latency_counter_reset(metric->latency);
790 } else if (metric->type == STATSD_SET) {
791 if (metric->set == NULL)
792 vl.values[0].gauge = 0.0;
794 vl.values[0].gauge = (gauge_t)c_avl_size(metric->set);
795 } else { /* STATSD_COUNTER */
796 gauge_t delta = nearbyint(metric->value);
798 /* Etsy's statsd writes counters as two metrics: a rate and the change since
799 * the last write. Since collectd does not reset its DERIVE metrics to zero,
800 * this makes little sense, but we're dispatching a "count" metric here
801 * anyway - if requested by the user - for compatibility reasons. */
802 if (conf_counter_sum) {
803 sstrncpy(vl.type, "count", sizeof(vl.type));
804 vl.values[0].gauge = delta;
805 plugin_dispatch_values(&vl);
807 /* restore vl.type */
808 sstrncpy(vl.type, "derive", sizeof(vl.type));
811 /* Rather than resetting value to zero, subtract delta so we correctly keep
812 * track of residuals. */
813 metric->value -= delta;
814 metric->counter += (derive_t)delta;
816 vl.values[0].derive = metric->counter;
819 return plugin_dispatch_values(&vl);
820 } /* }}} int statsd_metric_submit_unsafe */
822 static int statsd_read(void) /* {{{ */
824 c_avl_iterator_t *iter;
826 statsd_metric_t *metric;
828 char **to_be_deleted = NULL;
829 size_t to_be_deleted_num = 0;
831 pthread_mutex_lock(&metrics_lock);
833 if (metrics_tree == NULL) {
834 pthread_mutex_unlock(&metrics_lock);
838 iter = c_avl_get_iterator(metrics_tree);
839 while (c_avl_iterator_next(iter, (void *)&name, (void *)&metric) == 0) {
840 if ((metric->updates_num == 0) &&
841 ((conf_delete_counters && (metric->type == STATSD_COUNTER)) ||
842 (conf_delete_timers && (metric->type == STATSD_TIMER)) ||
843 (conf_delete_gauges && (metric->type == STATSD_GAUGE)) ||
844 (conf_delete_sets && (metric->type == STATSD_SET)))) {
845 DEBUG("statsd plugin: Deleting metric \"%s\".", name);
846 strarray_add(&to_be_deleted, &to_be_deleted_num, name);
850 /* Names have a prefix, e.g. "c:", which determines the (statsd) type.
851 * Remove this here. */
852 statsd_metric_submit_unsafe(name + 2, metric);
854 /* Reset the metric. */
855 metric->updates_num = 0;
856 if (metric->type == STATSD_SET)
857 statsd_metric_clear_set_unsafe(metric);
859 c_avl_iterator_destroy(iter);
861 for (size_t i = 0; i < to_be_deleted_num; i++) {
864 status = c_avl_remove(metrics_tree, to_be_deleted[i], (void *)&name,
867 ERROR("stats plugin: c_avl_remove (\"%s\") failed with status %i.",
868 to_be_deleted[i], status);
873 statsd_metric_free(metric);
876 pthread_mutex_unlock(&metrics_lock);
878 strarray_free(to_be_deleted, to_be_deleted_num);
881 } /* }}} int statsd_read */
883 static int statsd_shutdown(void) /* {{{ */
888 if (network_thread_running) {
889 network_thread_shutdown = 1;
890 pthread_kill(network_thread, SIGTERM);
891 pthread_join(network_thread, /* retval = */ NULL);
893 network_thread_running = 0;
895 pthread_mutex_lock(&metrics_lock);
897 while (c_avl_pick(metrics_tree, &key, &value) == 0) {
899 statsd_metric_free(value);
901 c_avl_destroy(metrics_tree);
907 pthread_mutex_unlock(&metrics_lock);
910 } /* }}} int statsd_shutdown */
912 void module_register(void) {
913 plugin_register_complex_config("statsd", statsd_config);
914 plugin_register_init("statsd", statsd_init);
915 plugin_register_read("statsd", statsd_read);
916 plugin_register_shutdown("statsd", statsd_shutdown);