2 * collectd - src/statsd.c
3 * Copyright (C) 2013 Florian octo Forster
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 * Florian octo Forster <octo at collectd.org>
31 #include "utils_avltree.h"
32 #include "utils_latency.h"
36 #include <sys/types.h>
38 /* AIX doesn't have MSG_DONTWAIT */
40 #define MSG_DONTWAIT MSG_NONBLOCK
43 #ifndef STATSD_DEFAULT_NODE
44 #define STATSD_DEFAULT_NODE NULL
47 #ifndef STATSD_DEFAULT_SERVICE
48 #define STATSD_DEFAULT_SERVICE "8125"
/* The four statsd metric kinds handled by this plugin. The line parser maps
 * the wire type "c" to the counter handler, "ms" to the timer handler, "g" to
 * the gauge handler and "s" to the set handler. */
51 enum metric_type_e { STATSD_COUNTER, STATSD_TIMER, STATSD_GAUGE, STATSD_SET };
52 typedef enum metric_type_e metric_type_t;
/* Per-metric state; instances are stored as the values of metrics_tree.
 * NOTE(review): the rest of this file also reads and writes the members
 * type, value, counter and set of this struct - confirm their declarations
 * against the full file. */
54 struct statsd_metric_s {
/* Latency counter used by timer metrics. statsd_handle_timer() creates it on
 * the first timer update and statsd_metric_free() destroys it. */
58 latency_counter_t *latency;
/* Number of updates recorded since statsd_read() last reset it to zero. */
60 unsigned long updates_num;
62 typedef struct statsd_metric_s statsd_metric_t;
/* All known metrics, keyed by the type-prefixed metric name. Functions with
 * the _unsafe suffix require the caller to hold metrics_lock. */
64 static c_avl_tree_t *metrics_tree = NULL;
65 static pthread_mutex_t metrics_lock = PTHREAD_MUTEX_INITIALIZER;
/* Receiver thread handle plus its state flags: "running" is set by
 * statsd_init() after pthread_create() and cleared by statsd_shutdown();
 * "shutdown" is set by statsd_shutdown() and polled by the receiver loop. */
67 static pthread_t network_thread;
68 static bool network_thread_running;
69 static bool network_thread_shutdown;
/* Listen address, set by the "Host" and "Port" config options. When NULL,
 * statsd_network_init() falls back to STATSD_DEFAULT_NODE and
 * STATSD_DEFAULT_SERVICE. */
71 static char *conf_node = NULL;
72 static char *conf_service = NULL;
/* Set by the Delete* config options. When set, statsd_read() queues metrics
 * of that type for removal if they received no update since the last read. */
74 static bool conf_delete_counters;
75 static bool conf_delete_timers;
76 static bool conf_delete_gauges;
77 static bool conf_delete_sets;
/* Percentiles to report for timers; one element is appended per
 * "TimerPercentile" config option. */
79 static double *conf_timer_percentile = NULL;
80 static size_t conf_timer_percentile_num;
/* Switches for the optional values that statsd_metric_submit_unsafe()
 * dispatches for counters ("CounterSum") and timers ("Timer*"). */
82 static bool conf_counter_sum;
83 static bool conf_timer_lower;
84 static bool conf_timer_upper;
85 static bool conf_timer_sum;
86 static bool conf_timer_count;
/* Finds the metric stored in metrics_tree for `name` and the requested metric
 * type. The visible code also allocates a zero-initialised metric with calloc()
 * and inserts it under a heap copy of the key - presumably only when the
 * c_avl_get() lookup misses; confirm against the full function.
 *
 * Tree keys are the metric name preceded by a two-character type prefix
 * (e.g. "c:"), which is why the name is copied to key + 2 and the buffer is
 * two bytes larger than DATA_MAX_NAME_LEN.
 *
 * Callers in this file treat a NULL result as failure. strdup(), calloc() and
 * c_avl_insert() failures are reported through ERROR(). */
88 /* Must hold metrics_lock when calling this function. */
89 static statsd_metric_t *statsd_metric_lookup_unsafe(char const *name, /* {{{ */
91 char key[DATA_MAX_NAME_LEN + 2];
93 statsd_metric_t *metric;
/* Leave the first two bytes of key free for the type prefix. */
114 sstrncpy(&key[2], name, sizeof(key) - 2);
116 status = c_avl_get(metrics_tree, key, (void *)&metric);
/* key is a stack buffer; the copy made here is what is handed to
 * c_avl_insert() below. */
120 key_copy = strdup(key);
121 if (key_copy == NULL) {
122 ERROR("statsd plugin: strdup failed.");
126 metric = calloc(1, sizeof(*metric));
127 if (metric == NULL) {
128 ERROR("statsd plugin: calloc failed.");
134 metric->latency = NULL;
137 status = c_avl_insert(metrics_tree, key_copy, metric);
139 ERROR("statsd plugin: c_avl_insert failed.");
146 } /* }}} statsd_metric_lookup_unsafe */
/* Replaces the stored value of metric `name` of the given `type` with `value`
 * and counts the update. metrics_lock is taken for the lookup and the
 * modification, and is released again on the failed-lookup path. */
148 static int statsd_metric_set(char const *name, double value, /* {{{ */
149 metric_type_t type) {
150 statsd_metric_t *metric;
152 pthread_mutex_lock(&metrics_lock);
154 metric = statsd_metric_lookup_unsafe(name, type);
/* Lookup failed: drop the lock before leaving. */
155 if (metric == NULL) {
156 pthread_mutex_unlock(&metrics_lock);
160 metric->value = value;
161 metric->updates_num++;
163 pthread_mutex_unlock(&metrics_lock);
166 } /* }}} int statsd_metric_set */
/* Adds `delta` to the stored value of metric `name` of the given `type` and
 * counts the update. metrics_lock is taken for the lookup and the
 * modification, and is released again on the failed-lookup path. */
168 static int statsd_metric_add(char const *name, double delta, /* {{{ */
169 metric_type_t type) {
170 statsd_metric_t *metric;
172 pthread_mutex_lock(&metrics_lock);
174 metric = statsd_metric_lookup_unsafe(name, type);
/* Lookup failed: drop the lock before leaving. */
175 if (metric == NULL) {
176 pthread_mutex_unlock(&metrics_lock);
180 metric->value += delta;
181 metric->updates_num++;
183 pthread_mutex_unlock(&metrics_lock);
186 } /* }}} int statsd_metric_add */
/* Releases the resources attached to `metric`: the latency counter, if one
 * was created, and the set tree, if one was created. */
188 static void statsd_metric_free(statsd_metric_t *metric) /* {{{ */
193 if (metric->latency != NULL) {
194 latency_counter_destroy(metric->latency);
195 metric->latency = NULL;
198 if (metric->set != NULL) {
/* Drain the set tree entry by entry before destroying it. */
202 while (c_avl_pick(metric->set, &key, &value) == 0) {
/* statsd_handle_set() inserts set members as keys with a NULL value. */
204 assert(value == NULL);
207 c_avl_destroy(metric->set);
212 } /* }}} void statsd_metric_free */
/* Converts `str` to a floating point number with strtod() and stores the
 * result in ret_value->gauge. Callers in this file keep the return value in a
 * status variable. */
214 static int statsd_parse_value(char const *str, value_t *ret_value) /* {{{ */
218 ret_value->gauge = (gauge_t)strtod(str, &endptr);
/* True when strtod() consumed nothing (str == endptr) or when characters
 * remain after the parsed number. */
219 if ((str == endptr) || ((endptr != NULL) && (*endptr != 0)))
223 } /* }}} int statsd_parse_value */
/* Applies a counter update for `name`.
 *
 * value_str is parsed as the counter change. extra is optional; when given,
 * the visible checks require it to start with '@', and the text after the '@'
 * is parsed into `scale`. The scale is tested for being finite and within
 * (0, 1]. The change divided by the scale is added to the metric through
 * statsd_metric_add(), whose result is returned. */
225 static int statsd_handle_counter(char const *name, /* {{{ */
226 char const *value_str, char const *extra) {
231 if ((extra != NULL) && (extra[0] != '@'))
/* Skip the leading '@' and parse the remainder as the scale. */
236 status = statsd_parse_value(extra + 1, &scale);
240 if (!isfinite(scale.gauge) || (scale.gauge <= 0.0) || (scale.gauge > 1.0))
245 status = statsd_parse_value(value_str, &value);
249 /* Changes to the counter are added to (statsd_metric_t*)->value. ->counter is
250 * only updated in statsd_metric_submit_unsafe(). */
251 return statsd_metric_add(name, (double)(value.gauge / scale.gauge),
253 } /* }}} int statsd_handle_counter */
/* Applies a gauge update for `name`. A value string that starts with an
 * explicit '+' or '-' sign is treated as a relative change and added to the
 * current gauge; any other value replaces the gauge. */
255 static int statsd_handle_gauge(char const *name, /* {{{ */
256 char const *value_str) {
261 status = statsd_parse_value(value_str, &value);
/* The sign character selects add-to versus overwrite. */
265 if ((value_str[0] == '+') || (value_str[0] == '-'))
266 return statsd_metric_add(name, (double)value.gauge, STATSD_GAUGE);
268 return statsd_metric_set(name, (double)value.gauge, STATSD_GAUGE);
269 } /* }}} int statsd_handle_gauge */
/* Records one timer sample for `name`.
 *
 * value_str is parsed into value_ms; extra is optional and, when given, is
 * checked for a leading '@' with the remainder parsed into `scale` (tested for
 * being finite and within (0, 1]). value_ms divided by the scale is converted
 * with MS_TO_CDTIME_T() and added to the metric's latency counter, which is
 * created on first use. The metric is looked up and modified under
 * metrics_lock. */
271 static int statsd_handle_timer(char const *name, /* {{{ */
272 char const *value_str, char const *extra) {
273 statsd_metric_t *metric;
279 if ((extra != NULL) && (extra[0] != '@'))
/* Skip the leading '@' and parse the remainder as the scale. */
284 status = statsd_parse_value(extra + 1, &scale);
288 if (!isfinite(scale.gauge) || (scale.gauge <= 0.0) || (scale.gauge > 1.0))
293 status = statsd_parse_value(value_str, &value_ms);
297 value = MS_TO_CDTIME_T(value_ms.gauge / scale.gauge);
299 pthread_mutex_lock(&metrics_lock);
301 metric = statsd_metric_lookup_unsafe(name, STATSD_TIMER);
302 if (metric == NULL) {
303 pthread_mutex_unlock(&metrics_lock);
/* Create the latency counter lazily; a second NULL check catches a failed
 * latency_counter_create(). */
307 if (metric->latency == NULL)
308 metric->latency = latency_counter_create();
309 if (metric->latency == NULL) {
310 pthread_mutex_unlock(&metrics_lock);
314 latency_counter_add(metric->latency, value);
315 metric->updates_num++;
317 pthread_mutex_unlock(&metrics_lock);
319 } /* }}} int statsd_handle_timer */
/* Adds the member set_key_orig to the set metric `name`.
 *
 * The set is a tree created on first use with strcmp() as comparator; members
 * are stored as heap-copied keys with a NULL value. All work is done under
 * metrics_lock, which is released on every visible error path before the
 * error is logged. A positive c_avl_insert() status is treated as "key
 * already exists" rather than as a failure. */
321 static int statsd_handle_set(char const *name, /* {{{ */
322 char const *set_key_orig) {
323 statsd_metric_t *metric = NULL;
327 pthread_mutex_lock(&metrics_lock);
329 metric = statsd_metric_lookup_unsafe(name, STATSD_SET);
330 if (metric == NULL) {
331 pthread_mutex_unlock(&metrics_lock);
335 /* Make sure metric->set exists. */
336 if (metric->set == NULL)
337 metric->set = c_avl_create((int (*)(const void *, const void *))strcmp);
339 if (metric->set == NULL) {
340 pthread_mutex_unlock(&metrics_lock);
341 ERROR("statsd plugin: c_avl_create failed.");
/* The caller's string is not kept; the tree gets its own copy. */
345 set_key = strdup(set_key_orig);
346 if (set_key == NULL) {
347 pthread_mutex_unlock(&metrics_lock);
348 ERROR("statsd plugin: strdup failed.");
352 status = c_avl_insert(metric->set, set_key, /* value = */ NULL);
354 pthread_mutex_unlock(&metrics_lock);
356 ERROR("statsd plugin: c_avl_insert (\"%s\") failed with status %i.",
360 } else if (status > 0) /* key already exists */
365 metric->updates_num++;
367 pthread_mutex_unlock(&metrics_lock);
369 } /* }}} int statsd_handle_set */
/* Parses a single statsd line and forwards it to the handler for its type.
 *
 * The visible code locates the first '|' (start of the type field), the last
 * ':' (start of the value field) and an optional second '|' after the type
 * (start of the extra field). Types "c" and "ms" go to the counter and timer
 * handlers together with the extra field; types "g" and "s" go to the gauge
 * and set handlers, which take no extra field.
 *
 * buffer is not const. NOTE(review): presumably the separators are
 * overwritten in place to split the fields - confirm against the full
 * function. */
371 static int statsd_parse_line(char *buffer) /* {{{ */
378 type = strchr(name, '|');
/* strrchr: the value starts at the LAST ':' so that earlier colons stay part
 * of the name. */
384 value = strrchr(name, ':');
390 extra = strchr(type, '|');
396 if (strcmp("c", type) == 0)
397 return statsd_handle_counter(name, value, extra);
398 else if (strcmp("ms", type) == 0)
399 return statsd_handle_timer(name, value, extra);
401 /* extra is only valid for counters and timers */
405 if (strcmp("g", type) == 0)
406 return statsd_handle_gauge(name, value);
407 else if (strcmp("s", type) == 0)
408 return statsd_handle_set(name, value);
411 } /* }}} int statsd_parse_line */
/* Walks a NUL-terminated buffer line by line, using '\n' as the separator,
 * and passes each line to statsd_parse_line(). Lines that fail to parse are
 * reported through ERROR(). */
413 static void statsd_parse_buffer(char *buffer) /* {{{ */
415 while (buffer != NULL) {
420 next = strchr(buffer, '\n');
/* Keep a copy of the line for the error message below; the parser receives
 * the buffer itself as a non-const pointer. */
431 sstrncpy(orig, buffer, sizeof(orig));
433 status = statsd_parse_line(buffer);
435 ERROR("statsd plugin: Unable to parse line: \"%s\"", orig);
439 } /* }}} void statsd_parse_buffer */
/* Reads one datagram from `fd` without blocking and hands its content to
 * statsd_parse_buffer(). recv() errors other than EAGAIN / EWOULDBLOCK are
 * reported through ERROR(). */
441 static void statsd_network_read(int fd) /* {{{ */
447 status = recv(fd, buffer, sizeof(buffer), /* flags = */ MSG_DONTWAIT);
450 if ((errno == EAGAIN) || (errno == EWOULDBLOCK))
453 ERROR("statsd plugin: recv(2) failed: %s", STRERRNO);
/* Clamp the length so there is always room for the terminating NUL; a
 * datagram that fills the whole buffer loses its last byte. */
457 buffer_size = (size_t)status;
458 if (buffer_size >= sizeof(buffer))
459 buffer_size = sizeof(buffer) - 1;
460 buffer[buffer_size] = 0;
462 statsd_parse_buffer(buffer);
463 } /* }}} void statsd_network_read */
/* Creates the listening sockets for the receiver thread.
 *
 * The configured node and service (or the STATSD_DEFAULT_* fallbacks) are
 * resolved with getaddrinfo() for datagram sockets of any address family.
 * For each returned address a socket is created and bound, and the pollfd
 * array is grown by one entry with realloc(). Failures of getaddrinfo(),
 * socket(), bind() and realloc() are reported through ERROR().
 *
 * ret_fds_num receives the number of sockets; ret_fds is the matching output
 * parameter for the pollfd array. */
465 static int statsd_network_init(struct pollfd **ret_fds, /* {{{ */
466 size_t *ret_fds_num) {
467 struct pollfd *fds = NULL;
470 struct addrinfo *ai_list;
473 char const *node = (conf_node != NULL) ? conf_node : STATSD_DEFAULT_NODE;
474 char const *service =
475 (conf_service != NULL) ? conf_service : STATSD_DEFAULT_SERVICE;
/* AI_PASSIVE: the addresses are meant for bind(). */
477 struct addrinfo ai_hints = {.ai_family = AF_UNSPEC,
478 .ai_flags = AI_PASSIVE | AI_ADDRCONFIG,
479 .ai_socktype = SOCK_DGRAM};
481 status = getaddrinfo(node, service, &ai_hints, &ai_list);
483 ERROR("statsd plugin: getaddrinfo (\"%s\", \"%s\") failed: %s", node,
484 service, gai_strerror(status));
488 for (struct addrinfo *ai_ptr = ai_list; ai_ptr != NULL;
489 ai_ptr = ai_ptr->ai_next) {
493 char dbg_node[NI_MAXHOST];
494 char dbg_service[NI_MAXSERV];
496 fd = socket(ai_ptr->ai_family, ai_ptr->ai_socktype, ai_ptr->ai_protocol);
498 ERROR("statsd plugin: socket(2) failed: %s", STRERRNO);
/* Numeric host and port, used only for the debug message. The result of
 * getnameinfo() is not checked. */
502 getnameinfo(ai_ptr->ai_addr, ai_ptr->ai_addrlen, dbg_node, sizeof(dbg_node),
503 dbg_service, sizeof(dbg_service),
504 NI_DGRAM | NI_NUMERICHOST | NI_NUMERICSERV);
505 DEBUG("statsd plugin: Trying to bind to [%s]:%s ...", dbg_node,
508 status = bind(fd, ai_ptr->ai_addr, ai_ptr->ai_addrlen);
510 ERROR("statsd plugin: bind(2) failed: %s", STRERRNO);
/* Grow the pollfd array through a temporary so that fds stays valid if
 * realloc() fails. */
515 tmp = realloc(fds, sizeof(*fds) * (fds_num + 1));
517 ERROR("statsd plugin: realloc failed.");
525 memset(tmp, 0, sizeof(*tmp));
527 tmp->events = POLLIN | POLLPRI;
530 freeaddrinfo(ai_list);
533 ERROR("statsd plugin: Unable to create listening socket for [%s]:%s.",
534 (node != NULL) ? node : "::", service);
539 *ret_fds_num = fds_num;
541 } /* }}} int statsd_network_init */
/* Entry point of the receiver thread started by statsd_init().
 *
 * Opens the listening sockets and then loops until network_thread_shutdown
 * is set: it waits in poll() without a timeout and calls
 * statsd_network_read() for every descriptor that reports POLLIN or POLLPRI.
 * If the sockets cannot be opened the thread exits right away. */
543 static void *statsd_network_thread(void *args) /* {{{ */
545 struct pollfd *fds = NULL;
549 status = statsd_network_init(&fds, &fds_num);
551 ERROR("statsd plugin: Unable to open listening sockets.");
552 pthread_exit((void *)0);
555 while (!network_thread_shutdown) {
/* Timeout -1: block until a descriptor is ready or a signal interrupts. */
556 status = poll(fds, (nfds_t)fds_num, /* timeout = */ -1);
/* EINTR and EAGAIN are tested separately from other poll() errors, which
 * are logged below. */
559 if ((errno == EINTR) || (errno == EAGAIN))
562 ERROR("statsd plugin: poll(2) failed: %s", STRERRNO);
566 for (size_t i = 0; i < fds_num; i++) {
567 if ((fds[i].revents & (POLLIN | POLLPRI)) == 0)
570 statsd_network_read(fds[i].fd);
573 } /* while (!network_thread_shutdown) */
576 for (size_t i = 0; i < fds_num; i++)
581 } /* }}} void *statsd_network_thread */
/* Handles one "TimerPercentile" config item: reads a double from `ci`,
 * checks it against the open interval (0, 100) and appends it to the
 * conf_timer_percentile array, growing the array by one element. */
583 static int statsd_config_timer_percentile(oconfig_item_t *ci) /* {{{ */
585 double percent = NAN;
589 status = cf_util_get_double(ci, &percent);
/* Both bounds are exclusive: 0 and 100 themselves trigger the error. */
593 if ((percent <= 0.0) || (percent >= 100)) {
594 ERROR("statsd plugin: The value for \"%s\" must be between 0 and 100, "
601 realloc(conf_timer_percentile,
602 sizeof(*conf_timer_percentile) * (conf_timer_percentile_num + 1));
604 ERROR("statsd plugin: realloc failed.");
/* The array pointer is only replaced through the temporary `tmp`. */
607 conf_timer_percentile = tmp;
608 conf_timer_percentile[conf_timer_percentile_num] = percent;
609 conf_timer_percentile_num++;
612 } /* }}} int statsd_config_timer_percentile */
/* Complex-config callback of the plugin. Iterates over the children of `ci`
 * and stores each recognised option in the matching conf_* variable. Option
 * names are compared case-insensitively. The return values of the
 * cf_util_get_*() helpers and of statsd_config_timer_percentile() are not
 * checked in the visible code. An ERROR() is logged for an option that is
 * not valid. */
614 static int statsd_config(oconfig_item_t *ci) /* {{{ */
616 for (int i = 0; i < ci->children_num; i++) {
617 oconfig_item_t *child = ci->children + i;
/* Listen address. */
619 if (strcasecmp("Host", child->key) == 0)
620 cf_util_get_string(child, &conf_node);
621 else if (strcasecmp("Port", child->key) == 0)
622 cf_util_get_service(child, &conf_service);
/* Removal of metrics that received no updates, per metric type. */
623 else if (strcasecmp("DeleteCounters", child->key) == 0)
624 cf_util_get_boolean(child, &conf_delete_counters);
625 else if (strcasecmp("DeleteTimers", child->key) == 0)
626 cf_util_get_boolean(child, &conf_delete_timers);
627 else if (strcasecmp("DeleteGauges", child->key) == 0)
628 cf_util_get_boolean(child, &conf_delete_gauges);
629 else if (strcasecmp("DeleteSets", child->key) == 0)
630 cf_util_get_boolean(child, &conf_delete_sets);
/* Optional extra values dispatched for counters and timers. */
631 else if (strcasecmp("CounterSum", child->key) == 0)
632 cf_util_get_boolean(child, &conf_counter_sum);
633 else if (strcasecmp("TimerLower", child->key) == 0)
634 cf_util_get_boolean(child, &conf_timer_lower);
635 else if (strcasecmp("TimerUpper", child->key) == 0)
636 cf_util_get_boolean(child, &conf_timer_upper);
637 else if (strcasecmp("TimerSum", child->key) == 0)
638 cf_util_get_boolean(child, &conf_timer_sum);
639 else if (strcasecmp("TimerCount", child->key) == 0)
640 cf_util_get_boolean(child, &conf_timer_count);
641 else if (strcasecmp("TimerPercentile", child->key) == 0)
642 statsd_config_timer_percentile(child);
644 ERROR("statsd plugin: The \"%s\" config option is not valid.",
649 } /* }}} int statsd_config */
/* Init callback of the plugin. Under metrics_lock it creates metrics_tree
 * (keys compared with strcmp()) if it does not exist yet and starts the
 * receiver thread unless it is already marked as running. A failed
 * pthread_create() releases the lock and is reported through ERROR(). */
651 static int statsd_init(void) /* {{{ */
653 pthread_mutex_lock(&metrics_lock);
654 if (metrics_tree == NULL)
655 metrics_tree = c_avl_create((int (*)(const void *, const void *))strcmp);
/* Guard against starting a second receiver thread. */
657 if (!network_thread_running) {
660 status = pthread_create(&network_thread,
661 /* attr = */ NULL, statsd_network_thread,
664 pthread_mutex_unlock(&metrics_lock);
665 ERROR("statsd plugin: pthread_create failed: %s", STRERRNO);
669 network_thread_running = true;
671 pthread_mutex_unlock(&metrics_lock);
674 } /* }}} int statsd_init */
/* Empties the member tree of a set metric by picking entries until
 * c_avl_pick() stops returning 0. The visible guards test for a NULL metric,
 * for a metric whose type is not STATSD_SET and for a set tree that was never
 * created. The tree object itself is not destroyed here. */
676 /* Must hold metrics_lock when calling this function. */
677 static int statsd_metric_clear_set_unsafe(statsd_metric_t *metric) /* {{{ */
682 if ((metric == NULL) || (metric->type != STATSD_SET))
685 if (metric->set == NULL)
688 while (c_avl_pick(metric->set, &key, &value) == 0) {
694 } /* }}} int statsd_metric_clear_set_unsafe */
/* Dispatches the current state of `metric` through plugin_dispatch_values()
 * with plugin "statsd" and `name` as the type instance.
 *
 *  - Gauge:   metric->value, type "gauge".
 *  - Timer:   type "latency"; "<name>-average" plus, depending on the
 *             configuration, "<name>-lower", "<name>-upper", "<name>-sum" and
 *             one "<name>-percentile-<p>" per configured percentile. Values
 *             are taken from the latency counter when the metric had updates
 *             (have_events). With TimerCount a "<name>-count" value of type
 *             "gauge" is dispatched as well. The latency counter is reset
 *             afterwards.
 *  - Set:     number of entries in the set tree (0.0 if the tree was never
 *             created), type "objects".
 *  - Counter: the pending change in metric->value is rounded with
 *             nearbyint(), moved into metric->counter and reported as type
 *             "derive". With CounterSum the rounded change is additionally
 *             dispatched as type "count".
 *
 * The last visible statement returns the status of plugin_dispatch_values(). */
696 /* Must hold metrics_lock when calling this function. */
697 static int statsd_metric_submit_unsafe(char const *name,
698 statsd_metric_t *metric) /* {{{ */
700 value_list_t vl = VALUE_LIST_INIT;
/* Single value slot, backed by a compound literal and preset to NAN. */
702 vl.values = &(value_t){.gauge = NAN};
704 sstrncpy(vl.plugin, "statsd", sizeof(vl.plugin));
/* Select the collectd type from the metric kind. */
706 if (metric->type == STATSD_GAUGE)
707 sstrncpy(vl.type, "gauge", sizeof(vl.type));
708 else if (metric->type == STATSD_TIMER)
709 sstrncpy(vl.type, "latency", sizeof(vl.type));
710 else if (metric->type == STATSD_SET)
711 sstrncpy(vl.type, "objects", sizeof(vl.type));
712 else /* if (metric->type == STATSD_COUNTER) */
713 sstrncpy(vl.type, "derive", sizeof(vl.type));
715 sstrncpy(vl.type_instance, name, sizeof(vl.type_instance));
717 if (metric->type == STATSD_GAUGE)
718 vl.values[0].gauge = (gauge_t)metric->value;
719 else if (metric->type == STATSD_TIMER) {
720 bool have_events = (metric->updates_num > 0);
722 /* Make sure all timer metrics share the *same* timestamp. */
725 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-average", name);
728 ? CDTIME_T_TO_DOUBLE(latency_counter_get_average(metric->latency))
730 plugin_dispatch_values(&vl);
732 if (conf_timer_lower) {
733 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-lower", name);
736 ? CDTIME_T_TO_DOUBLE(latency_counter_get_min(metric->latency))
738 plugin_dispatch_values(&vl);
741 if (conf_timer_upper) {
742 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-upper", name);
745 ? CDTIME_T_TO_DOUBLE(latency_counter_get_max(metric->latency))
747 plugin_dispatch_values(&vl);
750 if (conf_timer_sum) {
751 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-sum", name);
754 ? CDTIME_T_TO_DOUBLE(latency_counter_get_sum(metric->latency))
756 plugin_dispatch_values(&vl);
/* One value per configured percentile; "%.0f" prints the percentile without
 * decimals in the type instance. */
759 for (size_t i = 0; i < conf_timer_percentile_num; i++) {
760 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-percentile-%.0f",
761 name, conf_timer_percentile[i]);
763 have_events ? CDTIME_T_TO_DOUBLE(latency_counter_get_percentile(
764 metric->latency, conf_timer_percentile[i]))
766 plugin_dispatch_values(&vl);
769 /* Keep this at the end, since vl.type is set to "gauge" here. The
770 * vl.type's above are implicitly set to "latency". */
771 if (conf_timer_count) {
772 sstrncpy(vl.type, "gauge", sizeof(vl.type));
773 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-count", name);
774 vl.values[0].gauge = latency_counter_get_num(metric->latency);
775 plugin_dispatch_values(&vl);
/* Start a fresh measurement window for the next read. */
778 latency_counter_reset(metric->latency);
780 } else if (metric->type == STATSD_SET) {
781 if (metric->set == NULL)
782 vl.values[0].gauge = 0.0;
784 vl.values[0].gauge = (gauge_t)c_avl_size(metric->set);
785 } else { /* STATSD_COUNTER */
/* Only the integral part of the pending change is reported now. */
786 gauge_t delta = nearbyint(metric->value);
788 /* Etsy's statsd writes counters as two metrics: a rate and the change since
789 * the last write. Since collectd does not reset its DERIVE metrics to zero,
790 * this makes little sense, but we're dispatching a "count" metric here
791 * anyway - if requested by the user - for compatibility reasons. */
792 if (conf_counter_sum) {
793 sstrncpy(vl.type, "count", sizeof(vl.type));
794 vl.values[0].gauge = delta;
795 plugin_dispatch_values(&vl);
797 /* restore vl.type */
798 sstrncpy(vl.type, "derive", sizeof(vl.type));
801 /* Rather than resetting value to zero, subtract delta so we correctly keep
802 * track of residuals. */
803 metric->value -= delta;
804 metric->counter += (derive_t)delta;
806 vl.values[0].derive = metric->counter;
809 return plugin_dispatch_values(&vl);
810 } /* }}} int statsd_metric_submit_unsafe */
/* Read callback of the plugin. Under metrics_lock it iterates over all
 * metrics in metrics_tree. Metrics that had no update since the previous
 * read and whose type has its Delete* option enabled are queued for removal.
 * For metrics that are submitted, the value is dispatched with the type
 * prefix stripped from the name, the update counter is reset and, for sets,
 * the members are cleared. Queued metrics are removed from the tree and
 * freed once the iterator has been destroyed. */
812 static int statsd_read(void) /* {{{ */
814 c_avl_iterator_t *iter;
816 statsd_metric_t *metric;
/* Names of metrics to remove; collected during iteration and removed from
 * the tree only after the iterator is destroyed. */
818 char **to_be_deleted = NULL;
819 size_t to_be_deleted_num = 0;
821 pthread_mutex_lock(&metrics_lock);
823 if (metrics_tree == NULL) {
824 pthread_mutex_unlock(&metrics_lock);
828 iter = c_avl_get_iterator(metrics_tree);
829 while (c_avl_iterator_next(iter, (void *)&name, (void *)&metric) == 0) {
830 if ((metric->updates_num == 0) &&
831 ((conf_delete_counters && (metric->type == STATSD_COUNTER)) ||
832 (conf_delete_timers && (metric->type == STATSD_TIMER)) ||
833 (conf_delete_gauges && (metric->type == STATSD_GAUGE)) ||
834 (conf_delete_sets && (metric->type == STATSD_SET)))) {
835 DEBUG("statsd plugin: Deleting metric \"%s\".", name);
836 strarray_add(&to_be_deleted, &to_be_deleted_num, name);
840 /* Names have a prefix, e.g. "c:", which determines the (statsd) type.
841 * Remove this here. */
842 statsd_metric_submit_unsafe(name + 2, metric);
844 /* Reset the metric. */
845 metric->updates_num = 0;
846 if (metric->type == STATSD_SET)
847 statsd_metric_clear_set_unsafe(metric);
849 c_avl_iterator_destroy(iter);
851 for (size_t i = 0; i < to_be_deleted_num; i++) {
854 status = c_avl_remove(metrics_tree, to_be_deleted[i], (void *)&name,
/* NOTE(review): this message is prefixed "stats plugin" while every other
 * message in this file uses "statsd plugin"; looks like a typo in the log
 * text. */
857 ERROR("stats plugin: c_avl_remove (\"%s\") failed with status %i.",
858 to_be_deleted[i], status);
863 statsd_metric_free(metric);
866 pthread_mutex_unlock(&metrics_lock);
868 strarray_free(to_be_deleted, to_be_deleted_num);
871 } /* }}} int statsd_read */
/* Shutdown callback of the plugin. Stops the receiver thread if it is
 * running, then, under metrics_lock, removes every entry from metrics_tree,
 * frees the metrics and destroys the tree. */
873 static int statsd_shutdown(void) /* {{{ */
878 if (network_thread_running) {
879 network_thread_shutdown = true;
/* NOTE(review): the signal is presumably sent to interrupt the blocking
 * poll() in the receiver loop so it can see the shutdown flag; confirm that
 * the process-wide SIGTERM disposition makes this safe. */
880 pthread_kill(network_thread, SIGTERM);
881 pthread_join(network_thread, /* retval = */ NULL);
883 network_thread_running = false;
885 pthread_mutex_lock(&metrics_lock);
/* Drain the tree entry by entry; each picked value is a metric. */
887 while (c_avl_pick(metrics_tree, &key, &value) == 0) {
889 statsd_metric_free(value);
891 c_avl_destroy(metrics_tree);
897 pthread_mutex_unlock(&metrics_lock);
900 } /* }}} int statsd_shutdown */
902 void module_register(void) {
903 plugin_register_complex_config("statsd", statsd_config);
904 plugin_register_init("statsd", statsd_init);
905 plugin_register_read("statsd", statsd_read);
906 plugin_register_shutdown("statsd", statsd_shutdown);