2 * collectd - src/statsd.c
3 * Copyright (C) 2013 Florian octo Forster
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 * Florian octo Forster <octo at collectd.org>
31 #include "utils_avltree.h"
32 #include "utils_latency.h"
36 #include <sys/types.h>
38 /* AIX doesn't have MSG_DONTWAIT */
/* Aliases MSG_DONTWAIT to MSG_NONBLOCK. NOTE(review): the guard that decides
 * when this alias applies is not part of this view. */
40 #define MSG_DONTWAIT MSG_NONBLOCK
/* Defaults used by statsd_network_init() when conf_node / conf_service are
 * NULL: a NULL node and service "8125". Both may be overridden at build time
 * because they are wrapped in #ifndef. */
43 #ifndef STATSD_DEFAULT_NODE
44 #define STATSD_DEFAULT_NODE NULL
47 #ifndef STATSD_DEFAULT_SERVICE
48 #define STATSD_DEFAULT_SERVICE "8125"
/* Kinds of metric this plugin keeps. statsd_parse_line() routes the wire
 * types "c", "ms", "g" and "s" to the counter, timer, gauge and set handlers
 * respectively. */
51 enum metric_type_e { STATSD_COUNTER, STATSD_TIMER, STATSD_GAUGE, STATSD_SET };
52 typedef enum metric_type_e metric_type_t;
/* Per-metric state stored as the value of an entry in metrics_tree.
 * NOTE(review): other code in this file also reads ->type, ->value, ->counter
 * and ->set; their declarations are not part of this view. */
54 struct statsd_metric_s {
/* Latency histogram for timers; created on first use by
 * statsd_handle_timer() and destroyed by statsd_metric_free(). */
58 latency_counter_t *latency;
/* Number of updates since the last statsd_read(), which resets it to 0. */
60 unsigned long updates_num;
62 typedef struct statsd_metric_s statsd_metric_t;
/* All known metrics, keyed by a type-prefixed name string (see
 * statsd_metric_lookup_unsafe()). Created in statsd_init() with strcmp as the
 * comparator and protected by metrics_lock. */
64 static c_avl_tree_t *metrics_tree;
65 static pthread_mutex_t metrics_lock = PTHREAD_MUTEX_INITIALIZER;
/* Receiver thread handle and its lifecycle flags: `running` is set by
 * statsd_init() and cleared by statsd_shutdown(); `shutdown` is set by
 * statsd_shutdown() and polled by the thread's main loop. */
67 static pthread_t network_thread;
68 static bool network_thread_running;
69 static bool network_thread_shutdown;
/* Listen address and port from the "Host" / "Port" options; NULL means the
 * STATSD_DEFAULT_* values are used. */
71 static char *conf_node;
72 static char *conf_service;
/* "Delete<Kind>" options: when set, statsd_read() removes metrics of that
 * kind that received no update since the previous read. */
74 static bool conf_delete_counters;
75 static bool conf_delete_timers;
76 static bool conf_delete_gauges;
77 static bool conf_delete_sets;
/* Percentiles requested via "TimerPercentile"; the array is grown by
 * statsd_config_timer_percentile(). */
79 static double *conf_timer_percentile;
80 static size_t conf_timer_percentile_num;
/* Switches for the optional extra values dispatched by
 * statsd_metric_submit_unsafe(). */
82 static bool conf_counter_sum;
83 static bool conf_timer_lower;
84 static bool conf_timer_upper;
85 static bool conf_timer_sum;
86 static bool conf_timer_count;
88 /* Must hold metrics_lock when calling this function. */
/* Looks up the metric called `name` in metrics_tree and, on the path shown
 * below, allocates a zeroed metric and inserts it under a heap copy of the
 * key. Callers treat a NULL result as failure.
 * NOTE(review): the code that fills key[0..1] with the type prefix, the
 * early return after a successful c_avl_get() and the cleanup on the error
 * paths are on lines missing from this view -- confirm against the full
 * file. */
89 static statsd_metric_t *statsd_metric_lookup_unsafe(char const *name, /* {{{ */
/* Two extra bytes leave room for the prefix placed before the name. */
91 char key[DATA_MAX_NAME_LEN + 2];
93 statsd_metric_t *metric;
/* The metric name is stored after the two-byte prefix. */
114 sstrncpy(&key[2], name, sizeof(key) - 2);
116 status = c_avl_get(metrics_tree, key, (void *)&metric);
/* The tree keeps the key pointer, so it gets its own heap copy of the
 * stack buffer. */
120 key_copy = strdup(key);
121 if (key_copy == NULL) {
122 ERROR("statsd plugin: strdup failed.");
126 metric = calloc(1, sizeof(*metric));
127 if (metric == NULL) {
128 ERROR("statsd plugin: calloc failed.");
134 metric->latency = NULL;
137 status = c_avl_insert(metrics_tree, key_copy, metric);
139 ERROR("statsd plugin: c_avl_insert failed.");
146 } /* }}} statsd_metric_lookup_unsafe */
/* Overwrites the stored value of metric `name` (kind `type`) with `value`
 * and counts the update, all while holding metrics_lock.
 * NOTE(review): the return statements are not part of this view; presumably
 * non-zero when the lookup fails and 0 on success -- confirm. */
148 static int statsd_metric_set(char const *name, double value, /* {{{ */
149 metric_type_t type) {
150 statsd_metric_t *metric;
152 pthread_mutex_lock(&metrics_lock);
154 metric = statsd_metric_lookup_unsafe(name, type);
155 if (metric == NULL) {
/* Lookup failed: drop the lock before bailing out. */
156 pthread_mutex_unlock(&metrics_lock);
160 metric->value = value;
161 metric->updates_num++;
163 pthread_mutex_unlock(&metrics_lock);
166 } /* }}} int statsd_metric_set */
/* Adds `delta` to the stored value of metric `name` (kind `type`) and counts
 * the update, all while holding metrics_lock.
 * NOTE(review): the return statements are not part of this view; presumably
 * non-zero when the lookup fails and 0 on success -- confirm. */
168 static int statsd_metric_add(char const *name, double delta, /* {{{ */
169 metric_type_t type) {
170 statsd_metric_t *metric;
172 pthread_mutex_lock(&metrics_lock);
174 metric = statsd_metric_lookup_unsafe(name, type);
175 if (metric == NULL) {
/* Lookup failed: drop the lock before bailing out. */
176 pthread_mutex_unlock(&metrics_lock);
180 metric->value += delta;
181 metric->updates_num++;
183 pthread_mutex_unlock(&metrics_lock);
186 } /* }}} int statsd_metric_add */
/* Releases the resources owned by `metric`: the latency counter, if any, and
 * the set tree, if any.
 * NOTE(review): freeing of the picked set keys and of `metric` itself is
 * presumably on lines missing from this view -- confirm. */
188 static void statsd_metric_free(statsd_metric_t *metric) /* {{{ */
193 if (metric->latency != NULL) {
194 latency_counter_destroy(metric->latency);
195 metric->latency = NULL;
198 if (metric->set != NULL) {
/* Drain the set one entry at a time before destroying the tree. */
202 while (c_avl_pick(metric->set, &key, &value) == 0) {
/* statsd_handle_set() inserts set members with a NULL value. */
204 assert(value == NULL);
207 c_avl_destroy(metric->set);
212 } /* }}} void statsd_metric_free */
/* Parses `str` with strtod() and stores the result in ret_value->gauge.
 * NOTE(review): the return statements are not part of this view; callers
 * store the result in `status`, presumably 0 on success and non-zero on a
 * parse error -- confirm. */
214 static int statsd_parse_value(char const *str, value_t *ret_value) /* {{{ */
218 ret_value->gauge = (gauge_t)strtod(str, &endptr);
/* Detects "nothing was converted" (endptr still at str) and trailing
 * characters after the number (*endptr is not the terminator). */
219 if ((str == endptr) || ((endptr != NULL) && (*endptr != 0)))
223 } /* }}} int statsd_parse_value */
/* Handles a counter ("c") sample. `value_str` is the increment and `extra`,
 * when present, is a sample rate of the form "@<rate>". The increment is
 * divided by the rate before being added to the metric.
 * NOTE(review): the default scale used when `extra` is NULL and the error
 * returns are on lines missing from this view. */
225 static int statsd_handle_counter(char const *name, /* {{{ */
226 char const *value_str, char const *extra) {
/* An extra field that does not start with '@' is not a sample rate. */
231 if ((extra != NULL) && (extra[0] != '@'))
/* Skip the '@' and parse the rate itself. */
236 status = statsd_parse_value(extra + 1, &scale);
/* Flags a rate that is not finite or lies outside (0, 1]. */
240 if (!isfinite(scale.gauge) || (scale.gauge <= 0.0) || (scale.gauge > 1.0))
245 status = statsd_parse_value(value_str, &value);
249 /* Changes to the counter are added to (statsd_metric_t*)->value. ->counter is
250 * only updated in statsd_metric_submit_unsafe(). */
251 return statsd_metric_add(name, (double)(value.gauge / scale.gauge),
253 } /* }}} int statsd_handle_counter */
/* Handles a gauge ("g") sample. A value written with an explicit sign is
 * applied as a delta to the current gauge; an unsigned value replaces it. */
255 static int statsd_handle_gauge(char const *name, /* {{{ */
256 char const *value_str) {
261 status = statsd_parse_value(value_str, &value);
/* A leading '+' or '-' selects relative mode. */
265 if ((value_str[0] == '+') || (value_str[0] == '-'))
266 return statsd_metric_add(name, (double)value.gauge, STATSD_GAUGE);
268 return statsd_metric_set(name, (double)value.gauge, STATSD_GAUGE);
269 } /* }}} int statsd_handle_gauge */
/* Handles a timer ("ms") sample. The millisecond value is divided by the
 * optional "@<rate>" sample rate, converted to cdtime_t and added to the
 * metric's latency counter, which is created on first use.
 * NOTE(review): the default scale used when `extra` is NULL and the return
 * statements are on lines missing from this view. */
271 static int statsd_handle_timer(char const *name, /* {{{ */
272 char const *value_str, char const *extra) {
273 statsd_metric_t *metric;
/* An extra field that does not start with '@' is not a sample rate. */
279 if ((extra != NULL) && (extra[0] != '@'))
284 status = statsd_parse_value(extra + 1, &scale);
/* Flags a rate that is not finite or lies outside (0, 1]. */
288 if (!isfinite(scale.gauge) || (scale.gauge <= 0.0) || (scale.gauge > 1.0))
293 status = statsd_parse_value(value_str, &value_ms);
297 value = MS_TO_CDTIME_T(value_ms.gauge / scale.gauge);
299 pthread_mutex_lock(&metrics_lock);
301 metric = statsd_metric_lookup_unsafe(name, STATSD_TIMER);
302 if (metric == NULL) {
303 pthread_mutex_unlock(&metrics_lock);
/* Create the latency counter lazily; the second check catches a failed
 * latency_counter_create(). */
307 if (metric->latency == NULL)
308 metric->latency = latency_counter_create();
309 if (metric->latency == NULL) {
310 pthread_mutex_unlock(&metrics_lock);
314 latency_counter_add(metric->latency, value);
315 metric->updates_num++;
317 pthread_mutex_unlock(&metrics_lock);
319 } /* }}} int statsd_handle_timer */
/* Handles a set ("s") sample: records `set_key_orig` as a member of the
 * metric's set, creating the set tree on first use. Members are stored as
 * keys of a strcmp-ordered tree with NULL values.
 * NOTE(review): the return statements and the handling of the duplicated key
 * on the error / already-present paths are on lines missing from this view. */
321 static int statsd_handle_set(char const *name, /* {{{ */
322 char const *set_key_orig) {
323 statsd_metric_t *metric = NULL;
327 pthread_mutex_lock(&metrics_lock);
329 metric = statsd_metric_lookup_unsafe(name, STATSD_SET);
330 if (metric == NULL) {
331 pthread_mutex_unlock(&metrics_lock);
335 /* Make sure metric->set exists. */
336 if (metric->set == NULL)
337 metric->set = c_avl_create((int (*)(const void *, const void *))strcmp);
339 if (metric->set == NULL) {
340 pthread_mutex_unlock(&metrics_lock);
341 ERROR("statsd plugin: c_avl_create failed.");
/* The tree keeps the key pointer, so insert a private copy. */
345 set_key = strdup(set_key_orig);
346 if (set_key == NULL) {
347 pthread_mutex_unlock(&metrics_lock);
348 ERROR("statsd plugin: strdup failed.");
352 status = c_avl_insert(metric->set, set_key, /* value = */ NULL);
354 pthread_mutex_unlock(&metrics_lock);
355 ERROR("statsd plugin: c_avl_insert (\"%s\") failed with status %i.",
359 } else if (status > 0) /* key already exists */
361 /* The update is counted after the insert result has been handled. */
364 metric->updates_num++;
366 pthread_mutex_unlock(&metrics_lock);
368 } /* }}} int statsd_handle_set */
/* Parses one statsd line held in the writable `buffer` and hands it to the
 * handler for its type. The separators it looks for are the first '|'
 * (start of the type), the last ':' (start of the value) and a second '|'
 * after the type (start of the optional extra field).
 * NOTE(review): the lines that NUL-terminate the fields, reject malformed
 * input and return on unknown types are missing from this view. */
370 static int statsd_parse_line(char *buffer) /* {{{ */
377 type = strchr(name, '|');
383 value = strrchr(name, ':');
389 extra = strchr(type, '|');
/* Counters and timers may carry an extra (sample rate) field. */
395 if (strcmp("c", type) == 0)
396 return statsd_handle_counter(name, value, extra);
397 else if (strcmp("ms", type) == 0)
398 return statsd_handle_timer(name, value, extra);
400 /* extra is only valid for counters and timers */
404 if (strcmp("g", type) == 0)
405 return statsd_handle_gauge(name, value);
406 else if (strcmp("s", type) == 0)
407 return statsd_handle_set(name, value);
410 } /* }}} void statsd_parse_line */
/* Splits `buffer` at newline characters and feeds each line to
 * statsd_parse_line(), logging lines that fail to parse.
 * NOTE(review): the code that terminates each line and advances `buffer` to
 * `next` is on lines missing from this view. */
412 static void statsd_parse_buffer(char *buffer) /* {{{ */
414 while (buffer != NULL) {
419 next = strchr(buffer, '\n');
/* Keep a copy of the line for the error message; presumably
 * statsd_parse_line() modifies `buffer` while parsing -- confirm. */
430 sstrncpy(orig, buffer, sizeof(orig));
432 status = statsd_parse_line(buffer);
434 ERROR("statsd plugin: Unable to parse line: \"%s\"", orig);
438 } /* }}} void statsd_parse_buffer */
/* Receives one datagram from `fd` without blocking, NUL-terminates it and
 * passes it to statsd_parse_buffer(). */
440 static void statsd_network_read(int fd) /* {{{ */
446 status = recv(fd, buffer, sizeof(buffer), /* flags = */ MSG_DONTWAIT);
/* EAGAIN / EWOULDBLOCK are tested separately from the errors that get
 * logged below. */
449 if ((errno == EAGAIN) || (errno == EWOULDBLOCK))
452 ERROR("statsd plugin: recv(2) failed: %s", STRERRNO);
/* recv() may fill the whole buffer; clamp the length so the terminator
 * written below stays inside it (the last received byte is then dropped). */
456 buffer_size = (size_t)status;
457 if (buffer_size >= sizeof(buffer))
458 buffer_size = sizeof(buffer) - 1;
459 buffer[buffer_size] = 0;
461 statsd_parse_buffer(buffer);
462 } /* }}} void statsd_network_read */
/* Resolves the configured (or default) node and service and tries to open
 * and bind one UDP socket per resolved address. The sockets are collected in
 * a growing pollfd array and its length is reported through *ret_fds_num.
 * NOTE(review): the assignment to *ret_fds, the per-socket cleanup on
 * failure and the return values are on lines missing from this view. */
464 static int statsd_network_init(struct pollfd **ret_fds, /* {{{ */
465 size_t *ret_fds_num) {
466 struct pollfd *fds = NULL;
469 struct addrinfo *ai_list;
472 char const *node = (conf_node != NULL) ? conf_node : STATSD_DEFAULT_NODE;
473 char const *service =
474 (conf_service != NULL) ? conf_service : STATSD_DEFAULT_SERVICE;
/* Any address family, datagram sockets, addresses suitable for bind(). */
476 struct addrinfo ai_hints = {.ai_family = AF_UNSPEC,
477 .ai_flags = AI_PASSIVE | AI_ADDRCONFIG,
478 .ai_socktype = SOCK_DGRAM};
480 status = getaddrinfo(node, service, &ai_hints, &ai_list);
482 ERROR("statsd plugin: getaddrinfo (\"%s\", \"%s\") failed: %s", node,
483 service, gai_strerror(status));
487 for (struct addrinfo *ai_ptr = ai_list; ai_ptr != NULL;
488 ai_ptr = ai_ptr->ai_next) {
492 char dbg_node[NI_MAXHOST];
493 char dbg_service[NI_MAXSERV];
495 fd = socket(ai_ptr->ai_family, ai_ptr->ai_socktype, ai_ptr->ai_protocol);
497 ERROR("statsd plugin: socket(2) failed: %s", STRERRNO);
/* Numeric host/port strings, used only for the debug message below. */
501 getnameinfo(ai_ptr->ai_addr, ai_ptr->ai_addrlen, dbg_node, sizeof(dbg_node),
502 dbg_service, sizeof(dbg_service),
503 NI_DGRAM | NI_NUMERICHOST | NI_NUMERICSERV);
504 DEBUG("statsd plugin: Trying to bind to [%s]:%s ...", dbg_node,
507 status = bind(fd, ai_ptr->ai_addr, ai_ptr->ai_addrlen);
509 ERROR("statsd plugin: bind(2) failed: %s", STRERRNO);
/* Grow the pollfd array by one slot; the result goes to `tmp` so `fds`
 * stays valid if realloc fails. */
514 tmp = realloc(fds, sizeof(*fds) * (fds_num + 1));
516 ERROR("statsd plugin: realloc failed.");
/* Zero one pollfd entry and subscribe it to readable events. */
524 memset(tmp, 0, sizeof(*tmp));
526 tmp->events = POLLIN | POLLPRI;
529 freeaddrinfo(ai_list);
/* Reported when no listening socket could be created; a NULL node is
 * printed as "::". */
532 ERROR("statsd plugin: Unable to create listening socket for [%s]:%s.",
533 (node != NULL) ? node : "::", service);
538 *ret_fds_num = fds_num;
540 } /* }}} int statsd_network_init */
/* Entry point of the receiver thread: opens the listening sockets, then
 * polls them until network_thread_shutdown becomes true, reading from every
 * socket that reports input.
 * NOTE(review): network_thread_shutdown is a plain bool written by
 * statsd_shutdown() and read here; no lock or atomic access is visible in
 * this view -- confirm this is intended.
 * NOTE(review): the socket close / free code after the loop is on lines
 * missing from this view. */
542 static void *statsd_network_thread(void *args) /* {{{ */
544 struct pollfd *fds = NULL;
548 status = statsd_network_init(&fds, &fds_num);
550 ERROR("statsd plugin: Unable to open listening sockets.");
551 pthread_exit((void *)0);
554 while (!network_thread_shutdown) {
/* Blocks without timeout; statsd_shutdown() signals this thread with
 * pthread_kill(), presumably to interrupt this call -- confirm. */
555 status = poll(fds, (nfds_t)fds_num, /* timeout = */ -1);
/* EINTR / EAGAIN are tested separately from the errors logged below. */
558 if ((errno == EINTR) || (errno == EAGAIN))
561 ERROR("statsd plugin: poll(2) failed: %s", STRERRNO);
565 for (size_t i = 0; i < fds_num; i++) {
/* Tests for sockets that have nothing to read. */
566 if ((fds[i].revents & (POLLIN | POLLPRI)) == 0)
569 statsd_network_read(fds[i].fd);
572 } /* while (!network_thread_shutdown) */
575 for (size_t i = 0; i < fds_num; i++)
580 } /* }}} void *statsd_network_thread */
/* Handles one "TimerPercentile" option: reads a double from `ci`, range
 * checks it and appends it to conf_timer_percentile.
 * NOTE(review): the return statements are not part of this view. */
582 static int statsd_config_timer_percentile(oconfig_item_t *ci) /* {{{ */
584 double percent = NAN;
588 status = cf_util_get_double(ci, &percent);
/* Both bounds are exclusive: exactly 0 and exactly 100 are rejected. */
592 if ((percent <= 0.0) || (percent >= 100)) {
593 ERROR("statsd plugin: The value for \"%s\" must be between 0 and 100, "
/* Grow the array by one element. */
600 realloc(conf_timer_percentile,
601 sizeof(*conf_timer_percentile) * (conf_timer_percentile_num + 1));
603 ERROR("statsd plugin: realloc failed.");
606 conf_timer_percentile = tmp;
607 conf_timer_percentile[conf_timer_percentile_num] = percent;
608 conf_timer_percentile_num++;
611 } /* }}} int statsd_config_timer_percentile */
/* Configuration callback: walks the children of `ci` and stores each
 * recognised option (matched case-insensitively) in the corresponding conf_*
 * variable. Unknown keys are reported with ERROR(). The results of the
 * cf_util_get_*() helpers and of statsd_config_timer_percentile() are not
 * checked on the lines shown. */
613 static int statsd_config(oconfig_item_t *ci) /* {{{ */
615 for (int i = 0; i < ci->children_num; i++) {
616 oconfig_item_t *child = ci->children + i;
/* Listen address and port. */
618 if (strcasecmp("Host", child->key) == 0)
619 cf_util_get_string(child, &conf_node);
620 else if (strcasecmp("Port", child->key) == 0)
621 cf_util_get_service(child, &conf_service);
/* Per-kind deletion of metrics that stopped receiving updates. */
622 else if (strcasecmp("DeleteCounters", child->key) == 0)
623 cf_util_get_boolean(child, &conf_delete_counters);
624 else if (strcasecmp("DeleteTimers", child->key) == 0)
625 cf_util_get_boolean(child, &conf_delete_timers);
626 else if (strcasecmp("DeleteGauges", child->key) == 0)
627 cf_util_get_boolean(child, &conf_delete_gauges);
628 else if (strcasecmp("DeleteSets", child->key) == 0)
629 cf_util_get_boolean(child, &conf_delete_sets);
/* Optional extra values dispatched for counters and timers. */
630 else if (strcasecmp("CounterSum", child->key) == 0)
631 cf_util_get_boolean(child, &conf_counter_sum);
632 else if (strcasecmp("TimerLower", child->key) == 0)
633 cf_util_get_boolean(child, &conf_timer_lower);
634 else if (strcasecmp("TimerUpper", child->key) == 0)
635 cf_util_get_boolean(child, &conf_timer_upper);
636 else if (strcasecmp("TimerSum", child->key) == 0)
637 cf_util_get_boolean(child, &conf_timer_sum);
638 else if (strcasecmp("TimerCount", child->key) == 0)
639 cf_util_get_boolean(child, &conf_timer_count);
640 else if (strcasecmp("TimerPercentile", child->key) == 0)
641 statsd_config_timer_percentile(child);
643 ERROR("statsd plugin: The \"%s\" config option is not valid.",
648 } /* }}} int statsd_config */
/* Init callback: creates metrics_tree if it does not exist yet and starts
 * the network thread unless it is already marked as running. Both steps
 * happen while holding metrics_lock.
 * NOTE(review): the return statements are not part of this view. */
650 static int statsd_init(void) /* {{{ */
652 pthread_mutex_lock(&metrics_lock);
/* The tree is keyed by strings, hence strcmp as comparator. */
653 if (metrics_tree == NULL)
654 metrics_tree = c_avl_create((int (*)(const void *, const void *))strcmp);
656 if (!network_thread_running) {
659 status = pthread_create(&network_thread,
660 /* attr = */ NULL, statsd_network_thread,
663 pthread_mutex_unlock(&metrics_lock);
/* NOTE(review): pthread_create() reports failure through its return value
 * and is not specified to set errno, so STRERRNO may describe an unrelated
 * error here; formatting `status` instead would be more accurate. */
664 ERROR("statsd plugin: pthread_create failed: %s", STRERRNO);
668 network_thread_running = true;
670 pthread_mutex_unlock(&metrics_lock);
673 } /* }}} int statsd_init */
675 /* Must hold metrics_lock when calling this function. */
/* Empties the member set of a STATSD_SET metric by picking entries until the
 * tree is empty; the tree itself is kept. The two checks below cover a NULL
 * metric, a metric of another kind and a metric without a set.
 * NOTE(review): what is returned in those cases and how the picked keys are
 * released is on lines missing from this view. */
676 static int statsd_metric_clear_set_unsafe(statsd_metric_t *metric) /* {{{ */
681 if ((metric == NULL) || (metric->type != STATSD_SET))
684 if (metric->set == NULL)
687 while (c_avl_pick(metric->set, &key, &value) == 0) {
693 } /* }}} int statsd_metric_clear_set_unsafe */
695 /* Must hold metrics_lock when calling this function. */
/* Dispatches the current state of `metric` under plugin "statsd" with
 * type_instance `name`:
 *  - gauge:   type "gauge", the stored value;
 *  - timer:   type "latency", "<name>-average" plus the optional -lower,
 *             -upper, -sum and -percentile-<p> values, and optionally a
 *             "gauge" typed "<name>-count"; the latency counter is reset
 *             afterwards;
 *  - set:     type "objects", the number of members (0 without a set);
 *  - counter: type "derive", the running ->counter total, optionally
 *             preceded by a "count" typed value holding the delta.
 * The function's return value is that of the final plugin_dispatch_values()
 * call.
 * NOTE(review): several lines are missing from this view, among them the
 * assignments in front of the `? CDTIME_T_TO_DOUBLE(...)` fragments, their
 * else-branches and any return inside the timer branch. */
696 static int statsd_metric_submit_unsafe(char const *name,
697 statsd_metric_t *metric) /* {{{ */
699 value_list_t vl = VALUE_LIST_INIT;
/* Single value slot, pre-set to NaN. */
701 vl.values = &(value_t){.gauge = NAN};
703 sstrncpy(vl.plugin, "statsd", sizeof(vl.plugin));
705 if (metric->type == STATSD_GAUGE)
706 sstrncpy(vl.type, "gauge", sizeof(vl.type));
707 else if (metric->type == STATSD_TIMER)
708 sstrncpy(vl.type, "latency", sizeof(vl.type));
709 else if (metric->type == STATSD_SET)
710 sstrncpy(vl.type, "objects", sizeof(vl.type));
711 else /* if (metric->type == STATSD_COUNTER) */
712 sstrncpy(vl.type, "derive", sizeof(vl.type));
714 sstrncpy(vl.type_instance, name, sizeof(vl.type_instance));
716 if (metric->type == STATSD_GAUGE)
717 vl.values[0].gauge = (gauge_t)metric->value;
718 else if (metric->type == STATSD_TIMER) {
/* The latency counter is only queried for average, min, max, sum and
 * percentiles when at least one sample arrived since the last read. */
719 bool have_events = (metric->updates_num > 0);
721 /* Make sure all timer metrics share the *same* timestamp. */
724 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-average", name);
727 ? CDTIME_T_TO_DOUBLE(latency_counter_get_average(metric->latency))
729 plugin_dispatch_values(&vl);
731 if (conf_timer_lower) {
732 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-lower", name);
735 ? CDTIME_T_TO_DOUBLE(latency_counter_get_min(metric->latency))
737 plugin_dispatch_values(&vl);
740 if (conf_timer_upper) {
741 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-upper", name);
744 ? CDTIME_T_TO_DOUBLE(latency_counter_get_max(metric->latency))
746 plugin_dispatch_values(&vl);
749 if (conf_timer_sum) {
750 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-sum", name);
753 ? CDTIME_T_TO_DOUBLE(latency_counter_get_sum(metric->latency))
755 plugin_dispatch_values(&vl);
/* One value per configured percentile; "%.0f" prints the percentile
 * without decimals in the instance name. */
758 for (size_t i = 0; i < conf_timer_percentile_num; i++) {
759 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-percentile-%.0f",
760 name, conf_timer_percentile[i]);
762 have_events ? CDTIME_T_TO_DOUBLE(latency_counter_get_percentile(
763 metric->latency, conf_timer_percentile[i]))
765 plugin_dispatch_values(&vl);
768 /* Keep this at the end, since vl.type is set to "gauge" here. The
769 * vl.type's above are implicitly set to "latency". */
770 if (conf_timer_count) {
771 sstrncpy(vl.type, "gauge", sizeof(vl.type));
772 snprintf(vl.type_instance, sizeof(vl.type_instance), "%s-count", name);
773 vl.values[0].gauge = latency_counter_get_num(metric->latency);
774 plugin_dispatch_values(&vl);
/* Start a fresh histogram for the next interval. */
777 latency_counter_reset(metric->latency);
779 } else if (metric->type == STATSD_SET) {
780 if (metric->set == NULL)
781 vl.values[0].gauge = 0.0;
783 vl.values[0].gauge = (gauge_t)c_avl_size(metric->set);
784 } else { /* STATSD_COUNTER */
/* Only the integral part of the accumulated value is moved into the
 * derive counter in this round. */
785 gauge_t delta = nearbyint(metric->value);
787 /* Etsy's statsd writes counters as two metrics: a rate and the change since
788 * the last write. Since collectd does not reset its DERIVE metrics to zero,
789 * this makes little sense, but we're dispatching a "count" metric here
790 * anyway - if requested by the user - for compatibility reasons. */
791 if (conf_counter_sum) {
792 sstrncpy(vl.type, "count", sizeof(vl.type));
793 vl.values[0].gauge = delta;
794 plugin_dispatch_values(&vl);
796 /* restore vl.type */
797 sstrncpy(vl.type, "derive", sizeof(vl.type));
800 /* Rather than resetting value to zero, subtract delta so we correctly keep
801 * track of residuals. */
802 metric->value -= delta;
803 metric->counter += (derive_t)delta;
805 vl.values[0].derive = metric->counter;
808 return plugin_dispatch_values(&vl);
809 } /* }}} int statsd_metric_submit_unsafe */
/* Read callback: walks metrics_tree under metrics_lock and, for every
 * metric, either queues it for deletion (no update since the previous read
 * and the matching Delete<Kind> option is enabled) or dispatches it and
 * resets its per-interval state. Queued metrics are removed from the tree
 * only after the iterator has been destroyed.
 * NOTE(review): several lines are missing from this view, among them the
 * declaration of `name`, the return statements and the statements that end
 * the deletion branch and the failed-removal branch. */
811 static int statsd_read(void) /* {{{ */
813 c_avl_iterator_t *iter;
815 statsd_metric_t *metric;
817 char **to_be_deleted = NULL;
818 size_t to_be_deleted_num = 0;
820 pthread_mutex_lock(&metrics_lock);
/* Nothing to report when the tree has not been created. */
822 if (metrics_tree == NULL) {
823 pthread_mutex_unlock(&metrics_lock);
827 iter = c_avl_get_iterator(metrics_tree);
828 while (c_avl_iterator_next(iter, (void *)&name, (void *)&metric) == 0) {
829 if ((metric->updates_num == 0) &&
830 ((conf_delete_counters && (metric->type == STATSD_COUNTER)) ||
831 (conf_delete_timers && (metric->type == STATSD_TIMER)) ||
832 (conf_delete_gauges && (metric->type == STATSD_GAUGE)) ||
833 (conf_delete_sets && (metric->type == STATSD_SET)))) {
834 DEBUG("statsd plugin: Deleting metric \"%s\".", name);
/* Only remember the key here; the entry is removed after iteration. */
835 strarray_add(&to_be_deleted, &to_be_deleted_num, name);
839 /* Names have a prefix, e.g. "c:", which determines the (statsd) type.
840 * Remove this here. */
841 statsd_metric_submit_unsafe(name + 2, metric);
843 /* Reset the metric. */
844 metric->updates_num = 0;
845 if (metric->type == STATSD_SET)
846 statsd_metric_clear_set_unsafe(metric);
848 c_avl_iterator_destroy(iter);
850 for (size_t i = 0; i < to_be_deleted_num; i++) {
853 status = c_avl_remove(metrics_tree, to_be_deleted[i], (void *)&name,
/* Log prefix corrected from "stats plugin" so it matches every other
 * message emitted by this plugin. */
856 ERROR("statsd plugin: c_avl_remove (\"%s\") failed with status %i.",
857 to_be_deleted[i], status);
862 statsd_metric_free(metric);
865 pthread_mutex_unlock(&metrics_lock);
867 strarray_free(to_be_deleted, to_be_deleted_num);
870 } /* }}} int statsd_read */
/* Shutdown callback: stops the network thread if it is running, then frees
 * every metric and destroys metrics_tree under metrics_lock.
 * NOTE(review): freeing of the picked keys, resetting of metrics_tree and
 * the conf_* strings, and the return statement are on lines missing from
 * this view. */
872 static int statsd_shutdown(void) /* {{{ */
877 if (network_thread_running) {
/* Raise the flag first, then signal the thread; its poll() call has no
 * timeout, so the signal is presumably what wakes it up -- confirm. */
878 network_thread_shutdown = true;
879 pthread_kill(network_thread, SIGTERM);
880 pthread_join(network_thread, /* retval = */ NULL);
882 network_thread_running = false;
884 pthread_mutex_lock(&metrics_lock);
/* Drain the tree entry by entry, releasing each metric. */
886 while (c_avl_pick(metrics_tree, &key, &value) == 0) {
888 statsd_metric_free(value);
890 c_avl_destroy(metrics_tree);
896 pthread_mutex_unlock(&metrics_lock);
899 } /* }}} int statsd_shutdown */
/* Plugin entry point: registers the config, init, read and shutdown
 * callbacks of this file under the plugin name "statsd". */
901 void module_register(void) {
902 plugin_register_complex_config("statsd", statsd_config);
903 plugin_register_init("statsd", statsd_init);
904 plugin_register_read("statsd", statsd_read);
905 plugin_register_shutdown("statsd", statsd_shutdown);