From 9cc8e54618fe3f8e7af879fcc9e04d71125456fd Mon Sep 17 00:00:00 2001 From: Florian Forster Date: Sun, 1 Mar 2009 09:42:33 +0100 Subject: [PATCH] ping plugin: Add support for drop rate and standard deviation. --- src/collectd.conf.pod | 27 ++ src/ping.c | 712 ++++++++++++++++++++++++++++++++++++-------------- src/types.db | 2 + 3 files changed, 543 insertions(+), 198 deletions(-) diff --git a/src/collectd.conf.pod b/src/collectd.conf.pod index 391f0f32..d3240c94 100644 --- a/src/collectd.conf.pod +++ b/src/collectd.conf.pod @@ -1794,6 +1794,13 @@ to collectd's plugin system. See L for its documentation. =head2 Plugin C<ping> +The I<Ping> plugin starts a new thread which sends ICMP "ping" packets to the +configured hosts periodically and measures the network latency. Whenever the +C<read> function of the plugin is called, it submits the average latency, the +standard deviation and the drop rate for each host. + +Available configuration options: + =over 4 =item B<Host> I<IP-address> @@ -1801,6 +1808,26 @@ to collectd's plugin system. See L for its documentation. Host to ping periodically. This option may be repeated several times to ping multiple hosts. +=item B<Interval> I<Seconds> + +Sets the interval in which to send ICMP echo packets to the configured hosts. +This is B<not> the interval in which statistics are queried from the plugin but +the interval in which the hosts are "pinged". Therefore, the setting here +should be smaller than or equal to the global B<Interval> setting. Fractional +times, such as "1.24", are allowed. + +Default: B<1.0> + +=item B<Timeout> I<Seconds> + +Time to wait for a response from the host to which an ICMP packet had been +sent. If a reply was not received after I<Seconds> seconds, the host is assumed +to be down or the packet to be dropped. This setting must be smaller than the +B<Interval> setting above for the plugin to work correctly. Fractional +arguments are accepted. + +Default: B<0.9> + =item B<TTL> I<0-255> Sets the Time-To-Live of generated ICMP packets. 
diff --git a/src/ping.c b/src/ping.c index 7ffbfaff..94bb1557 100644 --- a/src/ping.c +++ b/src/ping.c @@ -1,6 +1,6 @@ /** * collectd - src/ping.c - * Copyright (C) 2005,2006 Florian octo Forster + * Copyright (C) 2005-2009 Florian octo Forster * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -24,238 +24,554 @@ #include "plugin.h" #include "configfile.h" +#include + #include #include "liboping/oping.h" +#if HAVE_NETDB_H +# include /* NI_MAXHOST */ +#endif + +#ifndef NI_MAXHOST +# define NI_MAXHOST 1025 +#endif + /* * Private data types */ struct hostlist_s { - char *host; - int wait_time; - int wait_left; - struct hostlist_s *next; + char *host; + + uint32_t pkg_sent; /* packets counted as sent since the last reset */ + uint32_t pkg_recv; /* packets counted as answered since the last reset */ + + double latency_total; /* sum of the latencies of all counted replies */ + double latency_squared; /* sum of the squared latencies of all counted replies */ + + struct hostlist_s *next; }; typedef struct hostlist_s hostlist_t; /* * Private variables */ -static pingobj_t *pingobj = NULL; -static hostlist_t *hosts = NULL; +static hostlist_t *hostlist_head = NULL; + +static int ping_ttl = PING_DEF_TTL; +static double ping_interval = 1.0; +static double ping_timeout = 0.9; + +static int ping_thread_loop = 0; /* non-zero while the ping thread is meant to keep running */ +static int ping_thread_error = 0; /* set by the ping thread when it gives up; polled by ping_read */ +static pthread_t ping_thread_id; +static pthread_mutex_t ping_lock = PTHREAD_MUTEX_INITIALIZER; /* serializes counter updates between the ping thread and ping_read */ +static pthread_cond_t ping_cond = PTHREAD_COND_INITIALIZER; /* signalled by stop_thread to cut the wait between two pings short */ static const char *config_keys[] = { - "Host", - "TTL", - NULL + "Host", + "TTL", + "Interval", + "Timeout" }; -static int config_keys_num = 2; +static int config_keys_num = STATIC_ARRAY_SIZE (config_keys); /* * Private functions */ -static void add_hosts (void) +/* Assure that `ts->tv_nsec' is in the range 0 .. 
999999999 */ +static void time_normalize (struct timespec *ts) /* {{{ */ +{ + while (ts->tv_nsec < 0) + { + if (ts->tv_sec == 0) /* no whole second left to borrow from: clamp to zero */ + { + ts->tv_nsec = 0; + return; + } + + ts->tv_sec -= 1; + ts->tv_nsec += 1000000000; + } + + while (ts->tv_nsec >= 1000000000) + { + ts->tv_sec += 1; + ts->tv_nsec -= 1000000000; + } +} /* }}} void time_normalize */ + +/* Add `ts_int' to `tv_begin' and store the result in `ts_dest'. If the result + * is smaller than `tv_end', copy `tv_end' to `ts_dest' instead. */ +static void time_calc (struct timespec *ts_dest, /* {{{ */ + const struct timespec *ts_int, + const struct timeval *tv_begin, + const struct timeval *tv_end) { - hostlist_t *hl_this; - hostlist_t *hl_prev; - - hl_this = hosts; - hl_prev = NULL; - while (hl_this != NULL) - { - DEBUG ("ping plugin: host = %s, wait_left = %i, " - "wait_time = %i, next = %p", - hl_this->host, hl_this->wait_left, - hl_this->wait_time, (void *) hl_this->next); - - if (hl_this->wait_left <= 0) - { - if (ping_host_add (pingobj, hl_this->host) == 0) - { - DEBUG ("ping plugin: Successfully added host %s", hl_this->host); - /* Remove the host from the linked list */ - if (hl_prev != NULL) - hl_prev->next = hl_this->next; - else - hosts = hl_this->next; - free (hl_this->host); - free (hl_this); - hl_this = (hl_prev != NULL) ? hl_prev : hosts; - } - else - { - WARNING ("ping plugin: Failed adding host " - "`%s': %s", hl_this->host, - ping_get_error (pingobj)); - hl_this->wait_left = hl_this->wait_time; - hl_this->wait_time *= 2; - if (hl_this->wait_time > 86400) - hl_this->wait_time = 86400; - } - } - else - { - hl_this->wait_left -= interval_g; - } - - if (hl_this != NULL) - { - hl_prev = hl_this; - hl_this = hl_this->next; - } - } -} /* void add_hosts */ - -static int ping_init (void) + ts_dest->tv_sec = tv_begin->tv_sec + ts_int->tv_sec; + ts_dest->tv_nsec = (tv_begin->tv_usec * 1000) + ts_int->tv_nsec; + time_normalize (ts_dest); + + /* Assure that `(begin + interval) > end'. 
+ * This may seem overly complicated, but `tv_sec' is of type `time_t' + * which may be `unsigned'. *sigh* */ + if ((tv_end->tv_sec > ts_dest->tv_sec) + || ((tv_end->tv_sec == ts_dest->tv_sec) + && ((tv_end->tv_usec * 1000) > ts_dest->tv_nsec))) + { + ts_dest->tv_sec = tv_end->tv_sec; + ts_dest->tv_nsec = 1000 * tv_end->tv_usec; + } + + time_normalize (ts_dest); +} /* }}} void time_calc */ + +static void *ping_thread (void *arg) /* {{{ */ /* Worker thread: pings all configured hosts once per `ping_interval' and accumulates the per-host counters until `ping_thread_loop' is cleared. `ping_lock' is held except while ping_send runs and during the timed wait. `arg' is unused. */ { - if (pingobj == NULL) - return (-1); + static pingobj_t *pingobj = NULL; + + struct timeval tv_begin; + struct timeval tv_end; + struct timespec ts_wait; + struct timespec ts_int; + + hostlist_t *hl; + int status; + + pthread_mutex_lock (&ping_lock); + + pingobj = ping_construct (); + if (pingobj == NULL) + { + ERROR ("ping plugin: ping_construct failed."); + ping_thread_error = 1; + pthread_mutex_unlock (&ping_lock); + return ((void *) -1); + } + + ping_setopt (pingobj, PING_OPT_TIMEOUT, (void *) &ping_timeout); + ping_setopt (pingobj, PING_OPT_TTL, (void *) &ping_ttl); + + /* Add all the hosts to the ping object. */ + status = 0; /* counts the hosts that were added successfully */ + for (hl = hostlist_head; hl != NULL; hl = hl->next) + { + int tmp_status; + tmp_status = ping_host_add (pingobj, hl->host); + if (tmp_status != 0) + WARNING ("ping plugin: ping_host_add (%s) failed.", hl->host); + else + status++; + } + + if (status == 0) + { + ERROR ("ping plugin: No host could be added to ping object. 
Giving up."); + ping_thread_error = 1; + pthread_mutex_unlock (&ping_lock); /* NOTE(review): `pingobj' is not passed to ping_destroy on this early return. */ + return ((void *) -1); + } + + /* Set up `ts_int' */ + { + double temp_sec; + double temp_nsec; + + temp_nsec = modf (ping_interval, &temp_sec); + ts_int.tv_sec = (time_t) temp_sec; + ts_int.tv_nsec = (long) (temp_nsec * 1000000000L); + } + + while (ping_thread_loop > 0) + { + pingobj_iter_t *iter; + int status; + + if (gettimeofday (&tv_begin, NULL) < 0) + { + ERROR ("ping plugin: gettimeofday failed"); + ping_thread_error = 1; + break; + } + + pthread_mutex_unlock (&ping_lock); /* ping_send runs without holding the lock */ + + status = ping_send (pingobj); + if (status < 0) + { + ERROR ("ping plugin: ping_send failed: %s", ping_get_error (pingobj)); + pthread_mutex_lock (&ping_lock); + ping_thread_error = 1; + break; + } + + pthread_mutex_lock (&ping_lock); + + if (ping_thread_loop <= 0) + break; + + for (iter = ping_iterator_get (pingobj); + iter != NULL; + iter = ping_iterator_next (iter)) + { /* {{{ */ + char userhost[NI_MAXHOST]; + double latency; + size_t param_size; + + param_size = sizeof (userhost); + status = ping_iterator_get_info (iter, +#ifdef PING_INFO_USERNAME + PING_INFO_USERNAME, +#else + PING_INFO_HOSTNAME, +#endif + userhost, ¶m_size); /* NOTE(review): `¶m_size' here and below looks like an extraction-mangled address-of `param_size' - confirm against the original patch. */ + if (status != 0) + { + WARNING ("ping plugin: ping_iterator_get_info failed: %s", + ping_get_error (pingobj)); + continue; + } + + for (hl = hostlist_head; hl != NULL; hl = hl->next) + if (strcmp (userhost, hl->host) == 0) + break; + + if (hl == NULL) + { + WARNING ("ping plugin: Cannot find host %s.", userhost); + continue; + } + + param_size = sizeof (latency); + status = ping_iterator_get_info (iter, PING_INFO_LATENCY, + (void *) &latency, ¶m_size); + if (status != 0) + { + WARNING ("ping plugin: ping_iterator_get_info failed: %s", + ping_get_error (pingobj)); + continue; + } + + hl->pkg_sent++; /* a negative latency counts as sent but not as received */ + if (latency >= 0.0) + { + hl->pkg_recv++; + hl->latency_total += latency; + hl->latency_squared += (latency * latency); + } + } /* }}} for (iter) */ + + if (gettimeofday (&tv_end, 
NULL) < 0) + { + ERROR ("ping plugin: gettimeofday failed"); + ping_thread_error = 1; + break; + } + + /* Calculate the absolute time until which to wait and store it in + * `ts_wait'. */ + time_calc (&ts_wait, &ts_int, &tv_begin, &tv_end); + + status = pthread_cond_timedwait (&ping_cond, &ping_lock, &ts_wait); /* NOTE(review): `status' is not examined; a timeout and a wake-up are handled alike. */ + if (ping_thread_loop <= 0) + break; + } /* while (ping_thread_loop > 0) */ + + pthread_mutex_unlock (&ping_lock); + ping_destroy (pingobj); + + return ((void *) 0); +} /* }}} void *ping_thread */ + +static int start_thread (void) /* {{{ */ /* Sets the loop flag and creates the ping thread. Returns 0 on success, -1 if the loop flag is already set or pthread_create fails. */ +{ + int status; + + pthread_mutex_lock (&ping_lock); + + if (ping_thread_loop != 0) + { + pthread_mutex_unlock (&ping_lock); + return (-1); + } + + ping_thread_loop = 1; + ping_thread_error = 0; + status = pthread_create (&ping_thread_id, /* attr = */ NULL, + ping_thread, /* arg = */ (void *) 0); + if (status != 0) + { + ping_thread_loop = 0; + ERROR ("ping plugin: Starting thread failed."); + pthread_mutex_unlock (&ping_lock); + return (-1); + } + + pthread_mutex_unlock (&ping_lock); + return (0); +} /* }}} int start_thread */ + +static int stop_thread (void) /* {{{ */ /* Clears the loop flag, wakes the ping thread through `ping_cond' and joins it. Returns 0 on success, -1 if the loop flag was not set or pthread_join fails. */ +{ + int status; + + pthread_mutex_lock (&ping_lock); - if (hosts != NULL) - add_hosts (); + if (ping_thread_loop == 0) + { + pthread_mutex_unlock (&ping_lock); + return (-1); + } - return (0); -} /* int ping_init */ + ping_thread_loop = 0; + pthread_cond_broadcast (&ping_cond); + pthread_mutex_unlock (&ping_lock); -static int ping_config (const char *key, const char *value) + status = pthread_join (ping_thread_id, /* return = */ NULL); + if (status != 0) + { + ERROR ("ping plugin: Stopping thread failed."); + status = -1; + } + + memset (&ping_thread_id, 0, sizeof (ping_thread_id)); + ping_thread_error = 0; + + return (status); +} /* }}} int stop_thread */ + +static int ping_init (void) /* {{{ */ /* Init callback: returns -1 when no host is configured or the thread cannot be started; a timeout above the interval is replaced by 0.9 * interval. */ { - if (pingobj == NULL) - { - if ((pingobj = ping_construct ()) == NULL) - { - ERROR ("ping plugin: `ping_construct' failed."); - return (1); - } - } - - if 
(strcasecmp (key, "host") == 0) - { - hostlist_t *hl; - char *host; - - if ((hl = (hostlist_t *) malloc (sizeof (hostlist_t))) == NULL) - { - char errbuf[1024]; - ERROR ("ping plugin: malloc failed: %s", - sstrerror (errno, errbuf, - sizeof (errbuf))); - return (1); - } - if ((host = strdup (value)) == NULL) - { - char errbuf[1024]; - free (hl); - ERROR ("ping plugin: strdup failed: %s", - sstrerror (errno, errbuf, - sizeof (errbuf))); - return (1); - } - - hl->host = host; - hl->wait_time = 2 * interval_g; - hl->wait_left = 0; - hl->next = hosts; - hosts = hl; - } - else if (strcasecmp (key, "ttl") == 0) - { - int ttl = atoi (value); - if (ping_setopt (pingobj, PING_OPT_TTL, (void *) &ttl)) - { - WARNING ("ping: liboping did not accept the TTL value %i", ttl); - return (1); - } - } - else - { - return (-1); - } - - return (0); -} - -static void ping_submit (char *host, double latency) + if (hostlist_head == NULL) + { + NOTICE ("ping plugin: No hosts have been configured."); + return (-1); + } + + if (ping_timeout > ping_interval) + { + ping_timeout = 0.9 * ping_interval; + WARNING ("ping plugin: Timeout is greater than interval. 
" + "Will use a timeout of %gs.", ping_timeout); + } + + if (start_thread () != 0) + return (-1); + + return (0); +} /* }}} int ping_init */ + +static int ping_config (const char *key, const char *value) /* {{{ */ /* Config callback for the keys Host, TTL, Interval and Timeout. Returns 0 for a known key (invalid values are only warned about), 1 when allocating a host entry fails and -1 for an unknown key. */ { - value_t values[1]; - value_list_t vl = VALUE_LIST_INIT; + if (strcasecmp (key, "Host") == 0) + { + hostlist_t *hl; + char *host; + + hl = (hostlist_t *) malloc (sizeof (hostlist_t)); + if (hl == NULL) + { + char errbuf[1024]; + ERROR ("ping plugin: malloc failed: %s", + sstrerror (errno, errbuf, sizeof (errbuf))); + return (1); + } + + host = strdup (value); + if (host == NULL) + { + char errbuf[1024]; + sfree (hl); + ERROR ("ping plugin: strdup failed: %s", + sstrerror (errno, errbuf, sizeof (errbuf))); + return (1); + } + + hl->host = host; + hl->pkg_sent = 0; + hl->pkg_recv = 0; + hl->latency_total = 0.0; + hl->latency_squared = 0.0; + hl->next = hostlist_head; /* new hosts are prepended to the list */ + hostlist_head = hl; + } + else if (strcasecmp (key, "TTL") == 0) + { + int ttl = atoi (value); /* NOTE(review): atoi/atof cannot tell a malformed value from 0. */ + if ((ttl > 0) && (ttl <= 255)) + ping_ttl = ttl; + else + WARNING ("ping plugin: Ignoring invalid TTL %i.", ttl); + } + else if (strcasecmp (key, "Interval") == 0) + { + double tmp; + + tmp = atof (value); + if (tmp > 0.0) + ping_interval = tmp; + else + WARNING ("ping plugin: Ignoring invalid interval %g (%s)", + tmp, value); + } + else if (strcasecmp (key, "Timeout") == 0) + { + double tmp; + + tmp = atof (value); + if (tmp > 0.0) + ping_timeout = tmp; + else + WARNING ("ping plugin: Ignoring invalid timeout %g (%s)", + tmp, value); + } + else + { + return (-1); + } + + return (0); +} /* }}} int ping_config */ + +static void submit (const char *host, const char *type, /* {{{ */ + gauge_t value) /* Dispatches `value' as a single gauge of type `type' for the `ping' plugin, using `host' as the type instance. */ +{ + value_t values[1]; + value_list_t vl = VALUE_LIST_INIT; - values[0].gauge = latency; + values[0].gauge = value; - vl.values = values; - vl.values_len = 1; - sstrncpy (vl.host, hostname_g, sizeof (vl.host)); - sstrncpy (vl.plugin, "ping", sizeof (vl.plugin)); - sstrncpy (vl.plugin_instance, "", 
sizeof (vl.plugin_instance)); - sstrncpy (vl.type_instance, host, sizeof (vl.type_instance)); - sstrncpy (vl.type, "ping", sizeof (vl.type)); + vl.values = values; + vl.values_len = 1; + sstrncpy (vl.host, hostname_g, sizeof (vl.host)); + sstrncpy (vl.plugin, "ping", sizeof (vl.plugin)); + sstrncpy (vl.plugin_instance, "", sizeof (vl.plugin_instance)); + sstrncpy (vl.type_instance, host, sizeof (vl.type_instance)); + sstrncpy (vl.type, type, sizeof (vl.type)); - plugin_dispatch_values (&vl); -} + plugin_dispatch_values (&vl); +} /* }}} void submit */ -static int ping_read (void) +static int ping_read (void) /* {{{ */ /* Read callback: for every host with at least one counted packet, submits average latency, standard deviation and drop rate and resets the host's counters. If the ping thread flagged an error, the thread is restarted, all counters are cleared and -1 is returned. */ +{ + hostlist_t *hl; + + if (ping_thread_error != 0) /* NOTE(review): the flag is read without holding `ping_lock'. */ + { + ERROR ("ping plugin: The ping thread had a problem. Restarting it."); + + stop_thread (); + + for (hl = hostlist_head; hl != NULL; hl = hl->next) + { + hl->pkg_sent = 0; + hl->pkg_recv = 0; + hl->latency_total = 0.0; + hl->latency_squared = 0.0; + } + + start_thread (); + + return (-1); + } /* if (ping_thread_error != 0) */ + + for (hl = hostlist_head; hl != NULL; hl = hl->next) /* {{{ */ + { + uint32_t pkg_sent; + uint32_t pkg_recv; + double latency_total; + double latency_squared; + + double latency_average; + double latency_stddev; + + double droprate; + + /* Locking here works, because the structure of the linked list is only + * changed during configure and shutdown. */ + pthread_mutex_lock (&ping_lock); + + pkg_sent = hl->pkg_sent; + pkg_recv = hl->pkg_recv; + latency_total = hl->latency_total; + latency_squared = hl->latency_squared; + + hl->pkg_sent = 0; + hl->pkg_recv = 0; + hl->latency_total = 0.0; + hl->latency_squared = 0.0; + + pthread_mutex_unlock (&ping_lock); + + /* This e. g. happens when starting up. */ + if (pkg_sent == 0) + { + DEBUG ("ping plugin: No packages for host %s have been sent.", + hl->host); + continue; + } + + /* Calculate average. Beware of division by zero. 
*/ + if (pkg_recv == 0) + latency_average = NAN; + else + latency_average = latency_total / ((double) pkg_recv); + + /* Calculate standard deviation. Beware even more of division by zero. */ + if (pkg_recv == 0) + latency_stddev = NAN; + else if (pkg_recv == 1) + latency_stddev = 0.0; + else + latency_stddev = sqrt (((((double) pkg_recv) * latency_squared) + - (latency_total * latency_total)) + / ((double) (pkg_recv * (pkg_recv - 1)))); /* NOTE(review): `pkg_recv * (pkg_recv - 1)' is multiplied in 32-bit unsigned arithmetic before the cast (given a 32-bit int) and wraps once pkg_recv exceeds 65536. */ + + /* Calculate drop rate as a fraction between 0 and 1. NOTE(review): types.db declares ping_droprate as 0:100 - confirm whether a percentage was intended. */ + droprate = ((double) (pkg_sent - pkg_recv)) / ((double) pkg_sent); + + submit (hl->host, "ping", latency_average); + submit (hl->host, "ping_stddev", latency_stddev); + submit (hl->host, "ping_droprate", droprate); + } /* }}} for (hl = hostlist_head; hl != NULL; hl = hl->next) */ + + return (0); +} /* }}} int ping_read */ + +static int ping_shutdown (void) /* {{{ */ /* Shutdown callback: stops the ping thread and frees the host list. Returns -1, leaving the list untouched, if stop_thread fails. */ { - pingobj_iter_t *iter; - - char host[512]; - double latency; - size_t buf_len; - int number_of_hosts; - - if (pingobj == NULL) - return (-1); - - if (hosts != NULL) - add_hosts (); - - if (ping_send (pingobj) < 0) - { - ERROR ("ping plugin: `ping_send' failed: %s", - ping_get_error (pingobj)); - return (-1); - } - - number_of_hosts = 0; - for (iter = ping_iterator_get (pingobj); - iter != NULL; - iter = ping_iterator_next (iter)) - { - buf_len = sizeof (host); - if (ping_iterator_get_info (iter, PING_INFO_HOSTNAME, - host, &buf_len)) - { - WARNING ("ping plugin: ping_iterator_get_info " - "(PING_INFO_HOSTNAME) failed."); - continue; - } - - buf_len = sizeof (latency); - if (ping_iterator_get_info (iter, PING_INFO_LATENCY, - &latency, &buf_len)) - { - WARNING ("ping plugin: ping_iterator_get_info (%s, " - "PING_INFO_LATENCY) failed.", host); - continue; - } - - DEBUG ("ping plugin: host = %s, latency = %f", host, latency); - ping_submit (host, latency); - number_of_hosts++; - } - - if ((number_of_hosts == 0) && (getuid () != 0)) - { - ERROR ("ping plugin: All hosts failed. 
Try starting collectd as root."); - } - - return (number_of_hosts == 0 ? -1 : 0); -} /* int ping_read */ + hostlist_t *hl; + + INFO ("ping plugin: Shutting down thread."); + if (stop_thread () < 0) + return (-1); + + hl = hostlist_head; + while (hl != NULL) + { + hostlist_t *hl_next; + + hl_next = hl->next; + + sfree (hl->host); + sfree (hl); + + hl = hl_next; + } /* NOTE(review): `hostlist_head' is not reset and still points at the freed list afterwards. */ + + return (0); +} /* }}} int ping_shutdown */ void module_register (void) { - plugin_register_config ("ping", ping_config, - config_keys, config_keys_num); - plugin_register_init ("ping", ping_init); - plugin_register_read ("ping", ping_read); + plugin_register_config ("ping", ping_config, + config_keys, config_keys_num); + plugin_register_init ("ping", ping_init); + plugin_register_read ("ping", ping_read); + plugin_register_shutdown ("ping", ping_shutdown); /* lets the ping thread be stopped and the host list freed */ } /* void module_register */ + +/* vim: set sw=2 sts=2 et fdm=marker : */ diff --git a/src/types.db b/src/types.db index cde4e309..db469843 100644 --- a/src/types.db +++ b/src/types.db @@ -84,6 +84,8 @@ pg_numbackends value:GAUGE:0:U pg_scan value:COUNTER:0:U pg_xact value:COUNTER:0:U ping ping:GAUGE:0:65535 +ping_droprate value:GAUGE:0:100 +ping_stddev value:GAUGE:0:65535 players value:GAUGE:0:1000000 power value:GAUGE:0:U protocol_counter value:COUNTER:0:U -- 2.11.0