2 * collectd - src/utils_threshold.c
3 * Copyright (C) 2007,2008 Florian octo Forster
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; only version 2 of the License is applicable.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 * Florian octo Forster <octo at verplant.org>
25 #include "utils_avltree.h"
26 #include "utils_cache.h"
32 * Private data structures
/* Bit flags kept in a threshold's `flags' member. Each one is set or cleared
 * by the configuration option of the same name inside a `Type' block. */
/* Invert: tested by the value check and by the notification text builder. */
34 #define UT_FLAG_INVERT 0x01
/* Persist: consulted by ut_report_state when the state did not change. */
35 #define UT_FLAG_PERSIST 0x02
/* Percentage: values are converted to a share of the data sources' sum
 * before they are compared against the limits. */
36 #define UT_FLAG_PERCENTAGE 0x04
/* One configured threshold. The string members identify what the threshold
 * applies to; ut_config starts from an all-zero structure, so a member that
 * was never configured is an empty string.
 * NOTE(review): the members warning_min, warning_max, failure_min,
 * failure_max and flags are used throughout this file, but their
 * declarations and the closing line of the typedef are not part of this
 * listing. */
38 typedef struct threshold_s
40 char host[DATA_MAX_NAME_LEN];
41 char plugin[DATA_MAX_NAME_LEN];
42 char plugin_instance[DATA_MAX_NAME_LEN];
43 char type[DATA_MAX_NAME_LEN];
44 char type_instance[DATA_MAX_NAME_LEN];
/* Name of the data source to check; empty means "every data source". */
45 char data_source[DATA_MAX_NAME_LEN];
/* Next threshold stored under the same identifier (singly linked list). */
51 struct threshold_s *next;
56 * Private (static) variables
/* Tree of configured thresholds, keyed by the identifier string produced by
 * format_name. Stays NULL until ut_config creates it. */
58 static c_avl_tree_t *threshold_tree = NULL;
/* Taken around tree access in ut_threshold_add and ut_check_threshold. */
59 static pthread_mutex_t threshold_lock = PTHREAD_MUTEX_INITIALIZER;
63 * Threshold management
64 * ====================
65 * The following functions add, delete, search, etc. configured thresholds to
66 * the underlying AVL trees.
/* Looks up the entry stored in threshold_tree under exactly the given
 * identifier. A NULL `hostname', `plugin' or `type' is replaced by an empty
 * string before the key is built; the two instance arguments are handed to
 * format_name unchanged. ut_threshold_add calls this with threshold_lock
 * already held.
 * NOTE(review): the return statements are not part of this listing;
 * presumably the stored threshold is returned on a hit and NULL otherwise --
 * confirm against the full file. */
68 static threshold_t *threshold_get (const char *hostname,
69 const char *plugin, const char *plugin_instance,
70 const char *type, const char *type_instance)
/* Buffer for the lookup key. */
72 char name[6 * DATA_MAX_NAME_LEN];
73 threshold_t *th = NULL;
/* The result of format_name is not checked here; the buffer is terminated
 * explicitly afterwards. */
75 format_name (name, sizeof (name),
76 (hostname == NULL) ? "" : hostname,
77 (plugin == NULL) ? "" : plugin, plugin_instance,
78 (type == NULL) ? "" : type, type_instance);
79 name[sizeof (name) - 1] = '\0';
/* c_avl_get stores the value in `th' and yields zero when the key exists. */
81 if (c_avl_get (threshold_tree, name, (void *) &th) == 0)
85 } /* threshold_t *threshold_get */
/* Stores a heap-allocated copy of `th' in threshold_tree. If an entry with
 * the same identifier already exists, the copy is appended to the end of that
 * entry's linked list; otherwise it is inserted under a duplicated key
 * string. Failures of format_name, strdup, malloc and c_avl_insert are
 * logged with ERROR.
 * NOTE(review): the local declarations, the cleanup code and the return
 * statements are not part of this listing. Confirm in the full file that
 * name_copy and th_copy are released on every error path and that name_copy
 * is freed when the copy is appended to an existing list. */
87 static int ut_threshold_add (const threshold_t *th)
89 char name[6 * DATA_MAX_NAME_LEN];
/* Build the tree key from the threshold's identifier members. */
95 if (format_name (name, sizeof (name), th->host,
96 th->plugin, th->plugin_instance,
97 th->type, th->type_instance) != 0)
99 ERROR ("ut_threshold_add: format_name failed.");
/* `name' lives on the stack, so the key handed to the tree is a separate
 * heap copy. */
103 name_copy = strdup (name);
104 if (name_copy == NULL)
106 ERROR ("ut_threshold_add: strdup failed.");
110 th_copy = (threshold_t *) malloc (sizeof (threshold_t));
114 ERROR ("ut_threshold_add: malloc failed.");
/* Copies every member of the caller's structure, `next' included. */
117 memcpy (th_copy, th, sizeof (threshold_t));
120 DEBUG ("ut_threshold_add: Adding entry `%s'", name);
/* The lookup and the modification happen under the same lock. */
122 pthread_mutex_lock (&threshold_lock);
124 th_ptr = threshold_get (th->host, th->plugin, th->plugin_instance,
125 th->type, th->type_instance);
/* Walk to the tail of an existing list, if there is one. */
127 while ((th_ptr != NULL) && (th_ptr->next != NULL))
128 th_ptr = th_ptr->next;
130 if (th_ptr == NULL) /* no such threshold yet */
132 status = c_avl_insert (threshold_tree, name_copy, th_copy);
134 else /* th_ptr points to the last threshold in the list */
136 th_ptr->next = th_copy;
137 /* name_copy isn't needed */
141 pthread_mutex_unlock (&threshold_lock);
145 ERROR ("ut_threshold_add: c_avl_insert (%s) failed.", name);
151 } /* int ut_threshold_add */
153 * End of the threshold management functions
159 * The following approximately two hundred functions are used to handle the
160 * configuration and fill the threshold list.
/* Handles the `DataSource' option of a `Type' block: copies the option's
 * single string argument into th->data_source. A warning is logged when the
 * option does not carry exactly one string argument.
 * NOTE(review): the return statements are not part of this listing; the
 * caller stores the result in `status'. */
162 static int ut_config_type_datasource (threshold_t *th, oconfig_item_t *ci)
164 if ((ci->values_num != 1)
165 || (ci->values[0].type != OCONFIG_TYPE_STRING))
167 WARNING ("threshold values: The `DataSource' option needs exactly one "
/* Bounded copy into the fixed-size member. */
172 sstrncpy (th->data_source, ci->values[0].value.string,
173 sizeof (th->data_source));
176 } /* int ut_config_type_datasource */
/* Handles the `Instance' option of a `Type' block: copies the option's
 * single string argument into th->type_instance. A warning is logged when
 * the option does not carry exactly one string argument.
 * NOTE(review): the return statements are not part of this listing; the
 * caller stores the result in `status'. */
178 static int ut_config_type_instance (threshold_t *th, oconfig_item_t *ci)
180 if ((ci->values_num != 1)
181 || (ci->values[0].type != OCONFIG_TYPE_STRING))
183 WARNING ("threshold values: The `Instance' option needs exactly one "
/* Bounded copy into the fixed-size member. */
188 sstrncpy (th->type_instance, ci->values[0].value.string,
189 sizeof (th->type_instance));
192 } /* int ut_config_type_instance */
/* Handles the `WarningMax' and `FailureMax' options (the only keys
 * ut_config_type dispatches here). The single number argument is stored in
 * th->warning_max when the key is `WarningMax' (compared case-insensitively)
 * and in th->failure_max otherwise. A warning naming the key is logged when
 * the option does not carry exactly one number argument.
 * NOTE(review): the `else' line and the return statements are not part of
 * this listing. */
194 static int ut_config_type_max (threshold_t *th, oconfig_item_t *ci)
196 if ((ci->values_num != 1)
197 || (ci->values[0].type != OCONFIG_TYPE_NUMBER))
199 WARNING ("threshold values: The `%s' option needs exactly one "
200 "number argument.", ci->key);
204 if (strcasecmp (ci->key, "WarningMax") == 0)
205 th->warning_max = ci->values[0].value.number;
207 th->failure_max = ci->values[0].value.number;
210 } /* int ut_config_type_max */
/* Handles the `WarningMin' and `FailureMin' options (the only keys
 * ut_config_type dispatches here). The single number argument is stored in
 * th->warning_min when the key is `WarningMin' (compared case-insensitively)
 * and in th->failure_min otherwise. A warning naming the key is logged when
 * the option does not carry exactly one number argument.
 * NOTE(review): the `else' line and the return statements are not part of
 * this listing. */
212 static int ut_config_type_min (threshold_t *th, oconfig_item_t *ci)
214 if ((ci->values_num != 1)
215 || (ci->values[0].type != OCONFIG_TYPE_NUMBER))
217 WARNING ("threshold values: The `%s' option needs exactly one "
218 "number argument.", ci->key);
222 if (strcasecmp (ci->key, "WarningMin") == 0)
223 th->warning_min = ci->values[0].value.number;
225 th->failure_min = ci->values[0].value.number;
228 } /* int ut_config_type_min */
/* Handles the `Invert' option: sets UT_FLAG_INVERT in th->flags when the
 * single boolean argument is true and clears it otherwise. A warning is
 * logged when the option does not carry exactly one boolean argument.
 * NOTE(review): the `else' line and the return statements are not part of
 * this listing. */
230 static int ut_config_type_invert (threshold_t *th, oconfig_item_t *ci)
232 if ((ci->values_num != 1)
233 || (ci->values[0].type != OCONFIG_TYPE_BOOLEAN))
235 WARNING ("threshold values: The `Invert' option needs exactly one "
236 "boolean argument.");
240 if (ci->values[0].value.boolean)
241 th->flags |= UT_FLAG_INVERT;
243 th->flags &= ~UT_FLAG_INVERT;
246 } /* int ut_config_type_invert */
/* Handles the `Persist' option: sets UT_FLAG_PERSIST in th->flags when the
 * single boolean argument is true and clears it otherwise. A warning is
 * logged when the option does not carry exactly one boolean argument.
 * NOTE(review): the `else' line and the return statements are not part of
 * this listing. */
248 static int ut_config_type_persist (threshold_t *th, oconfig_item_t *ci)
250 if ((ci->values_num != 1)
251 || (ci->values[0].type != OCONFIG_TYPE_BOOLEAN))
253 WARNING ("threshold values: The `Persist' option needs exactly one "
254 "boolean argument.");
258 if (ci->values[0].value.boolean)
259 th->flags |= UT_FLAG_PERSIST;
261 th->flags &= ~UT_FLAG_PERSIST;
264 } /* int ut_config_type_persist */
/* Handles the `Percentage' option: sets UT_FLAG_PERCENTAGE in th->flags when
 * the single boolean argument is true and clears it otherwise. A warning is
 * logged when the option does not carry exactly one boolean argument.
 * NOTE(review): the `else' line and the return statements are not part of
 * this listing. */
266 static int ut_config_type_percentage(threshold_t *th, oconfig_item_t *ci)
268 if ((ci->values_num != 1)
269 || (ci->values[0].type != OCONFIG_TYPE_BOOLEAN))
271 WARNING ("threshold values: The `Percentage' option needs exactly one "
272 "boolean argument.");
276 if (ci->values[0].value.boolean)
277 th->flags |= UT_FLAG_PERCENTAGE;
279 th->flags &= ~UT_FLAG_PERCENTAGE;
282 } /* int ut_config_type_percentage */
/* Handles one `Type' block. Works on a private copy of `th_orig' (so the
 * enclosing block's settings are inherited but never modified), stores the
 * block's string argument as the type, resets all four limits to NAN, applies
 * every child option through the matching ut_config_type_* helper and then
 * registers the result with ut_threshold_add. Option names are matched
 * case-insensitively; unknown options produce a warning.
 * NOTE(review): the local declarations, the handling of a non-zero `status'
 * inside and after the loop, and the return statements are not part of this
 * listing. */
284 static int ut_config_type (const threshold_t *th_orig, oconfig_item_t *ci)
/* The block itself must carry exactly one string: the type name. */
290 if ((ci->values_num != 1)
291 || (ci->values[0].type != OCONFIG_TYPE_STRING))
293 WARNING ("threshold values: The `Type' block needs exactly one string "
298 if (ci->children_num < 1)
300 WARNING ("threshold values: The `Type' block needs at least one option.");
/* Start from the inherited host/plugin settings. */
304 memcpy (&th, th_orig, sizeof (th));
305 sstrncpy (th.type, ci->values[0].value.string, sizeof (th.type));
/* NAN marks a limit as "not configured"; the checks skip such limits. */
307 th.warning_min = NAN;
308 th.warning_max = NAN;
309 th.failure_min = NAN;
310 th.failure_max = NAN;
312 for (i = 0; i < ci->children_num; i++)
314 oconfig_item_t *option = ci->children + i;
/* Dispatch on the option name. */
317 if (strcasecmp ("Instance", option->key) == 0)
318 status = ut_config_type_instance (&th, option);
319 else if (strcasecmp ("DataSource", option->key) == 0)
320 status = ut_config_type_datasource (&th, option);
321 else if ((strcasecmp ("WarningMax", option->key) == 0)
322 || (strcasecmp ("FailureMax", option->key) == 0))
323 status = ut_config_type_max (&th, option);
324 else if ((strcasecmp ("WarningMin", option->key) == 0)
325 || (strcasecmp ("FailureMin", option->key) == 0))
326 status = ut_config_type_min (&th, option);
327 else if (strcasecmp ("Invert", option->key) == 0)
328 status = ut_config_type_invert (&th, option);
329 else if (strcasecmp ("Persist", option->key) == 0)
330 status = ut_config_type_persist (&th, option);
331 else if (strcasecmp ("Percentage", option->key) == 0)
332 status = ut_config_type_percentage (&th, option);
335 WARNING ("threshold values: Option `%s' not allowed inside a `Type' "
336 "block.", option->key);
/* ut_threshold_add stores its own copy, so passing the local is fine. */
346 status = ut_threshold_add (&th);
350 } /* int ut_config_type */
/* Handles the `Instance' option of a `Plugin' block: copies the option's
 * single string argument into th->plugin_instance. A warning is logged when
 * the option does not carry exactly one string argument.
 * NOTE(review): the return statements are not part of this listing; the
 * caller stores the result in `status'. */
352 static int ut_config_plugin_instance (threshold_t *th, oconfig_item_t *ci)
354 if ((ci->values_num != 1)
355 || (ci->values[0].type != OCONFIG_TYPE_STRING))
357 WARNING ("threshold values: The `Instance' option needs exactly one "
/* Bounded copy into the fixed-size member. */
362 sstrncpy (th->plugin_instance, ci->values[0].value.string,
363 sizeof (th->plugin_instance));
366 } /* int ut_config_plugin_instance */
/* Handles one `Plugin' block. Works on a private copy of `th_orig', stores
 * the block's string argument as the plugin name and processes the children
 * in order: `Type' blocks go to ut_config_type, `Instance' options to
 * ut_config_plugin_instance. Because the children are handled in order and
 * `Type' blocks copy the current state, an `Instance' option only affects the
 * `Type' blocks that follow it. Other options produce a warning.
 * NOTE(review): the local declarations, the handling of a non-zero `status'
 * and the return statements are not part of this listing. */
368 static int ut_config_plugin (const threshold_t *th_orig, oconfig_item_t *ci)
/* The block itself must carry exactly one string: the plugin name. */
374 if ((ci->values_num != 1)
375 || (ci->values[0].type != OCONFIG_TYPE_STRING))
377 WARNING ("threshold values: The `Plugin' block needs exactly one string "
382 if (ci->children_num < 1)
384 WARNING ("threshold values: The `Plugin' block needs at least one nested "
/* Start from the inherited host setting. */
389 memcpy (&th, th_orig, sizeof (th));
390 sstrncpy (th.plugin, ci->values[0].value.string, sizeof (th.plugin));
392 for (i = 0; i < ci->children_num; i++)
394 oconfig_item_t *option = ci->children + i;
397 if (strcasecmp ("Type", option->key) == 0)
398 status = ut_config_type (&th, option);
399 else if (strcasecmp ("Instance", option->key) == 0)
400 status = ut_config_plugin_instance (&th, option);
403 WARNING ("threshold values: Option `%s' not allowed inside a `Plugin' "
404 "block.", option->key);
413 } /* int ut_config_plugin */
/* Handles one `Host' block. Works on a private copy of `th_orig', stores the
 * block's string argument as the host name and hands nested `Type' blocks to
 * ut_config_type and nested `Plugin' blocks to ut_config_plugin. Other
 * options produce a warning.
 * NOTE(review): the local declarations, the handling of a non-zero `status'
 * and the return statements are not part of this listing. */
415 static int ut_config_host (const threshold_t *th_orig, oconfig_item_t *ci)
/* The block itself must carry exactly one string: the host name. */
421 if ((ci->values_num != 1)
422 || (ci->values[0].type != OCONFIG_TYPE_STRING))
424 WARNING ("threshold values: The `Host' block needs exactly one string "
429 if (ci->children_num < 1)
431 WARNING ("threshold values: The `Host' block needs at least one nested "
436 memcpy (&th, th_orig, sizeof (th));
437 sstrncpy (th.host, ci->values[0].value.string, sizeof (th.host));
439 for (i = 0; i < ci->children_num; i++)
441 oconfig_item_t *option = ci->children + i;
444 if (strcasecmp ("Type", option->key) == 0)
445 status = ut_config_type (&th, option);
446 else if (strcasecmp ("Plugin", option->key) == 0)
447 status = ut_config_plugin (&th, option);
450 WARNING ("threshold values: Option `%s' not allowed inside a `Host' "
451 "block.", option->key);
460 } /* int ut_config_host */
/* Entry point for the `Threshold' configuration block. Rejects a block that
 * carries arguments, creates threshold_tree on first use (string keys are
 * compared with strcmp) and then processes the nested `Type', `Plugin' and
 * `Host' blocks, each starting from an empty template whose limits are all
 * NAN. Other options produce a warning.
 * NOTE(review): the local declarations, the handling of a non-zero `status'
 * and the return statements are not part of this listing. */
462 int ut_config (const oconfig_item_t *ci)
469 if (ci->values_num != 0)
471 ERROR ("threshold values: The `Threshold' block may not have any "
/* The tree is created once and reused when this is called again. */
476 if (threshold_tree == NULL)
478 threshold_tree = c_avl_create ((void *) strcmp);
479 if (threshold_tree == NULL)
481 ERROR ("ut_config: c_avl_create failed.");
/* Empty template: all strings empty, no flags, no limits configured. */
486 memset (&th, '\0', sizeof (th));
487 th.warning_min = NAN;
488 th.warning_max = NAN;
489 th.failure_min = NAN;
490 th.failure_max = NAN;
492 for (i = 0; i < ci->children_num; i++)
494 oconfig_item_t *option = ci->children + i;
497 if (strcasecmp ("Type", option->key) == 0)
498 status = ut_config_type (&th, option);
499 else if (strcasecmp ("Plugin", option->key) == 0)
500 status = ut_config_plugin (&th, option);
501 else if (strcasecmp ("Host", option->key) == 0)
502 status = ut_config_host (&th, option);
505 WARNING ("threshold values: Option `%s' not allowed here.", option->key);
514 } /* int ut_config */
516 * End of the functions used to configure threshold values.
/* Finds the most specific configured threshold for a value list. The lookups
 * run from the fully qualified identifier towards the most general one and
 * stop at the first hit:
 *   1. host and plugin given, trying plugin/type instance combinations,
 *   2. host given, plugin left empty,
 *   3. host left empty, plugin given,
 *   4. host and plugin both left empty.
 * Within each group the lookup with the type instance comes before the one
 * without it. The type is always required to match. Performs no locking.
 * NOTE(review): the declaration of `th', the branch bodies and the return
 * statement are not part of this listing; presumably the first match (or
 * NULL) is returned. */
520 static threshold_t *threshold_search (const value_list_t *vl)
/* Host and plugin both specified. */
524 if ((th = threshold_get (vl->host, vl->plugin, vl->plugin_instance,
525 vl->type, vl->type_instance)) != NULL)
527 else if ((th = threshold_get (vl->host, vl->plugin, vl->plugin_instance,
528 vl->type, NULL)) != NULL)
530 else if ((th = threshold_get (vl->host, vl->plugin, NULL,
531 vl->type, vl->type_instance)) != NULL)
533 else if ((th = threshold_get (vl->host, vl->plugin, NULL,
534 vl->type, NULL)) != NULL)
/* Host specified, any plugin. */
536 else if ((th = threshold_get (vl->host, "", NULL,
537 vl->type, vl->type_instance)) != NULL)
539 else if ((th = threshold_get (vl->host, "", NULL,
540 vl->type, NULL)) != NULL)
/* Any host, plugin specified. */
542 else if ((th = threshold_get ("", vl->plugin, vl->plugin_instance,
543 vl->type, vl->type_instance)) != NULL)
545 else if ((th = threshold_get ("", vl->plugin, vl->plugin_instance,
546 vl->type, NULL)) != NULL)
548 else if ((th = threshold_get ("", vl->plugin, NULL,
549 vl->type, vl->type_instance)) != NULL)
551 else if ((th = threshold_get ("", vl->plugin, NULL,
552 vl->type, NULL)) != NULL)
/* Any host, any plugin: only the type (and type instance) must match. */
554 else if ((th = threshold_get ("", "", NULL,
555 vl->type, vl->type_instance)) != NULL)
557 else if ((th = threshold_get ("", "", NULL,
558 vl->type, NULL)) != NULL)
562 } /* threshold_t *threshold_search */
565 * int ut_report_state
567 * Checks if the `state' differs from the old state and creates a notification
/* Records `state' for the value list and dispatches a notification about it.
 * The previous state comes from uc_get_state; a changed state is written
 * back with uc_set_state. The notification text is assembled piece by piece
 * with ssnprintf: host and plugin, optional plugin instance, type, optional
 * type instance, and finally either an "all within range" sentence or a
 * description of the limit that was crossed by the data source `ds_index'.
 * The data source name, the current value and the four limits are also
 * attached as notification meta data.
 * NOTE(review): the remaining parameters (ds_index, state), the local
 * declarations, the code that advances `buf'/`bufsize' after each ssnprintf,
 * the early returns for an unchanged state and the final return are not part
 * of this listing. */
571 static int ut_report_state (const data_set_t *ds,
572 const value_list_t *vl,
573 const threshold_t *th,
574 const gauge_t *values,
586 state_old = uc_get_state (ds, vl);
588 /* If the state didn't change, only report if `persistent' is specified and
589 * the state is not `okay'. */
590 if (state == state_old)
592 if ((th->flags & UT_FLAG_PERSIST) == 0)
594 else if (state == STATE_OKAY)
598 if (state != state_old)
599 uc_set_state (ds, vl, state);
601 NOTIFICATION_INIT_VL (&n, vl, ds);
604 bufsize = sizeof (n.message);
/* Map the state to a severity; anything that is neither okay nor warning is
 * reported as a failure. */
606 if (state == STATE_OKAY)
607 n.severity = NOTIF_OKAY;
608 else if (state == STATE_WARNING)
609 n.severity = NOTIF_WARNING;
611 n.severity = NOTIF_FAILURE;
/* Identification part of the message. */
615 status = ssnprintf (buf, bufsize, "Host %s, plugin %s",
616 vl->host, vl->plugin);
620 if (vl->plugin_instance[0] != '\0')
622 status = ssnprintf (buf, bufsize, " (instance %s)",
623 vl->plugin_instance);
628 status = ssnprintf (buf, bufsize, " type %s", vl->type);
632 if (vl->type_instance[0] != '\0')
634 status = ssnprintf (buf, bufsize, " (instance %s)",
/* Machine-readable details for notification consumers. */
640 plugin_notification_meta_add_string (&n, "DataSource",
641 ds->ds[ds_index].name);
642 plugin_notification_meta_add_double (&n, "CurrentValue", values[ds_index]);
643 plugin_notification_meta_add_double (&n, "WarningMin", th->warning_min);
644 plugin_notification_meta_add_double (&n, "WarningMax", th->warning_max);
645 plugin_notification_meta_add_double (&n, "FailureMin", th->failure_min);
646 plugin_notification_meta_add_double (&n, "FailureMax", th->failure_max);
648 /* Send an okay notification */
649 if (state == STATE_OKAY)
651 status = ssnprintf (buf, bufsize, ": All data sources are within range again.");
/* Pick the pair of limits that belongs to the reported state. */
660 min = (state == STATE_ERROR) ? th->failure_min : th->warning_min;
661 max = (state == STATE_ERROR) ? th->failure_max : th->warning_max;
663 if (th->flags & UT_FLAG_INVERT)
/* Inverted with both limits set: the value lies inside the bad region. */
665 if (!isnan (min) && !isnan (max))
667 status = ssnprintf (buf, bufsize, ": Data source \"%s\" is currently "
668 "%f. That is within the %s region of %f%s and %f%s.",
669 ds->ds[ds_index].name, values[ds_index],
670 (state == STATE_ERROR) ? "failure" : "warning",
671 min, ((th->flags & UT_FLAG_PERCENTAGE) != 0) ? "%" : "",
672 max, ((th->flags & UT_FLAG_PERCENTAGE) != 0) ? "%" : "");
/* Inverted with one limit set: report against whichever limit exists. */
676 status = ssnprintf (buf, bufsize, ": Data source \"%s\" is currently "
677 "%f. That is %s the %s threshold of %f%s.",
678 ds->ds[ds_index].name, values[ds_index],
679 isnan (min) ? "below" : "above",
680 (state == STATE_ERROR) ? "failure" : "warning",
681 isnan (min) ? max : min,
682 ((th->flags & UT_FLAG_PERCENTAGE) != 0) ? "%" : "");
685 else /* is not inverted */
/* A comparison with a NAN minimum is false, so an unset minimum falls through
 * to the "above ... max" wording. */
687 status = ssnprintf (buf, bufsize, ": Data source \"%s\" is currently "
688 "%f. That is %s the %s threshold of %f%s.",
689 ds->ds[ds_index].name, values[ds_index],
690 (values[ds_index] < min) ? "below" : "above",
691 (state == STATE_ERROR) ? "failure" : "warning",
692 (values[ds_index] < min) ? min : max,
693 ((th->flags & UT_FLAG_PERCENTAGE) != 0) ? "%" : "");
699 plugin_dispatch_notification (&n);
/* The meta data list attached above is released after dispatching. */
701 plugin_notification_meta_free (n.meta);
703 } /* }}} int ut_report_state */
706 * int ut_check_one_data_source
708 * Checks one data source against the given threshold configuration. If the
709 * `DataSource' option is set in the threshold, and the name does NOT match,
710 * `okay' is returned. If the threshold does match, its failure and warning
711 * min and max values are checked and `failure' or `warning' is returned if
/* Checks the value of the data source `ds_index' against the limits of one
 * threshold. A threshold whose data_source member is non-empty only applies
 * to the data source of that name. The failure limits are tested before the
 * warning limits; a limit that is NAN is ignored. `vl' is unused.
 * NOTE(review): the ds_index parameter, the local declarations, the code
 * guarded by the UT_FLAG_INVERT test, the conditions directly in front of
 * the two visible return statements and the final return are not part of
 * this listing, so the exact effect of `Invert' cannot be read off here. */
715 static int ut_check_one_data_source (const data_set_t *ds,
716 const value_list_t __attribute__((unused)) *vl,
717 const threshold_t *th,
718 const gauge_t *values,
725 /* check if this threshold applies to this data source */
728 ds_name = ds->ds[ds_index].name;
729 if ((th->data_source[0] != 0)
730 && (strcmp (ds_name, th->data_source) != 0))
734 if ((th->flags & UT_FLAG_INVERT) != 0)
/* Outside the failure range: below the minimum or above the maximum. */
740 if ((!isnan (th->failure_min) && (th->failure_min > values[ds_index]))
741 || (!isnan (th->failure_max) && (th->failure_max < values[ds_index])))
744 return (STATE_ERROR);
/* Same test against the warning range. */
746 if ((!isnan (th->warning_min) && (th->warning_min > values[ds_index]))
747 || (!isnan (th->warning_max) && (th->warning_max < values[ds_index])))
750 return (STATE_WARNING);
753 } /* }}} int ut_check_one_data_source */
756 * int ut_check_one_threshold
758 * Checks all data sources of a value list against the given threshold, using
759 * the ut_check_one_data_source function above. Returns the worst status,
760 * which is `okay' if nothing has failed.
761 * Returns less than zero if the data set doesn't have any data sources.
/* Runs ut_check_one_data_source for every data source of the value list and
 * reports, through `ret_ds_index' when it is not NULL, the index selected by
 * the loop. With UT_FLAG_PERCENTAGE set, each value is first converted to
 * its percentage of the sum of all non-NAN values; if every value is NAN or
 * the sum is zero, all values become NAN instead.
 * NOTE(review): the ret_ds_index parameter, the local declarations, the
 * accumulation of `sum'/`num', the comparison that selects the worst status
 * and the return statement are not part of this listing. */
763 static int ut_check_one_threshold (const data_set_t *ds,
764 const value_list_t *vl,
765 const threshold_t *th,
766 const gauge_t *values,
/* Working copy, because `values' is const and the percentage conversion
 * rewrites the numbers. NOTE(review): a variable-length array -- confirm that
 * ds->ds_num is checked for zero before this point. */
772 gauge_t values_copy[ds->ds_num];
774 memcpy (values_copy, values, sizeof (values_copy));
776 if ((th->flags & UT_FLAG_PERCENTAGE) != 0)
783 WARNING ("ut_check_one_threshold: The %s type has only one data "
784 "source, but you have configured to check this as a percentage. "
785 "That doesn't make much sense, because the percentage will always "
786 "be 100%%!", ds->type);
789 /* Prepare `sum' and `num'. */
790 for (i = 0; i < ds->ds_num; i++)
791 if (!isnan (values[i]))
797 if ((num == 0) /* All data sources are undefined. */
798 || (sum == 0.0)) /* Sum is zero, cannot calculate percentage. */
800 for (i = 0; i < ds->ds_num; i++)
801 values_copy[i] = NAN;
803 else /* We can actually calculate the percentage. */
805 for (i = 0; i < ds->ds_num; i++)
806 values_copy[i] = 100.0 * values[i] / sum;
808 } /* if (UT_FLAG_PERCENTAGE) */
/* Check every data source against the (possibly converted) values. */
810 for (i = 0; i < ds->ds_num; i++)
814 status = ut_check_one_data_source (ds, vl, th, values_copy, i);
820 } /* for (ds->ds_num) */
822 if (ret_ds_index != NULL)
823 *ret_ds_index = ds_index;
826 } /* }}} int ut_check_one_threshold */
829 * int ut_check_threshold (PUBLIC)
831 * Gets a list of matching thresholds and searches for the worst status by one
832 * of the thresholds. Then reports that status using the ut_report_state
834 * Returns zero on success and if no threshold has been configured. Returns
835 * less than zero on failure.
/* Public entry point: checks a value list against the configured thresholds.
 * The matching threshold list is looked up with threshold_search while
 * threshold_lock is held, the current rates are fetched with uc_get_rate,
 * every threshold of the list is evaluated with ut_check_one_threshold, and
 * the threshold with the highest status value is handed to ut_report_state
 * together with the index of the offending data source. Failures of
 * ut_check_one_threshold and ut_report_state are logged with ERROR.
 * NOTE(review): several local declarations, the early returns, the loop over
 * the threshold list, the release of `values' and the return statements are
 * not part of this listing. The comment that starts at original line 850 is
 * also missing its closing line here. */
837 int ut_check_threshold (const data_set_t *ds, const value_list_t *vl)
/* -1 is below every state value, so the first checked threshold always
 * becomes the initial "worst" one. */
843 int worst_state = -1;
844 threshold_t *worst_th = NULL;
845 int worst_ds_index = -1;
/* Without a tree no threshold has been configured. */
847 if (threshold_tree == NULL)
850 /* Is this lock really necessary? So far, thresholds are only inserted at
852 pthread_mutex_lock (&threshold_lock);
853 th = threshold_search (vl);
854 pthread_mutex_unlock (&threshold_lock);
858 DEBUG ("ut_check_threshold: Found matching threshold(s)");
860 values = uc_get_rate (ds, vl);
868 status = ut_check_one_threshold (ds, vl, th, values, &ds_index);
871 ERROR ("ut_check_threshold: ut_check_one_threshold failed.");
876 if (worst_state < status)
878 worst_state = status;
880 worst_ds_index = ds_index;
886 status = ut_report_state (ds, vl, worst_th, values,
887 worst_ds_index, worst_state);
890 ERROR ("ut_check_threshold: ut_report_state failed.");
898 } /* }}} int ut_check_threshold */
901 * int ut_check_interesting (PUBLIC)
903 * Given an identification returns
904 * 0: No threshold is defined.
905 * 1: A threshold has been found. The flag `persist' is off.
906 * 2: A threshold has been found. The flag `persist' is on.
907 * (That is, it is expected that many notifications are sent until the
908 * problem disappears.)
/* Public entry point: tells whether a threshold is configured for the
 * identifier string `name'. The string is duplicated, split into its parts
 * with parse_identifier, copied into a temporary value list and looked up
 * with threshold_search. According to the description in front of this
 * function the result is 0 when no threshold matches, 1 when one matches
 * with `persist' off and 2 when one matches with `persist' on.
 * NOTE(review): several local declarations, the release of name_copy, the
 * NULL check on `th' and the return statements are not part of this listing.
 * Unlike ut_check_threshold, the visible code searches the tree without
 * taking threshold_lock -- confirm that this is intended. */
910 int ut_check_interesting (const char *name)
912 char *name_copy = NULL;
915 char *plugin_instance = NULL;
917 char *type_instance = NULL;
923 /* If there is no tree nothing is interesting. */
924 if (threshold_tree == NULL)
/* `name' is const; parse_identifier gets a private, writable copy. */
927 name_copy = strdup (name);
928 if (name_copy == NULL)
930 ERROR ("ut_check_interesting: strdup failed.");
934 status = parse_identifier (name_copy, &host,
935 &plugin, &plugin_instance, &type, &type_instance);
938 ERROR ("ut_check_interesting: parse_identifier failed.");
/* Build just enough of a data set / value list for threshold_search. */
943 memset (&ds, '\0', sizeof (ds));
944 memset (&vl, '\0', sizeof (vl));
946 sstrncpy (vl.host, host, sizeof (vl.host));
947 sstrncpy (vl.plugin, plugin, sizeof (vl.plugin));
/* The two instance parts may be absent from the identifier. */
948 if (plugin_instance != NULL)
949 sstrncpy (vl.plugin_instance, plugin_instance, sizeof (vl.plugin_instance));
950 sstrncpy (ds.type, type, sizeof (ds.type));
951 sstrncpy (vl.type, type, sizeof (vl.type));
952 if (type_instance != NULL)
953 sstrncpy (vl.type_instance, type_instance, sizeof (vl.type_instance));
/* The parts have been copied; presumably they point into name_copy, so the
 * pointers are cleared once that buffer is no longer needed. */
956 host = plugin = plugin_instance = type = type_instance = NULL;
958 th = threshold_search (&vl);
/* Only the first threshold of the list decides between the two results. */
961 if ((th->flags & UT_FLAG_PERSIST) == 0)
964 } /* }}} int ut_check_interesting */
966 /* vim: set sw=2 ts=8 sts=2 tw=78 et fdm=marker : */