From a16e967161314d4b2047e36ba0af0d7efa616919 Mon Sep 17 00:00:00 2001
From: Florian Forster
Date: Fri, 6 Feb 2009 16:27:02 +0100
Subject: [PATCH] curl plugin: Add a generic plugin to retrieve and parse web
 pages.

---
 configure.in          |   2 +
 src/Makefile.am       |  14 ++
 src/collectd.conf.in  |  15 ++
 src/collectd.conf.pod |  23 ++
 src/curl.c            | 658 ++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 712 insertions(+)
 create mode 100644 src/curl.c

diff --git a/configure.in b/configure.in
index d4239852..d73f7b44 100644
--- a/configure.in
+++ b/configure.in
@@ -3018,6 +3018,7 @@ AC_PLUGIN([bind], [$plugin_bind], [ISC Bind nameserver statistics])
 AC_PLUGIN([cpu], [$plugin_cpu], [CPU usage statistics])
 AC_PLUGIN([cpufreq], [$plugin_cpufreq], [CPU frequency statistics])
 AC_PLUGIN([csv], [yes], [CSV output plugin])
+AC_PLUGIN([curl], [$with_libcurl], [CURL generic web statistics])
 AC_PLUGIN([notify_desktop], [$with_libnotify], [Desktop notifications])
 AC_PLUGIN([dbi], [$with_libdbi], [General database statistics])
 AC_PLUGIN([df], [$plugin_df], [Filesystem usage statistics])
@@ -3216,6 +3217,7 @@ Configuration:
     cpu . . . . . . . . . $enable_cpu
     cpufreq . . . . . . . $enable_cpufreq
     csv . . . . . . . . . $enable_csv
+    curl  . . . . . . . . $enable_curl
     dbi . . . . . . . . . $enable_dbi
     df  . . . . . . . . . $enable_df
     disk  . . . . . . . . $enable_disk
diff --git a/src/Makefile.am b/src/Makefile.am
index 935ed7b5..0c3b476b 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -201,6 +201,20 @@ collectd_LDADD += "-dlopen" csv.la
 collectd_DEPENDENCIES += csv.la
 endif
 
+if BUILD_PLUGIN_CURL
+pkglib_LTLIBRARIES += curl.la
+curl_la_SOURCES = curl.c
+curl_la_LDFLAGS = -module -avoid-version
+curl_la_CFLAGS = $(AM_CFLAGS)
+curl_la_LIBADD =
+collectd_LDADD += "-dlopen" curl.la
+if BUILD_WITH_LIBCURL
+curl_la_CFLAGS += $(BUILD_WITH_LIBCURL_CFLAGS)
+curl_la_LIBADD += $(BUILD_WITH_LIBCURL_LIBS)
+endif
+collectd_DEPENDENCIES += curl.la
+endif
+
 if BUILD_PLUGIN_DBI
 pkglib_LTLIBRARIES += dbi.la
 dbi_la_SOURCES = dbi.c \
diff --git a/src/collectd.conf.in b/src/collectd.conf.in
index b241c9b6..98449c5e 100644
--- a/src/collectd.conf.in
+++ b/src/collectd.conf.in
@@ -35,6 +35,7 @@ FQDNLookup true
 @BUILD_PLUGIN_CPU_TRUE@LoadPlugin cpu
 @BUILD_PLUGIN_CPUFREQ_TRUE@LoadPlugin cpufreq
 @BUILD_PLUGIN_CSV_TRUE@LoadPlugin csv
+@BUILD_PLUGIN_CURL_TRUE@LoadPlugin curl
 @BUILD_PLUGIN_DBI_TRUE@LoadPlugin dbi
 @BUILD_PLUGIN_DF_TRUE@LoadPlugin df
 @BUILD_PLUGIN_DISK_TRUE@LoadPlugin disk
@@ -133,6 +134,20 @@ FQDNLookup true
 #	StoreRates false
 #</Plugin>
 
+#<Plugin curl>
+#  <Page "stock_quotes">
+#    URL "http://finance.google.com/finance?q=NYSE%3AAMD"
+#    User "foo"
+#    Password "bar"
+#    <Match>
+#      Regex "<span +class=\"pr\"[^>]*> *([0-9]*\\.[0-9]+) *</span>"
+#      DSType "GaugeAverage"
+#      Type "stock_value"
+#      Instance "AMD"
+#    </Match>
+#  </Page>
+#</Plugin>
+
 #<Plugin dbi>
 #	<Query "num_of_customers">
 #		Statement "SELECT 'customers' AS c_key, COUNT(*) AS c_value FROM customers_tbl"
diff --git a/src/collectd.conf.pod b/src/collectd.conf.pod
index 1f936735..c975b2c2 100644
--- a/src/collectd.conf.pod
+++ b/src/collectd.conf.pod
@@ -347,6 +347,29 @@ number.
 
 =back
 
+=head2 Plugin C<curl>
+
+The curl plugin uses the B<libcurl> (L<http://curl.haxx.se/>) to read web pages
+and the match infrastructure (the same code used by the tail plugin) to use
+regular expressions with the received data.
+
+The following example will read the current value of AMD stock from google's
+finance page and dispatch the value to collectd.
+
+  <Plugin curl>
+    <Page "stock_quotes">
+      URL "http://finance.google.com/finance?q=NYSE%3AAMD"
+      User "foo"
+      Password "bar"
+      <Match>
+        Regex "<span +class=\"pr\"[^>]*> *([0-9]*\\.[0-9]+) *</span>"
+        DSType "GaugeAverage"
+        Type "stock_value"
+        Instance "AMD"
+      </Match>
+    </Page>
+  </Plugin>
+
 =head2 Plugin C<dbi>
 
 This plugin uses the B<dbi> library (L<http://libdbi.sourceforge.net/>) to
diff --git a/src/curl.c b/src/curl.c
new file mode 100644
index 00000000..cfe5a4ff
--- /dev/null
+++ b/src/curl.c
@@ -0,0 +1,658 @@
+/**
+ * collectd - src/curl.c
+ * Copyright (C) 2006-2009  Florian octo Forster
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; only version 2 of the License is applicable.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ * Authors:
+ *   Florian octo Forster
+ **/
+
+#include "collectd.h"
+#include "common.h"
+#include "plugin.h"
+#include "configfile.h"
+#include "utils_match.h"
+
+#include <curl/curl.h>
+
+/*
+ * Data types
+ */
+/* One `Match' block: the regex to apply and how to dispatch its value. */
+struct web_match_s;
+typedef struct web_match_s web_match_t;
+struct web_match_s /* {{{ */
+{
+  char *regex;
+  int dstype;
+  char *type;
+  char *instance;
+
+  cu_match_t *match;
+
+  web_match_t *next;
+}; /* }}} */
+
+/* One `Page' block: URL, credentials, TLS settings and receive buffer. */
+struct web_page_s;
+typedef struct web_page_s web_page_t;
+struct web_page_s /* {{{ */
+{
+  char *instance;
+
+  char *url;
+  char *user;
+  char *pass;
+  char *credentials;
+  int   verify_peer;
+  int   verify_host;
+  char *cacert;
+
+  CURL *curl;
+  char curl_errbuf[CURL_ERROR_SIZE];
+  char *buffer;
+  size_t buffer_size;
+  size_t buffer_fill;
+
+  web_match_t *matches;
+
+  web_page_t *next;
+}; /* }}} */
+
+/*
+ * Global variables;
+ */
+/* static CURLM *curl = NULL; */
+static web_page_t *pages_g = NULL;
+
+/*
+ * Private functions
+ */
+/* libcurl write callback: appends the received chunk to the page's buffer,
+ * growing it when needed, and keeps the buffer NUL-terminated. Returns the
+ * number of bytes handled; 0 if `user_data' is NULL or realloc fails. */
+static size_t cc_curl_callback (void *buf, /* {{{ */
+    size_t size, size_t nmemb, void *user_data)
+{
+  web_page_t *wp;
+  size_t len;
+
+  len = size * nmemb;
+  if (len == 0)
+    return (len);
+
+  wp = user_data;
+  if (wp == NULL)
+    return (0);
+
+  if ((wp->buffer_fill + len) >= wp->buffer_size)
+  {
+    char *temp;
+    size_t temp_size;
+
+    temp_size = wp->buffer_fill + len + 1;
+    temp = (char *) realloc (wp->buffer, temp_size);
+    if (temp == NULL)
+    {
+      ERROR ("curl plugin: realloc failed.");
+      return (0);
+    }
+    wp->buffer = temp;
+    wp->buffer_size = temp_size;
+  }
+
+  memcpy (wp->buffer + wp->buffer_fill, (char *) buf, len);
+  wp->buffer_fill += len;
+  wp->buffer[wp->buffer_fill] = 0;
+
+  return (len);
+} /* }}} size_t cc_curl_callback */
+
+/* Frees `wm' and, recursively, every match that follows it in the list. */
+static void cc_web_match_free (web_match_t *wm) /* {{{ */
+{
+  if (wm == NULL)
+    return;
+
+  sfree (wm->regex);
+  sfree (wm->type);
+  sfree (wm->instance);
+  match_destroy (wm->match);
+  cc_web_match_free (wm->next);
+  sfree (wm);
+} /* }}} void cc_web_match_free */
+
+/* Frees `wp', its curl handle, its matches and all pages following it. */
+static void cc_web_page_free (web_page_t *wp) /* {{{ */
+{
+  if (wp == NULL)
+    return;
+
+  if (wp->curl != NULL)
+    curl_easy_cleanup (wp->curl);
+  wp->curl = NULL;
+
+  sfree (wp->instance);
+
+  sfree (wp->url);
+  sfree (wp->user);
+  sfree (wp->pass);
+  sfree (wp->credentials);
+  sfree (wp->cacert);
+
+  sfree (wp->buffer);
+
+  cc_web_match_free (wp->matches);
+  cc_web_page_free (wp->next);
+  sfree (wp);
+} /* }}} void cc_web_page_free */
+
+/* Replaces `*dest' with a copy of the single string argument of `ci'.
+ * Returns zero on success and -1 on a bad argument or failed strdup. */
+static int cc_config_add_string (const char *name, char **dest, /* {{{ */
+    oconfig_item_t *ci)
+{
+  if ((ci->values_num != 1) || (ci->values[0].type != OCONFIG_TYPE_STRING))
+  {
+    WARNING ("curl plugin: `%s' needs exactly one string argument.", name);
+    return (-1);
+  }
+
+  sfree (*dest);
+  *dest = strdup (ci->values[0].value.string);
+  if (*dest == NULL)
+    return (-1);
+
+  return (0);
+} /* }}} int cc_config_add_string */
+
+/* Stores the single boolean argument of `ci' in `*dest' as 0 or 1. */
+static int cc_config_set_boolean (const char *name, int *dest, /* {{{ */
+    oconfig_item_t *ci)
+{
+  if ((ci->values_num != 1) || (ci->values[0].type != OCONFIG_TYPE_BOOLEAN))
+  {
+    WARNING ("curl plugin: `%s' needs exactly one boolean argument.", name);
+    return (-1);
+  }
+
+  *dest = ci->values[0].value.boolean ? 1 : 0;
+
+  return (0);
+} /* }}} int cc_config_set_boolean */
+
+/* Translates the `DSType' argument (e.g. "GaugeAverage", "CounterInc") into
+ * the matching UTILS_MATCH_* flag combination. Returns -1 if it is unknown. */
+static int cc_config_add_match_dstype (int *dstype_ret, /* {{{ */
+    oconfig_item_t *ci)
+{
+  int dstype;
+
+  if ((ci->values_num != 1) || (ci->values[0].type != OCONFIG_TYPE_STRING))
+  {
+    WARNING ("curl plugin: `DSType' needs exactly one string argument.");
+    return (-1);
+  }
+
+  if (strncasecmp ("Gauge", ci->values[0].value.string,
+        strlen ("Gauge")) == 0)
+  {
+    dstype = UTILS_MATCH_DS_TYPE_GAUGE;
+    if (strcasecmp ("GaugeAverage", ci->values[0].value.string) == 0)
+      dstype |= UTILS_MATCH_CF_GAUGE_AVERAGE;
+    else if (strcasecmp ("GaugeMin", ci->values[0].value.string) == 0)
+      dstype |= UTILS_MATCH_CF_GAUGE_MIN;
+    else if (strcasecmp ("GaugeMax", ci->values[0].value.string) == 0)
+      dstype |= UTILS_MATCH_CF_GAUGE_MAX;
+    else if (strcasecmp ("GaugeLast", ci->values[0].value.string) == 0)
+      dstype |= UTILS_MATCH_CF_GAUGE_LAST;
+    else
+      dstype = 0;
+  }
+  else if (strncasecmp ("Counter", ci->values[0].value.string,
+        strlen ("Counter")) == 0)
+  {
+    dstype = UTILS_MATCH_DS_TYPE_COUNTER;
+    if (strcasecmp ("CounterSet", ci->values[0].value.string) == 0)
+      dstype |= UTILS_MATCH_CF_COUNTER_SET;
+    else if (strcasecmp ("CounterAdd", ci->values[0].value.string) == 0)
+      dstype |= UTILS_MATCH_CF_COUNTER_ADD;
+    else if (strcasecmp ("CounterInc", ci->values[0].value.string) == 0)
+      dstype |= UTILS_MATCH_CF_COUNTER_INC;
+    else
+      dstype = 0;
+  }
+  else
+  {
+    dstype = 0;
+  }
+
+  if (dstype == 0)
+  {
+    WARNING ("curl plugin: `%s' is not a valid argument to `DSType'.",
+        ci->values[0].value.string);
+    return (-1);
+  }
+
+  *dstype_ret = dstype;
+  return (0);
+} /* }}} int cc_config_add_match_dstype */
+
+/* Parses one `Match' block and appends the resulting match to `page'.
+ * `Regex', `Type' and `DSType' are mandatory; `Instance' is optional. */
+static int cc_config_add_match (web_page_t *page, /* {{{ */
+    oconfig_item_t *ci)
+{
+  web_match_t *match;
+  int status;
+  int i;
+
+  if (ci->values_num != 0)
+  {
+    WARNING ("curl plugin: Ignoring arguments for the `Match' block.");
+  }
+
+  match = (web_match_t *) malloc (sizeof (*match));
+  if (match == NULL)
+  {
+    ERROR ("curl plugin: malloc failed.");
+    return (-1);
+  }
+  memset (match, 0, sizeof (*match));
+
+  status = 0;
+  for (i = 0; i < ci->children_num; i++)
+  {
+    oconfig_item_t *child = ci->children + i;
+
+    if (strcasecmp ("Regex", child->key) == 0)
+      status = cc_config_add_string ("Regex", &match->regex, child);
+    else if (strcasecmp ("DSType", child->key) == 0)
+      status = cc_config_add_match_dstype (&match->dstype, child);
+    else if (strcasecmp ("Type", child->key) == 0)
+      status = cc_config_add_string ("Type", &match->type, child);
+    else if (strcasecmp ("Instance", child->key) == 0)
+      status = cc_config_add_string ("Instance", &match->instance, child);
+    else
+    {
+      WARNING ("curl plugin: Option `%s' not allowed here.", child->key);
+      status = -1;
+    }
+
+    if (status != 0)
+      break;
+  } /* for (i = 0; i < ci->children_num; i++) */
+
+  while (status == 0)
+  {
+    if (match->regex == NULL)
+    {
+      WARNING ("curl plugin: `Regex' missing in `Match' block.");
+      status = -1;
+    }
+
+    if (match->type == NULL)
+    {
+      WARNING ("curl plugin: `Type' missing in `Match' block.");
+      status = -1;
+    }
+
+    if (match->dstype == 0)
+    {
+      WARNING ("curl plugin: `DSType' missing in `Match' block.");
+      status = -1;
+    }
+
+    break;
+  } /* while (status == 0) */
+
+  if (status != 0)
+  {
+    cc_web_match_free (match);
+    return (status);
+  }
+
+  match->match = match_create_simple (match->regex, match->dstype);
+  if (match->match == NULL)
+  {
+    ERROR ("curl plugin: match_create_simple failed.");
+    cc_web_match_free (match);
+    return (-1);
+  }
+  else
+  {
+    web_match_t *prev;
+
+    prev = page->matches;
+    while ((prev != NULL) && (prev->next != NULL))
+      prev = prev->next;
+
+    if (prev == NULL)
+      page->matches = match;
+    else
+      prev->next = match;
+  }
+
+  return (0);
+} /* }}} int cc_config_add_match */
+
+/* Creates and configures the curl handle of `wp' from the parsed settings.
+ * Returns zero on success and -1 if curl_easy_init or malloc fails. */
+static int cc_page_init_curl (web_page_t *wp) /* {{{ */
+{
+  wp->curl = curl_easy_init ();
+  if (wp->curl == NULL)
+  {
+    ERROR ("curl plugin: curl_easy_init failed.");
+    return (-1);
+  }
+
+  curl_easy_setopt (wp->curl, CURLOPT_WRITEFUNCTION, cc_curl_callback);
+  curl_easy_setopt (wp->curl, CURLOPT_WRITEDATA, wp);
+  curl_easy_setopt (wp->curl, CURLOPT_USERAGENT,
+      PACKAGE_NAME"/"PACKAGE_VERSION);
+  curl_easy_setopt (wp->curl, CURLOPT_ERRORBUFFER, wp->curl_errbuf);
+  curl_easy_setopt (wp->curl, CURLOPT_URL, wp->url);
+
+  if (wp->user != NULL)
+  {
+    size_t credentials_size;
+
+    credentials_size = strlen (wp->user) + 2;
+    if (wp->pass != NULL)
+      credentials_size += strlen (wp->pass);
+
+    wp->credentials = (char *) malloc (credentials_size);
+    if (wp->credentials == NULL)
+    {
+      ERROR ("curl plugin: malloc failed.");
+      return (-1);
+    }
+
+    ssnprintf (wp->credentials, credentials_size, "%s:%s",
+        wp->user, (wp->pass == NULL) ? "" : wp->pass);
+    curl_easy_setopt (wp->curl, CURLOPT_USERPWD, wp->credentials);
+  }
+
+  /* curl_easy_setopt is variadic and expects `long' for these options. */
+  curl_easy_setopt (wp->curl, CURLOPT_SSL_VERIFYPEER, (long) wp->verify_peer);
+  curl_easy_setopt (wp->curl, CURLOPT_SSL_VERIFYHOST,
+      wp->verify_host ? 2L : 0L);
+  if (wp->cacert != NULL)
+    curl_easy_setopt (wp->curl, CURLOPT_CAINFO, wp->cacert);
+
+  return (0);
+} /* }}} int cc_page_init_curl */
+
+/* Parses one `Page' block and appends the new page to `pages_g'. A page
+ * without `URL' or without at least one valid `Match' block is rejected. */
+static int cc_config_add_page (oconfig_item_t *ci) /* {{{ */
+{
+  web_page_t *page;
+  int status;
+  int i;
+
+  if ((ci->values_num != 1) || (ci->values[0].type != OCONFIG_TYPE_STRING))
+  {
+    WARNING ("curl plugin: `Page' blocks need exactly one string argument.");
+    return (-1);
+  }
+
+  page = (web_page_t *) malloc (sizeof (*page));
+  if (page == NULL)
+  {
+    ERROR ("curl plugin: malloc failed.");
+    return (-1);
+  }
+  memset (page, 0, sizeof (*page));
+  /* Verify TLS certificates unless the configuration says otherwise. */
+  page->verify_peer = 1;
+  page->verify_host = 1;
+
+  page->instance = strdup (ci->values[0].value.string);
+  if (page->instance == NULL)
+  {
+    ERROR ("curl plugin: strdup failed.");
+    sfree (page);
+    return (-1);
+  }
+
+  /* Process all children */
+  status = 0;
+  for (i = 0; i < ci->children_num; i++)
+  {
+    oconfig_item_t *child = ci->children + i;
+
+    if (strcasecmp ("URL", child->key) == 0)
+      status = cc_config_add_string ("URL", &page->url, child);
+    else if (strcasecmp ("User", child->key) == 0)
+      status = cc_config_add_string ("User", &page->user, child);
+    else if (strcasecmp ("Password", child->key) == 0)
+      status = cc_config_add_string ("Password", &page->pass, child);
+    else if (strcasecmp ("VerifyPeer", child->key) == 0)
+      status = cc_config_set_boolean ("VerifyPeer", &page->verify_peer, child);
+    else if (strcasecmp ("VerifyHost", child->key) == 0)
+      status = cc_config_set_boolean ("VerifyHost", &page->verify_host, child);
+    else if (strcasecmp ("CACert", child->key) == 0)
+      status = cc_config_add_string ("CACert", &page->cacert, child);
+    else if (strcasecmp ("Match", child->key) == 0)
+      /* Be liberal with failing matches => don't set `status'. */
+      cc_config_add_match (page, child);
+    else
+    {
+      WARNING ("curl plugin: Option `%s' not allowed here.", child->key);
+      status = -1;
+    }
+
+    if (status != 0)
+      break;
+  } /* for (i = 0; i < ci->children_num; i++) */
+
+  /* Additional sanity checks and libCURL initialization. */
+  while (status == 0)
+  {
+    if (page->url == NULL)
+    {
+      WARNING ("curl plugin: `URL' missing in `Page' block.");
+      status = -1;
+    }
+
+    if (page->matches == NULL)
+    {
+      assert (page->instance != NULL);
+      WARNING ("curl plugin: No (valid) `Match' block "
+          "within `Page' block `%s'.", page->instance);
+      status = -1;
+    }
+
+    if (status == 0)
+      status = cc_page_init_curl (page);
+
+    break;
+  } /* while (status == 0) */
+
+  if (status != 0)
+  {
+    cc_web_page_free (page);
+    return (status);
+  }
+
+  /* Add the new page to the linked list */
+  if (pages_g == NULL)
+    pages_g = page;
+  else
+  {
+    web_page_t *prev;
+
+    prev = pages_g;
+    while ((prev != NULL) && (prev->next != NULL))
+      prev = prev->next;
+    prev->next = page;
+  }
+
+  return (0);
+} /* }}} int cc_config_add_page */
+
+/* Config callback: handles all `Page' blocks. Fails only if there were
+ * errors and not a single page could be added. */
+static int cc_config (oconfig_item_t *ci) /* {{{ */
+{
+  int success;
+  int errors;
+  int status;
+  int i;
+
+  success = 0;
+  errors = 0;
+
+  for (i = 0; i < ci->children_num; i++)
+  {
+    oconfig_item_t *child = ci->children + i;
+
+    if (strcasecmp ("Page", child->key) == 0)
+    {
+      status = cc_config_add_page (child);
+      if (status == 0)
+        success++;
+      else
+        errors++;
+    }
+    else
+    {
+      WARNING ("curl plugin: Option `%s' not allowed here.", child->key);
+      errors++;
+    }
+  }
+
+  if ((success == 0) && (errors > 0))
+  {
+    ERROR ("curl plugin: All statements failed.");
+    return (-1);
+  }
+
+  return (0);
+} /* }}} int cc_config */
+
+/* Init callback: fails if the configuration did not define any page. */
+static int cc_init (void) /* {{{ */
+{
+  if (pages_g == NULL)
+  {
+    INFO ("curl plugin: No pages have been defined.");
+    return (-1);
+  }
+  return (0);
+} /* }}} int cc_init */
+
+/* Dispatches the value of match `wm' on page `wp' to the daemon. */
+static void cc_submit (const web_page_t *wp, const web_match_t *wm, /* {{{ */
+    const cu_match_value_t *mv)
+{
+  value_t values[1];
+  value_list_t vl = VALUE_LIST_INIT;
+
+  values[0] = mv->value;
+
+  vl.values = values;
+  vl.values_len = 1;
+  vl.time = time (NULL);
+  sstrncpy (vl.host, hostname_g, sizeof (vl.host));
+  sstrncpy (vl.plugin, "curl", sizeof (vl.plugin));
+  sstrncpy (vl.plugin_instance, wp->instance, sizeof (vl.plugin_instance));
+  sstrncpy (vl.type, wm->type, sizeof (vl.type));
+  /* `Instance' is optional in `Match' blocks. */
+  if (wm->instance != NULL)
+    sstrncpy (vl.type_instance, wm->instance, sizeof (vl.type_instance));
+
+  plugin_dispatch_values (&vl);
+} /* }}} void cc_submit */
+
+/* Downloads the page `wp', applies all of its matches to the received data
+ * and submits one value per match. Returns -1 if the transfer fails. */
+static int cc_read_page (web_page_t *wp) /* {{{ */
+{
+  web_match_t *wm;
+  int status;
+
+  /* Discard the previous page so stale data is never matched again. */
+  wp->buffer_fill = 0;
+  if (wp->buffer != NULL)
+    wp->buffer[0] = 0;
+  status = curl_easy_perform (wp->curl);
+  if (status != 0)
+  {
+    ERROR ("curl plugin: curl_easy_perform failed with status %i: %s",
+        status, wp->curl_errbuf);
+    return (-1);
+  }
+
+  /* The write callback never ran, so there is nothing to match. */
+  if (wp->buffer == NULL)
+    return (0);
+
+  for (wm = wp->matches; wm != NULL; wm = wm->next)
+  {
+    cu_match_value_t *mv;
+
+    status = match_apply (wm->match, wp->buffer);
+    if (status != 0)
+    {
+      WARNING ("curl plugin: match_apply failed.");
+      continue;
+    }
+
+    mv = match_get_user_data (wm->match);
+    if (mv == NULL)
+    {
+      WARNING ("curl plugin: match_get_user_data returned NULL.");
+      continue;
+    }
+
+    cc_submit (wp, wm, mv);
+  } /* for (wm = wp->matches; wm != NULL; wm = wm->next) */
+
+  return (0);
+} /* }}} int cc_read_page */
+
+/* Read callback: fetches every configured page; failures are not fatal. */
+static int cc_read (void) /* {{{ */
+{
+  web_page_t *wp;
+
+  for (wp = pages_g; wp != NULL; wp = wp->next)
+    cc_read_page (wp);
+
+  return (0);
+} /* }}} int cc_read */
+
+/* Shutdown callback: releases all pages. */
+static int cc_shutdown (void) /* {{{ */
+{
+  cc_web_page_free (pages_g);
+  pages_g = NULL;
+
+  return (0);
+} /* }}} int cc_shutdown */
+
+/* Registers the plugin's callbacks with the daemon. */
+void module_register (void)
+{
+  plugin_register_complex_config ("curl", cc_config);
+  plugin_register_init ("curl", cc_init);
+  plugin_register_read ("curl", cc_read);
+  plugin_register_shutdown ("curl", cc_shutdown);
+} /* void module_register */
+
+/* vim: set sw=2 sts=2 et fdm=marker : */
-- 
2.11.0