From: Sebastian Harl Date: Sun, 25 Nov 2007 13:54:05 +0000 (+0100) Subject: collectdmon: Added a small daemon monitoring collectd. X-Git-Tag: collectd-4.3.0beta0~80 X-Git-Url: https://git.verplant.org/?a=commitdiff_plain;h=69e5d982e7adf27a0eca6e6431f748f4913d80ac;p=collectd.git collectdmon: Added a small daemon monitoring collectd. This is a small "wrapper" daemon which starts and monitors the collectd daemon. If collectd terminates it will automatically be restarted, unless collectdmon was told to shut it down. Current features: * restarting the daemon * logging to syslog * detection of quickly, repeatedly respawning processes (similar to SysV init) collectdmon is similar to mysqld_safe. Signed-off-by: Sebastian Harl Signed-off-by: Florian Forster --- diff --git a/src/Makefile.am b/src/Makefile.am index 06f45dca..79b3438b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -19,7 +19,7 @@ AM_CPPFLAGS += -DPIDFILE='"${localstatedir}/run/${PACKAGE_NAME}.pid"' endif AM_CPPFLAGS += -DPLUGINDIR='"${pkglibdir}"' -sbin_PROGRAMS = collectd +sbin_PROGRAMS = collectd collectdmon bin_PROGRAMS = collectd-nagios collectd_SOURCES = collectd.c collectd.h \ @@ -77,6 +77,9 @@ else collectd_LDFLAGS += -loconfig endif +collectdmon_SOURCES = collectdmon.c +collectdmon_CPPFLAGS = $(AM_CPPFLAGS) + collectd_nagios_SOURCES = collectd-nagios.c collectd_nagios_LDFLAGS = if BUILD_WITH_LIBSOCKET diff --git a/src/collectdmon.c b/src/collectdmon.c new file mode 100644 index 00000000..39dc0f25 --- /dev/null +++ b/src/collectdmon.c @@ -0,0 +1,354 @@ +/** + * collectd - src/collectdmon.c + * Copyright (C) 2007 Sebastian Harl + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; only version 2 of the License is applicable. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Author: + * Sebastian Harl + **/ + +#include "config.h" + +#include + +#include + +#include + +#include + +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include + +#include + +#include + +#ifndef COLLECTDMON_PIDFILE +# define COLLECTDMON_PIDFILE LOCALSTATEDIR"/run/collectdmon.pid" +#endif /* ! COLLECTDMON_PIDFILE */ + +#ifndef WCOREDUMP +# define WCOREDUMP(s) 0 +#endif /* ! WCOREDUMP */ + +static int loop = 0; + +static char *pidfile = NULL; +static pid_t collectd_pid = 0; + +static void exit_usage (char *name) +{ + printf ("Usage: %s [-- ]\n" + + "\nAvailable options:\n" + " -h Display this help and exit.\n" + " -c Path to the collectd binary.\n" + " -P PID-file.\n" + + "\nFor see collectd.conf(5).\n" + + "\n"PACKAGE" "VERSION", http://collectd.org/\n" + "by Florian octo Forster \n" + "for contributions see `AUTHORS'\n", name); + exit (0); +} /* exit_usage */ + +static int pidfile_create (void) +{ + FILE *file = NULL; + + if (NULL == pidfile) + pidfile = COLLECTDMON_PIDFILE; + + if (NULL == (file = fopen (pidfile, "w"))) { + syslog (LOG_ERR, "Error: couldn't open PID-file (%s) for writing: %s", + pidfile, strerror (errno)); + return -1; + } + + fprintf (file, "%i\n", (int)getpid ()); + fclose (file); + return 0; +} /* pidfile_create */ + +static int pidfile_delete (void) +{ + assert (NULL != pidfile); + + if (0 != unlink (pidfile)) { + syslog (LOG_ERR, "Error: couldn't delete PID-file (%s): %s", + pidfile, strerror (errno)); + return -1; + } + return 0; +} /* pidfile_remove */ + +static int daemonize (void) +{ + struct rlimit rl; + + pid_t pid = 0; + int i = 0; + + if (0 != chdir ("/")) { + fprintf (stderr, "Error: chdir() failed: %s\n", strerror (errno)); + return -1; + } + + if (0 != getrlimit (RLIMIT_NOFILE, &rl)) { + fprintf (stderr, "Error: getrlimit() failed: %s\n", strerror (errno)); + return -1; + } + + if (0 > (pid = fork ())) { + fprintf (stderr, "Error: fork() failed: %s\n", strerror (errno)); + return -1; + } + else if (pid != 0) { + exit (0); + } + + if (0 != pidfile_create ()) + return -1; + + setsid (); + + if (RLIM_INFINITY == rl.rlim_max) + rl.rlim_max = 1024; + + for (i = 0; i < rl.rlim_max; ++i) + close (i); + + errno = 0; + if (open ("/dev/null", O_RDWR) != 0) { + syslog (LOG_ERR, "Error: couldn't connect STDIN to /dev/null: %s", + strerror (errno)); + return -1; + } + + errno = 0; + if (dup (0) != 1) { + syslog (LOG_ERR, "Error: couldn't connect STDOUT to /dev/null: %s", + strerror (errno)); + return -1; + } + + errno = 0; + if (dup (0) != 2) { + syslog (LOG_ERR, "Error: couldn't connect STDERR to /dev/null: %s", + strerror (errno)); + return -1; + } + return 0; +} /* daemonize */ + +static int collectd_start (int argc, char **argv) +{ + pid_t pid = 0; + + if (0 > (pid = fork ())) { + syslog (LOG_ERR, "Error: fork() failed: %s", strerror (errno)); + return -1; + } + else if (pid != 0) { + collectd_pid = pid; + return 0; + } + + execvp (argv[0], argv); + syslog (LOG_ERR, "Error: execvp(%s) failed: %s", + argv[0], strerror (errno)); + exit (-1); +} /* collectd_start */ + +static int collectd_stop (void) +{ + if (0 == collectd_pid) + return 0; + + if (0 != kill (collectd_pid, SIGTERM)) { + syslog (LOG_ERR, "Error: kill() failed: %s", strerror (errno)); + return -1; + } + return 0; +} /* collectd_stop */ + +static void sig_int_term_handler (int signo) +{ + ++loop; + return; +} /* sig_int_term_handler */ + +static void log_status (int status) +{ + if (WIFEXITED (status)) { + syslog (LOG_INFO, "Info: collectd terminated with exit status %i", + WEXITSTATUS (status)); + } + else if (WIFSIGNALED (status)) { + syslog (LOG_WARNING, "Warning: collectd was terminated by signal %i%s", + WTERMSIG (status), WCOREDUMP (status) ? " (core dumped)" : ""); + } + return; +} /* log_status */ + +static void check_respawn (void) +{ + time_t t = time (NULL); + + static time_t timestamp = 0; + static int counter = 0; + + if ((t - 120) < timestamp) + ++counter; + else { + timestamp = t; + counter = 0; + } + + if (10 < counter) { + unsigned int time_left = 300; + + syslog (LOG_ERR, "Error: collectd is respawning too fast - " + "disabled for %i seconds", time_left); + + while ((0 < (time_left = sleep (time_left))) && (0 == loop)); + } + return; +} /* check_respawn */ + +int main (int argc, char **argv) +{ + int collectd_argc = 0; + char *collectd = NULL; + char **collectd_argv = NULL; + + struct sigaction sa; + + int i = 0; + + /* parse command line options */ + while (42) { + int c = getopt (argc, argv, "hc:P:"); + + if (-1 == c) + break; + + switch (c) { + case 'c': + collectd = optarg; + break; + case 'P': + pidfile = optarg; + break; + case 'h': + default: + exit_usage (argv[0]); + } + } + + for (i = optind; i < argc; ++i) + if (0 == strcmp (argv[i], "-f")) + break; + + /* i < argc => -f already present */ + collectd_argc = 1 + argc - optind + ((i < argc) ? 0 : 1); + collectd_argv = (char **)calloc (collectd_argc + 1, sizeof (char *)); + + if (NULL == collectd_argv) { + fprintf (stderr, "Out of memory."); + return 3; + } + + collectd_argv[0] = (NULL == collectd) ? "collectd" : collectd; + + if (i == argc) + collectd_argv[collectd_argc - 1] = "-f"; + + for (i = optind; i < argc; ++i) + collectd_argv[i - optind + 1] = argv[i]; + + collectd_argv[collectd_argc] = NULL; + + openlog ("collectdmon", LOG_CONS | LOG_PID, LOG_DAEMON); + + if (-1 == daemonize ()) + return 1; + + sa.sa_handler = sig_int_term_handler; + sa.sa_flags = 0; + sigemptyset (&sa.sa_mask); + + if (0 != sigaction (SIGINT, &sa, NULL)) { + syslog (LOG_ERR, "Error: sigaction() failed: %s", strerror (errno)); + return 1; + } + + if (0 != sigaction (SIGTERM, &sa, NULL)) { + syslog (LOG_ERR, "Error: sigaction() failed: %s", strerror (errno)); + return 1; + } + + sigaddset (&sa.sa_mask, SIGCHLD); + if (0 != sigprocmask (SIG_BLOCK, &sa.sa_mask, NULL)) { + syslog (LOG_ERR, "Error: sigprocmask() failed: %s", strerror (errno)); + return 1; + } + + while (0 == loop) { + int status = 0; + + if (0 != collectd_start (collectd_argc, collectd_argv)) { + syslog (LOG_ERR, "Error: failed to start collectd."); + break; + } + + assert (0 < collectd_pid); + while ((collectd_pid != waitpid (collectd_pid, &status, 0)) + && (EINTR == errno)) + if (0 != loop) + collectd_stop (); + + collectd_pid = 0; + + log_status (status); + check_respawn (); + + if (0 == loop) + syslog (LOG_WARNING, "Warning: restarting collectd"); + } + + syslog (LOG_INFO, "Info: shutting down collectdmon"); + + pidfile_delete (); + closelog (); + + free (collectd_argv); + return 0; +} /* main */ + +/* vim: set sw=4 ts=4 tw=78 noexpandtab : */ +