[collectd] [PATCH] collectdmon: Added a small daemon monitoring collectd.
Sebastian Harl
sh at tokkee.org
Sun Nov 25 14:54:05 CET 2007
This is a small "wrapper" daemon which starts and monitors the collectd
daemon. If collectd terminates it will automatically be restarted, unless
collectdmon was told to shut it down.
Current features:
* restarting the daemon
* logging to syslog
* detection of quickly, repeatedly respawning processes
(similar to SysV init)
collectdmon is similar to mysqld_safe.
Signed-off-by: Sebastian Harl <sh at tokkee.org>
---
src/Makefile.am | 5 +-
src/collectdmon.c | 354 +++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 358 insertions(+), 1 deletions(-)
create mode 100644 src/collectdmon.c
diff --git a/src/Makefile.am b/src/Makefile.am
index 06f45dc..79b3438 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -19,7 +19,7 @@ AM_CPPFLAGS += -DPIDFILE='"${localstatedir}/run/${PACKAGE_NAME}.pid"'
endif
AM_CPPFLAGS += -DPLUGINDIR='"${pkglibdir}"'
-sbin_PROGRAMS = collectd
+sbin_PROGRAMS = collectd collectdmon
bin_PROGRAMS = collectd-nagios
collectd_SOURCES = collectd.c collectd.h \
@@ -77,6 +77,9 @@ else
collectd_LDFLAGS += -loconfig
endif
+collectdmon_SOURCES = collectdmon.c
+collectdmon_CPPFLAGS = $(AM_CPPFLAGS)
+
collectd_nagios_SOURCES = collectd-nagios.c
collectd_nagios_LDFLAGS =
if BUILD_WITH_LIBSOCKET
diff --git a/src/collectdmon.c b/src/collectdmon.c
new file mode 100644
index 0000000..39dc0f2
--- /dev/null
+++ b/src/collectdmon.c
@@ -0,0 +1,354 @@
+/**
+ * collectd - src/collectdmon.c
+ * Copyright (C) 2007 Sebastian Harl
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; only version 2 of the License is applicable.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Author:
+ * Sebastian Harl <sh at tokkee.org>
+ **/
+
+#include "config.h"
+
+#include <assert.h>
+
+#include <errno.h>
+
+#include <fcntl.h>
+
+#include <signal.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <string.h>
+
+#include <syslog.h>
+
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+#include <time.h>
+
+#include <unistd.h>
+
+/* Default PID-file location; can be overridden at compile time. */
+#ifndef COLLECTDMON_PIDFILE
+# define COLLECTDMON_PIDFILE LOCALSTATEDIR"/run/collectdmon.pid"
+#endif /* ! COLLECTDMON_PIDFILE */
+
+/* Fallback for systems whose <sys/wait.h> does not define WCOREDUMP:
+ * never report a core dump. */
+#ifndef WCOREDUMP
+# define WCOREDUMP(s) 0
+#endif /* ! WCOREDUMP */
+
+/*
+ * Shutdown flag: zero while collectdmon should keep running, non-zero once
+ * SIGINT or SIGTERM has been received. It is modified from a signal handler
+ * and polled by regular code, so it has to be a volatile sig_atomic_t;
+ * otherwise the compiler may cache the value across the polling loops.
+ */
+static volatile sig_atomic_t loop = 0;
+
+/* Path of the PID-file; NULL until set by -P or defaulted on creation. */
+static char *pidfile = NULL;
+/* PID of the currently supervised collectd child, 0 if none is running. */
+static pid_t collectd_pid = 0;
+
+/*
+ * Print the command line synopsis and version banner to stdout, then
+ * terminate the process with exit status 0. Never returns.
+ *
+ * name: program name shown in the "Usage:" line (main passes argv[0]).
+ */
+static void exit_usage (char *name)
+{
+	/* synopsis line, the only part that depends on the caller */
+	printf ("Usage: %s <options> [-- <collectd options>]\n", name);
+
+	/* static remainder: option list, pointer to the manual, banner */
+	printf ("\nAvailable options:\n"
+		" -h Display this help and exit.\n"
+		" -c <path> Path to the collectd binary.\n"
+		" -P <file> PID-file.\n"
+
+		"\nFor <collectd options> see collectd.conf(5).\n"
+
+		"\n"PACKAGE" "VERSION", http://collectd.org/\n"
+		"by Florian octo Forster <octo at verplant.org>\n"
+		"for contributions see `AUTHORS'\n");
+
+	exit (0);
+} /* exit_usage */
+
+/*
+ * Write the PID of the calling process to the PID-file.
+ *
+ * If no path was configured yet, the global `pidfile' is set to
+ * COLLECTDMON_PIDFILE first. Failures are reported through syslog.
+ *
+ * Returns 0 on success, -1 if the file could not be opened or written.
+ */
+static int pidfile_create (void)
+{
+	FILE *file = NULL;
+	int saved_errno = 0;
+
+	if (NULL == pidfile)
+		pidfile = COLLECTDMON_PIDFILE;
+
+	if (NULL == (file = fopen (pidfile, "w"))) {
+		syslog (LOG_ERR, "Error: couldn't open PID-file (%s) for writing: %s",
+				pidfile, strerror (errno));
+		return -1;
+	}
+
+	if (0 > fprintf (file, "%i\n", (int)getpid ()))
+		saved_errno = errno;
+
+	/* fclose() flushes the stream, so a full disk or similar write error
+	 * may only become visible here; keep the first error that occurred */
+	if ((0 != fclose (file)) && (0 == saved_errno))
+		saved_errno = errno;
+
+	if (0 != saved_errno) {
+		syslog (LOG_ERR, "Error: couldn't write PID-file (%s): %s",
+				pidfile, strerror (saved_errno));
+		/* best effort: do not leave an empty or truncated file behind */
+		unlink (pidfile);
+		return -1;
+	}
+	return 0;
+} /* pidfile_create */
+
+/*
+ * Remove the PID-file named by the global `pidfile', which must already be
+ * set (enforced with an assertion).
+ *
+ * Returns 0 on success; on failure the error is logged to syslog and -1 is
+ * returned.
+ */
+static int pidfile_delete (void)
+{
+	int rc;
+
+	assert (NULL != pidfile);
+
+	rc = unlink (pidfile);
+	if (0 == rc)
+		return 0;
+
+	syslog (LOG_ERR, "Error: couldn't delete PID-file (%s): %s",
+			pidfile, strerror (errno));
+	return -1;
+} /* pidfile_delete */
+
+/*
+ * Turn the calling process into a daemon.
+ *
+ * Steps, in order: change the working directory to "/", query the
+ * RLIMIT_NOFILE limit, fork (the parent exits with status 0), write the
+ * PID-file from the child, start a new session, close every file descriptor
+ * below the hard descriptor limit and finally attach descriptors 0, 1 and 2
+ * to /dev/null.
+ *
+ * Errors before the fork are printed to stderr, later ones go to syslog.
+ *
+ * Returns 0 on success (in the child only), -1 on failure.
+ */
+static int daemonize (void)
+{
+	struct rlimit rl;
+
+	pid_t pid = 0;
+	/* same type as rl.rlim_max: avoids a signed/unsigned comparison and a
+	 * signed overflow of the counter when the limit exceeds INT_MAX */
+	rlim_t fd = 0;
+
+	if (0 != chdir ("/")) {
+		fprintf (stderr, "Error: chdir() failed: %s\n", strerror (errno));
+		return -1;
+	}
+
+	if (0 != getrlimit (RLIMIT_NOFILE, &rl)) {
+		fprintf (stderr, "Error: getrlimit() failed: %s\n", strerror (errno));
+		return -1;
+	}
+
+	if (0 > (pid = fork ())) {
+		fprintf (stderr, "Error: fork() failed: %s\n", strerror (errno));
+		return -1;
+	}
+	else if (pid != 0) {
+		/* parent: the child carries on as the daemon */
+		exit (0);
+	}
+
+	if (0 != pidfile_create ())
+		return -1;
+
+	setsid ();
+
+	/* without a hard limit fall back to a fixed number of descriptors */
+	if (RLIM_INFINITY == rl.rlim_max)
+		rl.rlim_max = 1024;
+
+	for (fd = 0; fd < rl.rlim_max; ++fd)
+		close ((int)fd);
+
+	/* all descriptors are closed now, so open() and dup() hand out the
+	 * lowest free numbers: 0, 1 and 2 in that order */
+	errno = 0;
+	if (open ("/dev/null", O_RDWR) != 0) {
+		syslog (LOG_ERR, "Error: couldn't connect STDIN to /dev/null: %s",
+				strerror (errno));
+		return -1;
+	}
+
+	errno = 0;
+	if (dup (0) != 1) {
+		syslog (LOG_ERR, "Error: couldn't connect STDOUT to /dev/null: %s",
+				strerror (errno));
+		return -1;
+	}
+
+	errno = 0;
+	if (dup (0) != 2) {
+		syslog (LOG_ERR, "Error: couldn't connect STDERR to /dev/null: %s",
+				strerror (errno));
+		return -1;
+	}
+	return 0;
+} /* daemonize */
+
+/*
+ * Fork and execute collectd.
+ *
+ * argc: number of entries in argv (currently unused).
+ * argv: NULL-terminated argument vector; argv[0] is the program to run and
+ *       is looked up via PATH by execvp().
+ *
+ * In the parent the child's PID is stored in the global `collectd_pid' and
+ * 0 is returned; -1 is returned if fork() failed. The child never returns:
+ * it either becomes collectd or terminates after logging the exec error.
+ */
+static int collectd_start (int argc, char **argv)
+{
+	pid_t pid = 0;
+
+	(void)argc;
+
+	if (0 > (pid = fork ())) {
+		syslog (LOG_ERR, "Error: fork() failed: %s", strerror (errno));
+		return -1;
+	}
+	else if (pid != 0) {
+		collectd_pid = pid;
+		return 0;
+	}
+
+	execvp (argv[0], argv);
+	syslog (LOG_ERR, "Error: execvp(%s) failed: %s",
+			argv[0], strerror (errno));
+	/* _exit() rather than exit(): the failed child must not run atexit
+	 * handlers or flush stdio buffers it inherited from the parent */
+	_exit (-1);
+} /* collectd_start */
+
+/*
+ * Ask the supervised collectd process to terminate by sending it SIGTERM.
+ *
+ * Does nothing when no child is recorded (`collectd_pid' is 0).
+ *
+ * Returns 0 on success or if there was nothing to stop, -1 if kill()
+ * failed (the error is logged to syslog).
+ */
+static int collectd_stop (void)
+{
+	int rc = 0;
+
+	if (0 != collectd_pid) {
+		rc = kill (collectd_pid, SIGTERM);
+		if (0 != rc) {
+			syslog (LOG_ERR, "Error: kill() failed: %s", strerror (errno));
+			rc = -1;
+		}
+	}
+	return rc;
+} /* collectd_stop */
+
+/*
+ * Signal handler installed for SIGINT and SIGTERM: bumps the global `loop'
+ * counter so the polling code sees a non-zero value and shuts down.
+ *
+ * signo: number of the delivered signal (not inspected).
+ */
+static void sig_int_term_handler (int signo)
+{
+	(void)signo;
+
+	loop += 1;
+} /* sig_int_term_handler */
+
+/*
+ * Log how collectd ended, based on a wait status.
+ *
+ * status: value filled in by waitpid().
+ *
+ * A normal exit is logged at LOG_INFO with the exit code; termination by a
+ * signal is logged at LOG_WARNING with the signal number and a core dump
+ * note if WCOREDUMP reports one. Any other status is not logged.
+ */
+static void log_status (int status)
+{
+	if (WIFEXITED (status)) {
+		int exit_code = WEXITSTATUS (status);
+
+		syslog (LOG_INFO, "Info: collectd terminated with exit status %i",
+				exit_code);
+		return;
+	}
+
+	if (WIFSIGNALED (status)) {
+		const char *core_note = "";
+
+		if (WCOREDUMP (status))
+			core_note = " (core dumped)";
+
+		syslog (LOG_WARNING, "Warning: collectd was terminated by signal %i%s",
+				WTERMSIG (status), core_note);
+	}
+} /* log_status */
+
+/*
+ * Throttle restarts of a collectd that keeps dying.
+ *
+ * Called once after every termination of collectd. Calls that happen less
+ * than 120 seconds after the start of the current window are counted; a
+ * later call opens a new window and resets the counter. Once the counter
+ * exceeds 10, an error is logged and the function sleeps for 300 seconds
+ * before returning, ending the pause early if a shutdown is requested.
+ *
+ * When a shutdown has already been requested (`loop' is non-zero) the
+ * function returns at once: collectd was stopped on purpose, it will not
+ * be restarted, and the shutdown must not be delayed by the pause.
+ */
+static void check_respawn (void)
+{
+	static time_t timestamp = 0;
+	static int counter = 0;
+
+	time_t t = 0;
+
+	if (0 != loop)
+		return;
+
+	t = time (NULL);
+
+	if ((t - 120) < timestamp)
+		++counter;
+	else {
+		timestamp = t;
+		counter = 0;
+	}
+
+	if (10 < counter) {
+		unsigned int time_left = 300;
+
+		syslog (LOG_ERR, "Error: collectd is respawning too fast - "
+				"disabled for %u seconds", time_left);
+
+		/* sleep() returns the unslept seconds when a signal interrupts it;
+		 * resume the pause unless that signal requested a shutdown */
+		while ((0 == loop) && (0 < time_left))
+			time_left = sleep (time_left);
+	}
+	return;
+} /* check_respawn */
+
+/*
+ * collectdmon entry point.
+ *
+ * Parses collectdmon's own options (-h, -c <path>, -P <file>), builds the
+ * argument vector for collectd from the remaining arguments, daemonizes,
+ * installs the SIGINT/SIGTERM handler and then starts collectd and restarts
+ * it whenever it terminates, until a shutdown is requested.
+ *
+ * "-f" is appended to collectd's arguments unless the caller already passed
+ * it (presumably collectd's stay-in-foreground switch, needed so that
+ * waitpid() tracks the real process -- confirm against collectd(1)).
+ *
+ * Returns 0 after a regular shutdown, 1 if daemonizing, signal setup or
+ * starting collectd failed, and 3 if the argument vector could not be
+ * allocated.
+ */
+int main (int argc, char **argv)
+{
+	int collectd_argc = 0;
+	char *collectd = NULL;
+	char **collectd_argv = NULL;
+
+	struct sigaction sa;
+
+	int have_f = 0;
+	int exit_status = 0;
+	int i = 0;
+
+	/* parse command line options */
+	while (42) {
+		int c = getopt (argc, argv, "hc:P:");
+
+		if (-1 == c)
+			break;
+
+		switch (c) {
+			case 'c':
+				collectd = optarg;
+				break;
+			case 'P':
+				pidfile = optarg;
+				break;
+			case 'h':
+			default:
+				exit_usage (argv[0]);
+		}
+	}
+
+	/* did the caller already pass -f to collectd? */
+	for (i = optind; i < argc; ++i) {
+		if (0 == strcmp (argv[i], "-f")) {
+			have_f = 1;
+			break;
+		}
+	}
+
+	/* program name + forwarded arguments (+ "-f" if it has to be added) */
+	collectd_argc = 1 + (argc - optind) + (have_f ? 0 : 1);
+	collectd_argv = calloc (collectd_argc + 1, sizeof (*collectd_argv));
+
+	if (NULL == collectd_argv) {
+		fprintf (stderr, "Out of memory.\n");
+		return 3;
+	}
+
+	collectd_argv[0] = (NULL == collectd) ? "collectd" : collectd;
+
+	for (i = optind; i < argc; ++i)
+		collectd_argv[i - optind + 1] = argv[i];
+
+	if (! have_f)
+		collectd_argv[collectd_argc - 1] = "-f";
+
+	collectd_argv[collectd_argc] = NULL;
+
+	openlog ("collectdmon", LOG_CONS | LOG_PID, LOG_DAEMON);
+
+	if (-1 == daemonize ())
+		return 1;
+
+	/* from here on the PID-file exists; every exit path goes through the
+	 * cleanup label so that it is removed again */
+
+	/* zero the whole structure: it may have members beyond the three set
+	 * explicitly below */
+	memset (&sa, 0, sizeof (sa));
+	sa.sa_handler = sig_int_term_handler;
+	sa.sa_flags = 0;
+	sigemptyset (&sa.sa_mask);
+
+	if (0 != sigaction (SIGINT, &sa, NULL)) {
+		syslog (LOG_ERR, "Error: sigaction() failed: %s", strerror (errno));
+		exit_status = 1;
+		goto cleanup;
+	}
+
+	if (0 != sigaction (SIGTERM, &sa, NULL)) {
+		syslog (LOG_ERR, "Error: sigaction() failed: %s", strerror (errno));
+		exit_status = 1;
+		goto cleanup;
+	}
+
+	/* sa.sa_mask is reused as a scratch set containing only SIGCHLD; the
+	 * child's termination is collected synchronously by waitpid() below */
+	sigaddset (&sa.sa_mask, SIGCHLD);
+	if (0 != sigprocmask (SIG_BLOCK, &sa.sa_mask, NULL)) {
+		syslog (LOG_ERR, "Error: sigprocmask() failed: %s", strerror (errno));
+		exit_status = 1;
+		goto cleanup;
+	}
+
+	while (0 == loop) {
+		int status = 0;
+
+		if (0 != collectd_start (collectd_argc, collectd_argv)) {
+			syslog (LOG_ERR, "Error: failed to start collectd.");
+			exit_status = 1;
+			break;
+		}
+
+		assert (0 < collectd_pid);
+
+		/* SIGINT/SIGTERM interrupt waitpid() with EINTR; forward the
+		 * shutdown request to collectd and keep waiting for it to exit.
+		 * NOTE(review): a signal that arrives after the loop test above
+		 * but before waitpid() blocks is only noticed once collectd
+		 * terminates on its own. */
+		while ((collectd_pid != waitpid (collectd_pid, &status, 0))
+				&& (EINTR == errno)) {
+			if (0 != loop)
+				collectd_stop ();
+		}
+
+		collectd_pid = 0;
+
+		log_status (status);
+		check_respawn ();
+
+		if (0 == loop)
+			syslog (LOG_WARNING, "Warning: restarting collectd");
+	}
+
+	syslog (LOG_INFO, "Info: shutting down collectdmon");
+
+cleanup:
+	pidfile_delete ();
+	closelog ();
+
+	free (collectd_argv);
+	return exit_status;
+} /* main */
+
+/* vim: set sw=4 ts=4 tw=78 noexpandtab : */
+
--
1.5.3.6.736.gb7f30
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
Url : http://mailman.verplant.org/pipermail/collectd/attachments/20071125/27f47620/attachment.pgp
More information about the collectd
mailing list