summaryrefslogtreecommitdiffabout
authorMichael Krelin <hacker@klever.net>2004-07-23 20:40:46 (UTC)
committer Michael Krelin <hacker@klever.net>2004-07-23 20:40:46 (UTC)
commit546858a1e4d13d179a6af27b474e1396cfdf0c29 (patch) (side-by-side diff)
treeac19b0ff5e4b3164ad5375bda112a9d6d2f88c2b
parent76921288a0aa39acb53102863523c388b5d0f9ee (diff)
downloaddudki-546858a1e4d13d179a6af27b474e1396cfdf0c29.zip
dudki-546858a1e4d13d179a6af27b474e1396cfdf0c29.tar.gz
dudki-546858a1e4d13d179a6af27b474e1396cfdf0c29.tar.bz2
the ability to check/kill/reload any of the processes being monitored added.
Diffstat (more/less context) (show whitespace changes)
-rw-r--r--man/dudki.8.in11
-rw-r--r--src/dudki.cc55
-rw-r--r--src/process.cc30
-rw-r--r--src/process.h2
4 files changed, 62 insertions, 36 deletions
diff --git a/man/dudki.8.in b/man/dudki.8.in
index 3011034..05db733 100644
--- a/man/dudki.8.in
+++ b/man/dudki.8.in
@@ -1,123 +1,128 @@
.TH dudki 8 "June 9th, 2004" "dudki(8)" "Klever Group (http://www.klever.net/)"
.hla en
.ds longopt @HAVE_GETOPT_LONG@
.SH NAME
dudki \- a process monitoring daemon
.SH SYNOPSIS
\fBdudki\fR [\fB-h\fR]
.if \*[longopt] [\fB--help\fR] [\fB--usage\fR]
[\fB-V\fR]
.if \*[longopt] [\fB--version\fR]
[\fB-L\fR]
.if \*[longopt] [\fB--license\fR]
[\fB-f\fR \fIconfigfile\fR]
.if \*[longopt] [\fB--config=\fR\fIconfigfile\fR]
[\fB-k\fR]
.if \*[longopt] [\fB--kill\fR]
[\fB-r\fR]
.if \*[longopt] [\fB--reload\fR]
[\fB-c\fR]
.if \*[longopt] [\fB--check\fR]
[\fB-e\fR]
.if \*[longopt] [\fB--ensure\fR]
[\fB-t\fR]
.if \*[longopt] [\fB--test\fR]
+[\fI<process-list>\fR]
.SH DESCRIPTION
dudki daemon is designed to run in the background and periodically
check if certain processes specified in the configuration file are
running. If a process is detected as dead dudki tries to restart it
using the command line specified in the configuration file and notifies
the specified contact (currently only via email).
.SH OPTIONS
.TP
.ie \*[longopt] \fB-f\fR \fIconfigfile\fR, \fB--config=\fR\fIconfigfile\fR
.el \fB-f\fR \fIconfigfile\fR
Specify the configuration file to use (default is
\fI@sysconfdir@/dudki.conf\fR).
.TP
.ie \*[longopt] \fB-k\fR, \fB--kill\fR
.el \fB-k\fR
-Stop the running instance by sending the \fBSIGTERM\fR signal.
+Stop the running instance by sending the \fBSIGTERM\fR signal. If no process
+name is specified on the command line, dudki kills its own running instance.
.TP
.ie \*[longopt] \fB-r\fR, \fB--reload\fR
.el \fB-r\fR
-Reload the running instance by sending the \fBSIGHUP\fR signal.
+Reload the running instance by sending the \fBSIGHUP\fR signal. As with
+\fB-k\fR, if no process name is specified on the command line, dudki sends
+\fBSIGHUP\fR to its own running instance.
.TP
.ie \*[longopt] \fB-c\fR, \fB--check\fR
.el \fB-c\fR
-Check if dudki is running. Exit with non-zero status if not.
+Check if dudki is running. Exit with non-zero status if not. The same target
+rules as in \fB-k\fR and \fB-r\fR apply here.
.TP
.ie \*[longopt] \fB-e\fR, \fB--ensure\fR
.el \fB-e\fR
Ensure that dudki is running and start it if it is not. Useful for running
as a cron job once in a while. If the daemon is already running, exit quietly
without producing any output.
.TP
.ie \*[longopt] \fB-t\fR, \fB--test\fR
.el \fB-t\fR
Check the syntax of configuration file and exit.
.TP
.ie \*[longopt] \fB-h\fR, \fB--help\fR, \fB--usage\fR
.el \fB-h\fR
Display short usage message and exit.
.TP
.ie \*[longopt] \fB-V\fR, \fB--version\fR
.el \fB-V\fR
Report version and exit.
.TP
.ie \*[longopt] \fB-L\fR, \fB--license\fR
.el \fB-L\fR
Show licensing terms.
.SH EXIT STATUS
Zero in case of success, non-zero otherwise.
.SH FILES
.TP
@sysconfdir@/dudki.conf
Default configuration file.
.TP
/var/run/dudki.pid
Default pid-file for the dudki process.
.SH AUTHOR
Written by Michael 'hacker' Krelin <hacker@klever.net>
.SH COPYRIGHT
Copyright (c) 2004 Klever Group (http://www.klever.net/)
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
.SH BUGS
You tell me. Send reports to <dudki-bugs@klever.net>
.SH SEE ALSO
\fBdudki.conf\fR(5)
diff --git a/src/dudki.cc b/src/dudki.cc
index b4e95a7..e91ad5e 100644
--- a/src/dudki.cc
+++ b/src/dudki.cc
@@ -1,252 +1,269 @@
#include <unistd.h>
#include <signal.h>
#include <syslog.h>
#include <iostream>
#include <fstream>
#include <stdexcept>
using namespace std;
#include "configuration.h"
#include "util.h"
#include "config.h"
#ifdef HAVE_GETOPT_H
# include <getopt.h>
#endif
#ifndef DEFAULT_CONF_FILE
# define DEFAULT_CONF_FILE "/etc/dudki.conf"
#endif
#define PHEADER PACKAGE " Version " VERSION
#define PCOPY "Copyright (c) 2004 Klever Group"
// Raised by both signal handlers in this file; the monitoring loop in main()
// keeps running only while this is false.
bool finishing = false;
// Raised (together with `finishing`) by sighup_handler(); when set, main()
// re-executes the program via execvp() after the monitoring loop ends.
bool restarting = false;
// Copy of the argv pointer array taken at the start of main(); it is what
// gets handed to execvp() on restart.
static char **_argv = NULL;
// Handler that main() installs for SIGINT, SIGABRT and SIGTERM.
// It records the event in syslog and raises `finishing` so that the
// monitoring loop stops; the signal number itself is not looked at.
static void lethal_signal_handler(int /*signum*/)
{
	syslog(LOG_NOTICE,"Lethal signal received. Terminating.");
	finishing = true;
}
// SIGHUP handler: records the reload request in syslog and raises both
// `restarting` and `finishing`, so the monitoring loop in main() stops and
// the program re-executes itself.
// The signal number is not inspected.
static void sighup_handler(int /*signum*/) {
	// The message used to read "SUGHUP", which made the reload event
	// impossible to find when searching logs for the signal name.
	syslog(LOG_NOTICE,"SIGHUP received, reloading.");
	restarting = finishing = true;
}
// Performs one monitoring pass: walks the configured process map and calls
// check() on every entry, handing it the entry's key (the process id string)
// and the configuration itself.
void check_herd(configuration& config) {
	processes_t::iterator proc = config.processes.begin();
	while(proc!=config.processes.end()) {
		proc->second.check(proc->first,config);
		++proc;
	}
}
// Sends `signum` to the dudki instance whose pid is stored in the pidfile
// named by config.pidfile (signum 0 only probes for existence, as per kill()).
//
// Throws:
//   runtime_error - the pidfile cannot be opened, holds no usable pid, or
//                   kill() reports a failure;
//   int (0)       - the pidfile names the calling process itself; callers
//                   use this to tell "it is me" apart from real errors.
void signal_self(const configuration& config,int signum) {
	ifstream pids(config.pidfile.c_str(),ios::in);
	if(!pids)
		throw runtime_error("Can't detect running instance");
	pid_t pid = 0;
	pids >> pid;
	// Zero means nothing was read.  Negative values must be refused as well:
	// kill() interprets them as a process group (and -1 as "every process
	// we are allowed to signal"), so a corrupted pidfile could otherwise
	// take down unrelated processes.
	if(pid<=0)
		throw runtime_error("Can't detect running instance");
	if(pid==getpid())
		throw 0;
	if(kill(pid,signum))
		throw runtime_error("Failed to signal running instance");
}
// Program entry point.
// Parses the command line, loads the configuration file and then performs the
// selected operation: test the configuration, signal/check the running dudki
// instance or the processes named on the command line, ensure the daemon is
// running, or run the monitoring loop itself.
// Returns 1 when an exception derived from std::exception reaches the outer
// handler; several paths leave directly through exit()/_exit().
int main(int argc,char **argv) {
try {
// Keep a private copy of the argv pointer array (including the terminating
// NULL entry) for the execvp() performed on restart.
// NOTE(review): plain new[] signals failure by throwing std::bad_alloc, so the
// null check below is not expected to ever fire.
_argv = new char*[argc+1];
if(!_argv)
throw runtime_error("memory allocation problem at the very start");
memmove(_argv,argv,sizeof(*_argv)*(argc+1));
string config_file = DEFAULT_CONF_FILE;
enum {
op_default,
op_work,
- op_hup,
- op_term,
- op_check,
+ op_signal,
op_ensure,
op_test
} op = op_default;
// Signal to deliver for op_signal; 0 means "only check for existence" (-c).
+ int op_signum = 0;
// Option parsing: at most one operation may be selected per invocation.
while(true) {
#define SHORTOPTSTRING "f:hVLrkcet"
#ifdef HAVE_GETOPT_LONG
static struct option opts[] = {
{ "help", no_argument, 0, 'h' },
{ "usage", no_argument, 0, 'h' },
{ "version", no_argument, 0, 'V' },
{ "license", no_argument, 0, 'L' },
{ "config", required_argument, 0, 'f' },
{ "kill", no_argument, 0, 'k' },
{ "reload", no_argument, 0, 'r' },
{ "check", no_argument, 0, 'c' },
{ "ensure", no_argument, 0, 'e' },
{ "test", no_argument, 0, 't' },
{ NULL, 0, 0, 0 }
};
int c = getopt_long(argc,argv,SHORTOPTSTRING,opts,NULL);
#else /* !HAVE_GETOPT_LONG */
int c = getopt(argc,argv,SHORTOPTSTRING);
#endif /* /HAVE_GETOPT_LONG */
if(c==-1)
break;
switch(c) {
case 'h':
cerr << PHEADER << endl
- << PCOPY << endl << endl <<
+ << PCOPY << endl << endl
+ << " " << argv[0] << " [options] [processes]" << endl << endl <<
#ifdef HAVE_GETOPT_LONG
" -h, --help\n"
" --usage display this text\n"
" -V, --version display version number\n"
" -L, --license show license\n"
" -f filename, --config=filename\n"
" specify the configuration file to use\n"
"\n"
- " -k, --kill stop running instance\n"
+ " -k, --kill stop running instance (send SIGTERM)\n"
" -r, --reload reload running instance (send SIGHUP)\n"
- " -c, --check check if dudki is running\n"
+ " -c, --check check if the process is running\n"
+ " (the above commands operate on dudki itself if no\n"
+ " process name has been specified)\n"
" -e, --ensure ensure that dudki is running\n"
" -t, --test test configuration file and exit"
#else /* !HAVE_GETOPT_LONG */
" -h display this text\n"
" -V display version number\n"
" -L show license\n"
" -f filename specify the configuration file to use\n"
"\n"
- " -k stop running instance\n"
+ " -k stop running instance (send SIGTERM)\n"
" -r reload running instance (send SIGHUP)\n"
- " -c check if dudki is running\n"
+ " -c check if the process is running\n"
+ " (the above commands operate on dudki itself if no\n"
+ " process name has been specified)\n"
" -e ensure that dudki is running\n"
" -t test configuration file and exit"
#endif /* /HAVE_GETOPT_LONG */
<< endl;
exit(0);
break;
case 'V':
cerr << VERSION << endl;
exit(0);
break;
case 'L':
extern const char *COPYING;
cerr << COPYING << endl;
exit(0);
break;
case 'f':
config_file = optarg;
break;
case 'k':
if(op!=op_default) {
cerr << "Can't obey two or more orders at once" << endl;
exit(1);
}
- op = op_term;
+ op = op_signal; op_signum = SIGTERM;
break;
case 'r':
if(op!=op_default) {
cerr << "Can't obey two or more orders at once" << endl;
exit(1);
}
- op = op_hup;
+ op = op_signal; op_signum = SIGHUP;
break;
case 'c':
if(op!=op_default) {
cerr << "Can't obey two or more orders at once" << endl;
exit(1);
}
- op = op_check;
+ op = op_signal; op_signum = 0;
break;
case 'e':
if(op!=op_default) {
cerr << "Can't obey two or more orders at once" << endl;
exit(1);
}
op = op_ensure;
break;
case 't':
if(op!=op_default) {
cerr << "Can't obey two or more orders at once" << endl;
exit(1);
}
op = op_test;
break;
default:
cerr << "Huh??" << endl;
exit(1);
break;
}
}
// Use the basename of argv[0] (the text after the last '/') as the syslog ident.
const char *sid = *argv;
const char *t;
while(t = index(sid,'/')) {
sid = t; sid++;
}
openlog(sid,LOG_CONS|LOG_PERROR|LOG_PID,LOG_DAEMON);
configuration config;
config.parse(config_file);
switch(op) {
case op_test:
// Reaching this point means config.parse() did not throw.
cerr << "Configuration OK" << endl;
break;
- case op_hup:
- signal_self(config,SIGHUP);
- break;
- case op_term:
- signal_self(config,SIGTERM);
- break;
- case op_check:
// Without process names on the command line the signal goes to the running
// dudki instance; otherwise to every named process found in the configuration,
// with per-process failures reported on stderr and counted.
// NOTE(review): in the new version only the check variant (op_signum==0) exits
// here.  For -k/-r neither branch exits and there is no break, so failures are
// swallowed silently and control falls through into op_ensure below, which may
// end up starting a daemon.  The int thrown by signal_self() is not caught by
// this handler either.  Confirm whether a break / error report is missing.
+ case op_signal:
try{
- signal_self(config,0);
+ if(optind>=argc) {
+ signal_self(config,op_signum);
+ }else{
+ int failures = 0;
+ for(int narg=optind;narg<argc;narg++) {
+ try {
+ processes_t::const_iterator i = config.processes.find(argv[narg]);
+ if(i==config.processes.end())
+ throw runtime_error("no such process configured");
+ i->second.signal(op_signum);
+ }catch(exception& e) {
+ cerr << "dudki(" << argv[narg] << "): " << e.what() << endl;
+ failures++;
+ }
+ }
+ if(failures)
+ throw runtime_error("not all processes have been successfully signaled");
+ }
+ if(!op_signum)
exit(0);
}catch(exception& e) {
+ if(!op_signum)
exit(1);
}
// Probe the running instance with signal 0: if it answers we are done,
// otherwise continue into the daemon code below and take its place.
case op_ensure:
try {
signal_self(config,0);
break;
}catch(exception& e) {
syslog(LOG_NOTICE,"The dudki process is down, taking its place");
config.daemonize = true;
}catch(int zero) {
// we throw zero in case we're ensuring that this very process is running.
// we don't have to daemonize if we're daemonic.
config.daemonize = false;
}
case op_default:
case op_work:
{
// Optionally detach: the parent exits right after a successful fork().
if(config.daemonize) {
pid_t pf = fork();
if(pf<0)
throw runtime_error(string(__PRETTY_FUNCTION__)+": failed to fork()");
if(pf) {
_exit(0);
}
}
pid_file pidfile;
pidfile.set(config.pidfile);
// Install the handlers and make sure the corresponding signals are unblocked.
signal(SIGINT,lethal_signal_handler);
signal(SIGABRT,lethal_signal_handler);
signal(SIGTERM,lethal_signal_handler);
signal(SIGHUP,sighup_handler);
sigset_t sset;
sigemptyset(&sset);
sigaddset(&sset,SIGINT); sigaddset(&sset,SIGABRT);
sigaddset(&sset,SIGTERM); sigaddset(&sset,SIGHUP);
sigprocmask(SIG_UNBLOCK,&sset,NULL);
// Monitoring loop: check every process, sleep, repeat until a handler
// raises `finishing`.
while(!finishing) {
check_herd(config);
sleep(config.check_interval);
}
// A SIGHUP-triggered stop re-executes the program with the saved argv.
if(restarting)
execvp(_argv[0],_argv);
}
break;
default:
throw runtime_error(string(__PRETTY_FUNCTION__)+": internal error");
}
}catch(exception& e) {
cerr << "Oops: " << e.what() << endl;
return 1;
}
}
diff --git a/src/process.cc b/src/process.cc
index bfab311..1ffac9f 100644
--- a/src/process.cc
+++ b/src/process.cc
@@ -1,187 +1,189 @@
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <signal.h>
#include <pwd.h>
#include <grp.h>
#include <sys/wait.h>
#include <syslog.h>
#include <errno.h>
#include <iostream>
#include <fstream>
#include <stdexcept>
using namespace std;
#include "process.h"
#include "configuration.h"
// Checks whether the monitored process is alive and tries to revive it if not.
// `id` is the process name used in log messages and notifications.
// Liveness is probed with signal 0 against the pid from the pidfile (the old
// version did this inline, the new one delegates to signal(0) and treats any
// exception as "down").
// `patience` counts consecutive failed checks and drives the back-off:
//   below 10    - log, notify and try to launch the process on every check;
//   exactly 10  - log and notify once that dudki is giving up for a while;
//   11 to 60    - stay silent, only keep counting;
//   above 60    - reset to 0, so the relaunch attempts start over.
// A successful probe resets `patience` to 0.
void process::check(const string& id,configuration& config) {
- bool running = false;
- ifstream pids(pidfile.c_str(),ios::in);
- if(pids) {
- pid_t pid = 0;
- pids >> pid;
- pids.close();
- if(pid) {
- if(!kill(pid,0)) {
- running = true;
- }
- }
- }
- if(running){
+ try {
+ signal(0);
patience = 0;
- }else{
+ }catch(exception& e) {
if(patience>60) { // TODO: configurable
patience = 0;
}else{
if(patience<10) { // TODO: configurable
syslog(LOG_NOTICE,"The process '%s' is down, trying to launch.",id.c_str());
do_notify(id,"Starting up",
"The named process seems to be down. Dudki will try\n"
"to revive it by running the specified command.\n",
config);
// A failed launch is only logged; the next check will try again.
try {
launch(id,config);
}catch(exception& e) {
syslog(LOG_ERR,"Error trying to launch process '%s': %s",id.c_str(),e.what());
}
}else if(patience==10){ // TODO: configurable like the above
syslog(LOG_NOTICE,"Giving up on process '%s' for a while",id.c_str());
do_notify(id,"Giving up",
"After a number of attempts to relaunch the named process\n"
"It still seems to be down. Dudki is giving up attempts\n"
"to revive the process for a while.\n",
config);
}
patience++;
}
}
}
void process::launch(const string& id,configuration& config) {
uid_t uid = (uid_t)-1;
gid_t gid = (gid_t)-1;
if(!user.empty()) {
struct passwd *ptmp = getpwnam(user.c_str());
if(ptmp) {
uid = ptmp->pw_uid;
gid = ptmp->pw_gid;
}else{
errno=0;
uid = strtol(user.c_str(),NULL,0);
if(errno)
throw runtime_error("Failed to resolve User value to uid");
}
}
if(!group.empty()) {
struct group *gtmp = getgrnam(group.c_str());
if(gtmp) {
gid = gtmp->gr_gid;
}else{
errno = 0;
gid = strtol(group.c_str(),NULL,0);
if(errno)
throw runtime_error("Failed to reslove Group value to gid");
}
}
pid_t p = fork();
if(p<0)
throw runtime_error(string(__PRETTY_FUNCTION__)+": failed to fork()");
if(!p) {
// child
try {
setsid();
if(user.empty()) {
if((getgid()!=gid) && setgid(gid))
throw runtime_error(string(__PRETTY_FUNCTION__)+": failed to setgid()");
}else{
if(initgroups(user.c_str(),gid))
throw runtime_error(string(__PRETTY_FUNCTION__)+": failed to initgroups()");
}
if(!chroot.empty()) {
if(::chroot(chroot.c_str()))
throw runtime_error(string(__PRETTY_FUNCTION__)+": failed to chroot()");
}
if(!user.empty()) {
if((getuid()!=uid) && setuid(uid))
throw runtime_error(string(__PRETTY_FUNCTION__)+": failed to setuid()");
}
char *argv[] = { "/bin/sh", "-c", (char*)restart_cmd.c_str(), NULL };
close(0); close(1); close(2);
execv("/bin/sh",argv);
}catch(exception& e) {
syslog(LOG_ERR,"Error trying to launch process '%s': %s",id.c_str(),e.what());
}
_exit(-1);
}
// parent
int rv;
if(waitpid(p,&rv,0)<0)
throw runtime_error(string(__PRETTY_FUNCTION__)+": failed to waitpid()");
}
// Delivers a notification about `event` concerning process `id`.
// The notify action is taken from this process' own `notify` setting or,
// when that is empty, from the global config.notify; with neither set the
// call does nothing.  The action has the form "<schema>:<target>" and the
// only schema handled is "mailto".  Nothing is thrown to the caller: every
// failure is written to syslog instead.
void process::do_notify(const string& id,const string& event,const string& description,configuration& config) {
	string action = notify;
	if(action.empty())
		action = config.notify;
	if(action.empty())
		return;
	try {
		const string::size_type sep = action.find(':');
		if(sep==string::npos)
			throw runtime_error("invalid notify action specification");
		const string schema(action,0,sep);
		const string target(action,sep+1);
		if(schema!="mailto")
			throw runtime_error("unrecognized notification schema");
		notify_mailto(target,id,event,description,config);
	}catch(exception& e) {
		syslog(LOG_ERR,"Notification error: %s",e.what());
	}
}
// Mails a notification about `event` for process `id` to `email` by piping
// a message into "/usr/sbin/sendmail -i <email>".
// The message carries this process' own mailto headers, then those global
// headers from `config` that the process does not override, then a subject
// of the form "[id] event" and `description` as the body.
// Throws runtime_error if the pipe, the child process or the stream cannot be
// set up, or if the message could not be written to sendmail.
void process::notify_mailto(const string& email,const string& id,const string& event,const string& description,configuration& config) {
	int files[2];
	if(pipe(files))
		throw runtime_error("Failed to pipe()");
	pid_t pid = vfork();
	if(pid==-1) {
		close(files[0]);
		close(files[1]);
		throw runtime_error("Failed to vfork()");
	}
	if(!pid) {
		// child
		if(dup2(files[0],0)!=0)
			_exit(-1);
		close(1);
		close(files[0]);
		close(files[1]);
		// The argument list of a variadic exec must end with a null *pointer*;
		// a bare NULL may be passed as a plain integer.
		execl("/usr/sbin/sendmail","/usr/sbin/sendmail","-i",email.c_str(),(char*)NULL);
		_exit(-1);
	}
	// parent
	close(files[0]);
	FILE *mta = fdopen(files[1],"w");
	if(!mta) {
		// Do not leak the descriptor or leave the child unreaped.
		close(files[1]);
		int ignored;
		waitpid(pid,&ignored,0);
		throw runtime_error("Failed to fdopen()");
	}
	// If sendmail could not be started (or died early) nobody reads the pipe
	// and writing to it raises SIGPIPE, whose default action would terminate
	// the whole daemon.  Ignore it while the message is written so that the
	// failure shows up as a write error instead.
	struct sigaction ignore_pipe, saved_pipe;
	ignore_pipe.sa_handler = SIG_IGN;
	sigemptyset(&ignore_pipe.sa_mask);
	ignore_pipe.sa_flags = 0;
	const bool pipe_guarded = !sigaction(SIGPIPE,&ignore_pipe,&saved_pipe);
	for(headers_t::const_iterator i=mailto_headers.begin();i!=mailto_headers.end();++i) {
		fprintf(mta,"%s: %s\n",i->first.c_str(),i->second.c_str());
	}
	for(headers_t::const_iterator i=config.mailto_headers.begin();i!=config.mailto_headers.end();++i) {
		// per-process headers take precedence over the global ones
		if(mailto_headers.find(i->first)!=mailto_headers.end())
			continue;
		fprintf(mta,"%s: %s\n",i->first.c_str(),i->second.c_str());
	}
	fprintf(mta,
			"Subject: [%s] %s\n\n"
			"%s\n"
			"---\n"
			"This message was sent automatically by the 'dudki' daemon\n",
			id.c_str(), event.c_str(),
			description.c_str() );
	bool written = !ferror(mta);
	// fclose() flushes the buffered message, so its result matters too.
	if(fclose(mta))
		written = false;
	if(pipe_guarded)
		sigaction(SIGPIPE,&saved_pipe,NULL);
	int status;
	waitpid(pid,&status,0);
	// TODO: check the return code
	if(!written)
		throw runtime_error("Failed to pass the message to sendmail");
}
+
// Sends `signum` to the process whose pid is recorded in this entry's pidfile
// (the code itself places no restriction on signum; process::check() passes 0).
// Throws runtime_error when the pidfile cannot be opened, when no non-zero
// pid can be read from it, or when kill() fails.
// NOTE(review): a negative number in the pidfile is handed to kill() as is,
// which then addresses a process group (or, for -1, every process the caller
// may signal) - consider rejecting pid<=0 here.
+void process::signal(int signum) const {
+ ifstream pids(pidfile.c_str(),ios::in);
+ if(!pids)
+ throw runtime_error("no pidfile found");
+ pid_t pid = 0;
+ pids >> pid;
+ pids.close();
+ if(!pid)
+ throw runtime_error("no pid in pidfile");
+ if(kill(pid,signum))
+ throw runtime_error("failed to signal process");
+}
diff --git a/src/process.h b/src/process.h
index b6d7091..27ee049 100644
--- a/src/process.h
+++ b/src/process.h
@@ -1,36 +1,38 @@
#ifndef __PROCESS_H
#define __PROCESS_H
#include <string>
#include <map>
using namespace std;
class configuration;
typedef map<string,string> headers_t;
// One monitored process: where to find its pid, how to restart it, whom to
// notify, and the bookkeeping used by the periodic check.
class process {
public:
// File holding the pid of the monitored process; read by signal().
string pidfile;
// Command line that launch() runs through "/bin/sh -c" to start the process.
string restart_cmd;
// Notification action in "<schema>:<target>" form (do_notify() handles only
// "mailto"); when empty the global setting from the configuration is used.
string notify;
// User and group (name or numeric id) that launch() switches to; empty means
// "do not switch".
string user;
string group;
// Directory launch() chroot()s into before running the command; empty for none.
string chroot;
// Extra mail headers for notify_mailto(); they take precedence over the
// global headers of the same name.
headers_t mailto_headers;
// Number of consecutive failed checks; see check() for how it is used.
int patience;
process()
: patience(0) { }
// Probes the process and tries to relaunch it / notify when it is down.
void check(const string& id,configuration& config);
// Runs restart_cmd in a child process with the configured credentials.
void launch(const string& id,configuration& config);
// Dispatches a notification according to the notify action.
void do_notify(const string& id,const string& event,const string& description,configuration& config);
// Sends the notification by mail through /usr/sbin/sendmail.
void notify_mailto(const string& email,const string& id,const string& event,
const string& description,configuration& config);
+
// Sends the given signal to the pid found in pidfile; throws on failure.
+ void signal(int signum) const;
};
typedef map<string,process> processes_t;
#endif /* __PROCESS_H */