-rw-r--r-- | NEWS.xml | 3 | ||||
-rw-r--r-- | man/dudki.conf.5.in | 6 | ||||
-rw-r--r-- | src/configuration.cc | 5 | ||||
-rw-r--r-- | src/dudki.cc | 5 | ||||
-rw-r--r-- | src/process.cc | 98 | ||||
-rw-r--r-- | src/process.h | 6 |
6 files changed, 120 insertions, 3 deletions
@@ -1,14 +1,15 @@ <?xml version="1.0" encoding="us-ascii"?> <news> - <version version="0.2" date="July 23rd, 2004"> + <version version="0.2" date="July 24th, 2004"> <ni>now dudki sends arbitrary signals to the processes being monitored from the command line</ni> + <ni>detection of running processes which do not keep pidfiles, using process name</ni> </version> <version version="0.1" date="July 21st, 2004"> <ni><kbd>initgroups()</kbd> called before executing <kbd>RestartCommand</kbd></ni> <ni>more civilized way of restarting on <kbd>SIGHUP</kbd></ni> <ni>minor changes to build process</ni> </version> <version version="0.0" date="July 11th, 2004"> <ni>Initial release</ni> </version> </news> diff --git a/man/dudki.conf.5.in b/man/dudki.conf.5.in index 23f636d..7d365d3 100644 --- a/man/dudki.conf.5.in +++ b/man/dudki.conf.5.in @@ -1,94 +1,98 @@ -.TH dudki.conf 5 "July 9th, 2004" "dudki.conf(5)" "Klever Group (http://www.klever.net/)" +.TH dudki.conf 5 "July 24th, 2004" "dudki.conf(5)" "Klever Group (http://www.klever.net/)" .hla en .SH NAME dudki.conf \- The configuration file for the dudki process monitoring daemon .SH SYNOPSIS The dudki.conf file is a runtime configuration file for the dudki process monitoring daemon. It contains the information about the process being monitored and the instruction on how to cope with the processes. .SH FILE FORMAT The file consist of the global configuration and per-process configuration sections. The global configuration controls general dudki behaviour and sets defaults for per-process configuration directives. .SH GLOBAL CONFIGURATION .TP \fBCheckInterval\fR \fIseconds\fR Specifies interval in seconds at wich dudki performs checks. .TP \fBDaemonize\fR \fIon/off\fR Specifies whether dudki daemon should fork itself into background. Normally, you don't want it to run in foreground. Unless you want to run it from \fIinit(8)\fR process (I've nevetr tried it) via \fIinittab(5)\fR. .TP \fBPidFile\fR \fIfilename\fR Specifies where to store dudki's process id (default is /var/run/dudki.pid) .TP \fBMailtoHeader\fR \fIheader\fR \fIcontent\fR Sets global defaults for process section's MailtoHeader directive. See below. .TP \fBNotify\fR \fIschema\fR:\fItarget\fR Sets global default for per process notification. See below. .TP \fB<Process\fR \fIidentifier\fR\fB>\fR Starts per process configuration section. The process will be referenced using the short descriptive name specified (for example in email notifications). .SH PER-PROCESS CONFIGURATION Per-process configuration sections specifies the processes to monitor. And parameters pertaining to the process. .TP \fBPidFile\fR \fIfilename\fR Specifies the file where to fetch process id of the process being monitored from. The absence of file, as well as the absence of process specified by the pid stored in the file signifies the process death and triggers restart. .TP +\fBProcessName\fR \fIprocess name\fR +Specifies the name of the process. The alternative way to find process if it +doesn't keep pid in the file. Similar to \fBpidof\fR(1). +.TP \fBRestartCommand\fR \fIcommand\fR Specifies the command to run in order to restart the process. .TP \fBUser\fR \fIuser\fR Specifies the unix user to change to before executing the command specified by \fBRestartCommand\fR. .TP \fBGroup\fR \fIgroup\fR Specifies the unix group to change to before executing the command specified by \fBRestartCommand\fR. .TP \fBChroot\fR \fIpath\fR Specifies the directory to set filesystem root to before executing the command specified by by \fBRestartCommand\fR. .TP \fBMailtoHeader\fR \fIheader\fR \fIcontent\fR Specifies extra headers to add to mailto: notifications sent by the dudki daemon. Headers specified in per-process section override the ones specified globally.A .TP \fBNotify\fR \fIschema\fR:\fItarget\fR Specifies the contact to be notified whenever something notable happens to the process. The only schema currently supported is 'mailto:'. .TP \fB</Process>\fR Signifies the end of process section. .SH EXAMPLE .br CheckInterval 60 .br diff --git a/src/configuration.cc b/src/configuration.cc index eb010c1..edc8c04 100644 --- a/src/configuration.cc +++ b/src/configuration.cc @@ -63,87 +63,92 @@ static DOTCONF_CB(dco_mailto_header) { dc_context *dcc = (dc_context*)ctx; return "Unexpected MailtoHeader"; } return NULL; } static DOTCONF_CB(dco_notify) { dc_context *dcc = (dc_context*)ctx; switch(dcc->ctx) { case DCC_ROOT: dcc->cf->notify = cmd->data.str; break; case DCC_PROCESS: dcc->ps->notify = cmd->data.str; break; default: return "Unexpected Notify"; } return NULL; } static DOTCONF_CB(dco_process) { dc_context *dcc = (dc_context*)ctx; string id = cmd->data.str; if(id[id.length()-1]=='>') id.erase(id.length()-1); dcc->ps = &(dcc->cf->processes[id]); dcc->ctx = DCC_PROCESS; return NULL; } static DOTCONF_CB(dco__process) { dc_context *dcc = (dc_context*)ctx; dcc->ps = NULL; dcc->ctx = DCC_ROOT; return NULL; } +static DOTCONF_CB(dco_process_name) { dc_context *dcc = (dc_context*)ctx; + dcc->ps->process_name = cmd->data.str; + return NULL; +} static DOTCONF_CB(dco_restart_command) { dc_context *dcc = (dc_context*)ctx; dcc->ps->restart_cmd = cmd->data.str; return NULL; } static DOTCONF_CB(dco_user) { dc_context *dcc = (dc_context*)ctx; dcc->ps->user = cmd->data.str; return NULL; } static DOTCONF_CB(dco_group) { dc_context *dcc = (dc_context*)ctx; dcc->ps->group = cmd->data.str; return NULL; } static DOTCONF_CB(dco_chroot) { dc_context *dcc = (dc_context*)ctx; dcc->ps->chroot = cmd->data.str; return NULL; } static const configoption_t dc_options[] = { { "CheckInterval", ARG_INT, dco_check_interval, NULL, DCC_ROOT }, { "Daemonize", ARG_TOGGLE, dco_daemonize, NULL, DCC_ROOT }, { "PidFile", ARG_STR, dco_pid_file, NULL, DCC_ROOT|DCC_PROCESS }, { "MailtoHeader", ARG_STR, dco_mailto_header, NULL, DCC_ROOT|DCC_PROCESS }, { "Notify", ARG_STR, dco_notify, NULL, DCC_ROOT|DCC_PROCESS }, { "<Process", ARG_STR, dco_process, NULL, DCC_ROOT }, + { "ProcessName", ARG_STR, dco_process_name, NULL, DCC_PROCESS }, { "RestartCommand", ARG_STR, dco_restart_command, NULL, DCC_PROCESS }, { "User", ARG_STR, dco_user, NULL, DCC_PROCESS }, { "Group", ARG_STR, dco_group, NULL, DCC_PROCESS }, { "Chroot", ARG_STR, dco_chroot, NULL, DCC_PROCESS }, { "</Process>", ARG_NONE, dco__process, NULL, DCC_PROCESS }, LAST_OPTION }; static const char *dc_context_checker(command_t *cmd,unsigned long mask) { dc_context *dcc = (dc_context*)cmd->context; if( (mask==CTX_ALL) || ((mask&dcc->ctx)==dcc->ctx) ) return NULL; return "misplaced option"; } static FUNC_ERRORHANDLER(dc_error_handler) { throw runtime_error(string("error parsing config file: ")+msg); } void configuration::parse(const string& cfile) { struct dc_context dcc; dcc.cf = this; dcc.ctx = DCC_ROOT; configfile_t *cf = dotconf_create((char*)cfile.c_str(),dc_options,(context_t*)&dcc,CASE_INSENSITIVE); if(!cf) throw runtime_error(string(__PRETTY_FUNCTION__)+": failed to dotconf_create()"); cf->errorhandler = (dotconf_errorhandler_t) dc_error_handler; cf->contextchecker = (dotconf_contextchecker_t) dc_context_checker; if(!dotconf_command_loop(cf)) throw runtime_error(string(__PRETTY_FUNCTION__)+": failed to dotconf_command_loop()"); dotconf_cleanup(cf); } diff --git a/src/dudki.cc b/src/dudki.cc index 9562079..c966695 100644 --- a/src/dudki.cc +++ b/src/dudki.cc @@ -6,66 +6,68 @@ #include <fstream> #include <stdexcept> using namespace std; #include "configuration.h" #include "util.h" #include "config.h" #ifdef HAVE_GETOPT_H # include <getopt.h> #endif #ifndef DEFAULT_CONF_FILE # define DEFAULT_CONF_FILE "/etc/dudki.conf" #endif #define PHEADER PACKAGE " Version " VERSION #define PCOPY "Copyright (c) 2004 Klever Group" bool finishing = false; bool restarting = false; static char **_argv = NULL; static void lethal_signal_handler(int signum) { syslog(LOG_NOTICE,"Lethal signal received. Terminating."); finishing = true; } static void sighup_handler(int signum) { syslog(LOG_NOTICE,"SUGHUP received, reloading."); restarting = finishing = true; } void check_herd(configuration& config) { + process::prepare_herd(); for(processes_t::iterator i=config.processes.begin();i!=config.processes.end();++i) i->second.check(i->first,config); + process::unprepare_herd(); } void signal_self(const configuration& config,int signum) { ifstream pids(config.pidfile.c_str(),ios::in); if(!pids) throw runtime_error("Can't detect running instance"); pid_t pid = 0; pids >> pid; if(!pid) throw runtime_error("Can't detect running instance"); if(pid==getpid()) throw 0; if(kill(pid,signum)) throw runtime_error("Failed to signal running instance"); } int main(int argc,char **argv) { try { _argv = new char*[argc+1]; if(!_argv) throw runtime_error("memory allocation problem at the very start"); memmove(_argv,argv,sizeof(*_argv)*(argc+1)); string config_file = DEFAULT_CONF_FILE; enum { op_default, op_work, op_signal, op_ensure, op_test } op = op_default; int op_signum = 0; while(true) { @@ -191,65 +193,68 @@ int main(int argc,char **argv) { exit(1); } break; default: cerr << "Huh??" << endl; exit(1); break; } } const char *sid = *argv; const char *t; while(t = index(sid,'/')) { sid = t; sid++; } openlog(sid,LOG_CONS|LOG_PERROR|LOG_PID,LOG_DAEMON); configuration config; config.parse(config_file); switch(op) { case op_test: cerr << "Configuration OK" << endl; break; case op_signal: try { if(optind>=argc) { signal_self(config,op_signum); }else{ int failures = 0; for(int narg=optind;narg<argc;narg++) { try { processes_t::const_iterator i = config.processes.find(argv[narg]); if(i==config.processes.end()) throw runtime_error("no such process configured"); + if(op_signum) i->second.signal(op_signum); + else + i->second.check(); }catch(exception& e) { cerr << "dudki(" << argv[narg] << "): " << e.what() << endl; failures++; } } if(failures) throw runtime_error("not all processes have been successfully signaled"); } if(!op_signum) exit(0); }catch(exception& e) { if(!op_signum) exit(1); } case op_ensure: try { signal_self(config,0); break; }catch(exception& e) { syslog(LOG_NOTICE,"The dudki process is down, taking its place"); config.daemonize = true; }catch(int zero) { // we throw zero in case we're ensuring that this very process is running. // we don't have to daemonize if we're daemonic. config.daemonize = false; } case op_default: case op_work: { if(config.daemonize) { pid_t pf = fork(); if(pf<0) diff --git a/src/process.cc b/src/process.cc index 1ffac9f..8a5b5d2 100644 --- a/src/process.cc +++ b/src/process.cc @@ -1,51 +1,67 @@ #include <stdio.h> #include <sys/types.h> #include <unistd.h> #include <signal.h> #include <pwd.h> #include <grp.h> +#include <dirent.h> #include <sys/wait.h> #include <syslog.h> #include <errno.h> #include <iostream> #include <fstream> +#include <sstream> #include <stdexcept> +#include <string> +#include <vector> using namespace std; #include "process.h" #include "configuration.h" +static multimap<string,pid_t> procpids; + +void process::check() const { + if(!pidfile.empty()) { + signal(0); + }else if(!process_name.empty()) { + if(procpids.empty()) + gather_proc_info(); + if(procpids.find(process_name)==procpids.end()) + throw runtime_error("no such process"); + } // XXX: or else? +} void process::check(const string& id,configuration& config) { try { - signal(0); + check(); patience = 0; }catch(exception& e) { if(patience>60) { // TODO: configurable patience = 0; }else{ if(patience<10) { // TODO: configurable syslog(LOG_NOTICE,"The process '%s' is down, trying to launch.",id.c_str()); do_notify(id,"Starting up", "The named process seems to be down. Dudki will try\n" "to revive it by running the specified command.\n", config); try { launch(id,config); }catch(exception& e) { syslog(LOG_ERR,"Error trying to launch process '%s': %s",id.c_str(),e.what()); } }else if(patience==10){ // TODO: configurable like the above syslog(LOG_NOTICE,"Giving up on process '%s' for a while",id.c_str()); do_notify(id,"Giving up", "After a number of attempts to relaunch the named process\n" "It still seems to be down. Dudki is giving up attempts\n" "to revive the process for a while.\n", config); } patience++; } } } void process::launch(const string& id,configuration& config) { uid_t uid = (uid_t)-1; gid_t gid = (gid_t)-1; @@ -147,43 +163,123 @@ void process::notify_mailto(const string& email,const string& id,const string& e _exit(-1); close(1); close(files[0]); close(files[1]); execl("/usr/sbin/sendmail","usr/sbin/sendmail","-i",email.c_str(),NULL); _exit(-1); } // parent close(files[0]); FILE *mta = fdopen(files[1],"w"); for(headers_t::const_iterator i=mailto_headers.begin();i!=mailto_headers.end();++i) { fprintf(mta,"%s: %s\n",i->first.c_str(),i->second.c_str()); } for(headers_t::const_iterator i=config.mailto_headers.begin();i!=config.mailto_headers.end();++i) { if(mailto_headers.find(i->first)!=mailto_headers.end()) continue; fprintf(mta,"%s: %s\n",i->first.c_str(),i->second.c_str()); } fprintf(mta, "Subject: [%s] %s\n\n" "%s\n" "---\n" "This message was sent automatically by the 'dudki' daemon\n", id.c_str(), event.c_str(), description.c_str() ); fclose(mta); int status; waitpid(pid,&status,0); // TODO: check the return code } void process::signal(int signum) const { + if(!pidfile.empty()) { ifstream pids(pidfile.c_str(),ios::in); if(!pids) throw runtime_error("no pidfile found"); pid_t pid = 0; pids >> pid; pids.close(); if(!pid) throw runtime_error("no pid in pidfile"); if(kill(pid,signum)) throw runtime_error("failed to signal process"); + }else if(!process_name.empty()) { + if(procpids.empty()) + gather_proc_info(); + pair<multimap<string,pid_t>::const_iterator,multimap<string,pid_t>::const_iterator> range = procpids.equal_range(process_name); + int count = 0; + for(multimap<string,pid_t>::const_iterator i=range.first;i!=range.second;++i) { + pid_t pid = i->second; + if(kill(i->second,signum)) + throw runtime_error("failed to signal process"); + count++; + } + if(!count) + throw runtime_error("no running instance detected"); + }else + throw runtime_error("nothing is known about the process"); +} + +void process::prepare_herd() { + procpids.clear(); +} +void process::unprepare_herd() { + procpids.clear(); +} +void process::gather_proc_info() { + vector<pid_t> allpids; + DIR *pd = opendir("/proc"); + if(!pd) + throw runtime_error("failed to open /proc"); + struct dirent *pde; + pid_t selfpid = getpid(); + while(pde=readdir(pd)) { + errno=0; + pid_t pid = atoi(pde->d_name); + if((!pid) || pid==selfpid) + continue; + allpids.push_back(pid); + } + closedir(pd); + char s[256]; + procpids.clear(); + for(vector<pid_t>::const_iterator i=allpids.begin();i!=allpids.end();++i) { + int r = snprintf(s,sizeof(s),"/proc/%d/stat",*i); + if(r>=sizeof(s) || r<1) + continue; + string cmd; + ifstream ss(s,ios::in); + if(!ss) + continue; + getline(ss,cmd); + string::size_type op = cmd.find('('); + if(op==string::npos) + continue; + cmd.erase(0,op+1); + string::size_type cp = cmd.find(')'); + if(cp==string::npos) + continue; + cmd.erase(cp); + r = snprintf(s,sizeof(s),"/proc/%d/cmdline",*i); + if(r>=sizeof(s) || r<1) + continue; + ifstream cs(s,ios::binary); + if(!cs) + continue; + string command; + while(cs) { + string cl; + getline(cs,cl,(char)0); + string::size_type lsl = cl.rfind('/'); + if(lsl!=string::npos) + cl.erase(0,lsl+1); + if(cl.substr(0,cmd.length())==cmd) { + command = cl; + break; + } + } + procpids.insert(pair<string,pid_t>(cmd,*i)); + if((!command.empty()) && cmd!=command) + procpids.insert(pair<string,pid_t>(command,*i)); + } } diff --git a/src/process.h b/src/process.h index 27ee049..90b12d9 100644 --- a/src/process.h +++ b/src/process.h @@ -1,38 +1,44 @@ #ifndef __PROCESS_H #define __PROCESS_H #include <string> #include <map> using namespace std; class configuration; typedef map<string,string> headers_t; class process { public: string pidfile; + string process_name; string restart_cmd; string notify; string user; string group; string chroot; headers_t mailto_headers; int patience; process() : patience(0) { } void check(const string& id,configuration& config); void launch(const string& id,configuration& config); void do_notify(const string& id,const string& event,const string& description,configuration& config); void notify_mailto(const string& email,const string& id,const string& event, const string& description,configuration& config); void signal(int signum) const; + void check() const; + + static void prepare_herd(); + static void gather_proc_info(); + static void unprepare_herd(); }; typedef map<string,process> processes_t; #endif /* __PROCESS_H */ |