-rw-r--r-- | NEWS.xml | 3 | ||||
-rw-r--r-- | man/dudki.conf.5.in | 6 | ||||
-rw-r--r-- | src/configuration.cc | 5 | ||||
-rw-r--r-- | src/dudki.cc | 5 | ||||
-rw-r--r-- | src/process.cc | 98 | ||||
-rw-r--r-- | src/process.h | 6 |
6 files changed, 120 insertions, 3 deletions
@@ -1,14 +1,15 @@ <?xml version="1.0" encoding="us-ascii"?> <news> - <version version="0.2" date="July 23rd, 2004"> + <version version="0.2" date="July 24th, 2004"> <ni>now dudki sends arbitrary signals to the processes being monitored from the command line</ni> + <ni>detection of running processes which do not keep pidfiles, using process name</ni> </version> <version version="0.1" date="July 21st, 2004"> <ni><kbd>initgroups()</kbd> called before executing <kbd>RestartCommand</kbd></ni> <ni>more civilized way of restarting on <kbd>SIGHUP</kbd></ni> <ni>minor changes to build process</ni> </version> <version version="0.0" date="July 11th, 2004"> <ni>Initial release</ni> </version> </news> diff --git a/man/dudki.conf.5.in b/man/dudki.conf.5.in index 23f636d..7d365d3 100644 --- a/man/dudki.conf.5.in +++ b/man/dudki.conf.5.in @@ -1,13 +1,13 @@ -.TH dudki.conf 5 "July 9th, 2004" "dudki.conf(5)" "Klever Group (http://www.klever.net/)" +.TH dudki.conf 5 "July 24th, 2004" "dudki.conf(5)" "Klever Group (http://www.klever.net/)" .hla en .SH NAME dudki.conf \- The configuration file for the dudki process monitoring daemon .SH SYNOPSIS The dudki.conf file is a runtime configuration file for the dudki process monitoring daemon. It contains the information about the process being monitored and the instruction on how to cope with the processes. @@ -51,24 +51,28 @@ notifications). .SH PER-PROCESS CONFIGURATION Per-process configuration sections specifies the processes to monitor. And parameters pertaining to the process. .TP \fBPidFile\fR \fIfilename\fR Specifies the file where to fetch process id of the process being monitored from. The absence of file, as well as the absence of process specified by the pid stored in the file signifies the process death and triggers restart. .TP +\fBProcessName\fR \fIprocess name\fR +Specifies the name of the process. The alternative way to find process if it +doesn't keep pid in the file. Similar to \fBpidof\fR(1). +.TP \fBRestartCommand\fR \fIcommand\fR Specifies the command to run in order to restart the process. .TP \fBUser\fR \fIuser\fR Specifies the unix user to change to before executing the command specified by \fBRestartCommand\fR. .TP \fBGroup\fR \fIgroup\fR Specifies the unix group to change to before executing the command specified by \fBRestartCommand\fR. .TP \fBChroot\fR \fIpath\fR diff --git a/src/configuration.cc b/src/configuration.cc index eb010c1..edc8c04 100644 --- a/src/configuration.cc +++ b/src/configuration.cc @@ -83,48 +83,53 @@ static DOTCONF_CB(dco_process) { dc_context *dcc = (dc_context*)ctx; if(id[id.length()-1]=='>') id.erase(id.length()-1); dcc->ps = &(dcc->cf->processes[id]); dcc->ctx = DCC_PROCESS; return NULL; } static DOTCONF_CB(dco__process) { dc_context *dcc = (dc_context*)ctx; dcc->ps = NULL; dcc->ctx = DCC_ROOT; return NULL; } +static DOTCONF_CB(dco_process_name) { dc_context *dcc = (dc_context*)ctx; + dcc->ps->process_name = cmd->data.str; + return NULL; +} static DOTCONF_CB(dco_restart_command) { dc_context *dcc = (dc_context*)ctx; dcc->ps->restart_cmd = cmd->data.str; return NULL; } static DOTCONF_CB(dco_user) { dc_context *dcc = (dc_context*)ctx; dcc->ps->user = cmd->data.str; return NULL; } static DOTCONF_CB(dco_group) { dc_context *dcc = (dc_context*)ctx; dcc->ps->group = cmd->data.str; return NULL; } static DOTCONF_CB(dco_chroot) { dc_context *dcc = (dc_context*)ctx; dcc->ps->chroot = cmd->data.str; return NULL; } static const configoption_t dc_options[] = { { "CheckInterval", ARG_INT, dco_check_interval, NULL, DCC_ROOT }, { "Daemonize", ARG_TOGGLE, dco_daemonize, NULL, DCC_ROOT }, { "PidFile", ARG_STR, dco_pid_file, NULL, DCC_ROOT|DCC_PROCESS }, { "MailtoHeader", ARG_STR, dco_mailto_header, NULL, DCC_ROOT|DCC_PROCESS }, { "Notify", ARG_STR, dco_notify, NULL, DCC_ROOT|DCC_PROCESS }, { "<Process", ARG_STR, dco_process, NULL, DCC_ROOT }, + { "ProcessName", ARG_STR, dco_process_name, NULL, DCC_PROCESS }, { "RestartCommand", ARG_STR, dco_restart_command, NULL, DCC_PROCESS }, { "User", ARG_STR, dco_user, NULL, DCC_PROCESS }, { "Group", ARG_STR, dco_group, NULL, DCC_PROCESS }, { "Chroot", ARG_STR, dco_chroot, NULL, DCC_PROCESS }, { "</Process>", ARG_NONE, dco__process, NULL, DCC_PROCESS }, LAST_OPTION }; static const char *dc_context_checker(command_t *cmd,unsigned long mask) { dc_context *dcc = (dc_context*)cmd->context; if( (mask==CTX_ALL) || ((mask&dcc->ctx)==dcc->ctx) ) return NULL; diff --git a/src/dudki.cc b/src/dudki.cc index 9562079..c966695 100644 --- a/src/dudki.cc +++ b/src/dudki.cc @@ -26,26 +26,28 @@ bool restarting = false; static char **_argv = NULL; static void lethal_signal_handler(int signum) { syslog(LOG_NOTICE,"Lethal signal received. Terminating."); finishing = true; } static void sighup_handler(int signum) { syslog(LOG_NOTICE,"SUGHUP received, reloading."); restarting = finishing = true; } void check_herd(configuration& config) { + process::prepare_herd(); for(processes_t::iterator i=config.processes.begin();i!=config.processes.end();++i) i->second.check(i->first,config); + process::unprepare_herd(); } void signal_self(const configuration& config,int signum) { ifstream pids(config.pidfile.c_str(),ios::in); if(!pids) throw runtime_error("Can't detect running instance"); pid_t pid = 0; pids >> pid; if(!pid) throw runtime_error("Can't detect running instance"); if(pid==getpid()) throw 0; @@ -211,25 +213,28 @@ int main(int argc,char **argv) { break; case op_signal: try { if(optind>=argc) { signal_self(config,op_signum); }else{ int failures = 0; for(int narg=optind;narg<argc;narg++) { try { processes_t::const_iterator i = config.processes.find(argv[narg]); if(i==config.processes.end()) throw runtime_error("no such process configured"); + if(op_signum) i->second.signal(op_signum); + else + i->second.check(); }catch(exception& e) { cerr << "dudki(" << argv[narg] << "): " << e.what() << endl; failures++; } } if(failures) throw runtime_error("not all processes have been successfully signaled"); } if(!op_signum) exit(0); }catch(exception& e) { if(!op_signum) diff --git a/src/process.cc b/src/process.cc index 1ffac9f..8a5b5d2 100644 --- a/src/process.cc +++ b/src/process.cc @@ -1,31 +1,47 @@ #include <stdio.h> #include <sys/types.h> #include <unistd.h> #include <signal.h> #include <pwd.h> #include <grp.h> +#include <dirent.h> #include <sys/wait.h> #include <syslog.h> #include <errno.h> #include <iostream> #include <fstream> +#include <sstream> #include <stdexcept> +#include <string> +#include <vector> using namespace std; #include "process.h" #include "configuration.h" +static multimap<string,pid_t> procpids; + +void process::check() const { + if(!pidfile.empty()) { + signal(0); + }else if(!process_name.empty()) { + if(procpids.empty()) + gather_proc_info(); + if(procpids.find(process_name)==procpids.end()) + throw runtime_error("no such process"); + } // XXX: or else? +} void process::check(const string& id,configuration& config) { try { - signal(0); + check(); patience = 0; }catch(exception& e) { if(patience>60) { // TODO: configurable patience = 0; }else{ if(patience<10) { // TODO: configurable syslog(LOG_NOTICE,"The process '%s' is down, trying to launch.",id.c_str()); do_notify(id,"Starting up", "The named process seems to be down. Dudki will try\n" "to revive it by running the specified command.\n", config); try { @@ -167,23 +183,103 @@ void process::notify_mailto(const string& email,const string& id,const string& e "%s\n" "---\n" "This message was sent automatically by the 'dudki' daemon\n", id.c_str(), event.c_str(), description.c_str() ); fclose(mta); int status; waitpid(pid,&status,0); // TODO: check the return code } void process::signal(int signum) const { + if(!pidfile.empty()) { ifstream pids(pidfile.c_str(),ios::in); if(!pids) throw runtime_error("no pidfile found"); pid_t pid = 0; pids >> pid; pids.close(); if(!pid) throw runtime_error("no pid in pidfile"); if(kill(pid,signum)) throw runtime_error("failed to signal process"); + }else if(!process_name.empty()) { + if(procpids.empty()) + gather_proc_info(); + pair<multimap<string,pid_t>::const_iterator,multimap<string,pid_t>::const_iterator> range = procpids.equal_range(process_name); + int count = 0; + for(multimap<string,pid_t>::const_iterator i=range.first;i!=range.second;++i) { + pid_t pid = i->second; + if(kill(i->second,signum)) + throw runtime_error("failed to signal process"); + count++; + } + if(!count) + throw runtime_error("no running instance detected"); + }else + throw runtime_error("nothing is known about the process"); +} + +void process::prepare_herd() { + procpids.clear(); +} +void process::unprepare_herd() { + procpids.clear(); +} +void process::gather_proc_info() { + vector<pid_t> allpids; + DIR *pd = opendir("/proc"); + if(!pd) + throw runtime_error("failed to open /proc"); + struct dirent *pde; + pid_t selfpid = getpid(); + while(pde=readdir(pd)) { + errno=0; + pid_t pid = atoi(pde->d_name); + if((!pid) || pid==selfpid) + continue; + allpids.push_back(pid); + } + closedir(pd); + char s[256]; + procpids.clear(); + for(vector<pid_t>::const_iterator i=allpids.begin();i!=allpids.end();++i) { + int r = snprintf(s,sizeof(s),"/proc/%d/stat",*i); + if(r>=sizeof(s) || r<1) + continue; + string cmd; + ifstream ss(s,ios::in); + if(!ss) + continue; + getline(ss,cmd); + string::size_type op = cmd.find('('); + if(op==string::npos) + continue; + cmd.erase(0,op+1); + string::size_type cp = cmd.find(')'); + if(cp==string::npos) + continue; + cmd.erase(cp); + r = snprintf(s,sizeof(s),"/proc/%d/cmdline",*i); + if(r>=sizeof(s) || r<1) + continue; + ifstream cs(s,ios::binary); + if(!cs) + continue; + string command; + while(cs) { + string cl; + getline(cs,cl,(char)0); + string::size_type lsl = cl.rfind('/'); + if(lsl!=string::npos) + cl.erase(0,lsl+1); + if(cl.substr(0,cmd.length())==cmd) { + command = cl; + break; + } + } + procpids.insert(pair<string,pid_t>(cmd,*i)); + if((!command.empty()) && cmd!=command) + procpids.insert(pair<string,pid_t>(command,*i)); + } } diff --git a/src/process.h b/src/process.h index 27ee049..90b12d9 100644 --- a/src/process.h +++ b/src/process.h @@ -3,36 +3,42 @@ #include <string> #include <map> using namespace std; class configuration; typedef map<string,string> headers_t; class process { public: string pidfile; + string process_name; string restart_cmd; string notify; string user; string group; string chroot; headers_t mailto_headers; int patience; process() : patience(0) { } void check(const string& id,configuration& config); void launch(const string& id,configuration& config); void do_notify(const string& id,const string& event,const string& description,configuration& config); void notify_mailto(const string& email,const string& id,const string& event, const string& description,configuration& config); void signal(int signum) const; + void check() const; + + static void prepare_herd(); + static void gather_proc_info(); + static void unprepare_herd(); }; typedef map<string,process> processes_t; #endif /* __PROCESS_H */ |